core/stdarch/crates/core_arch/src/x86/
avx512f.rs

1use crate::{
2    arch::asm,
3    core_arch::{simd::*, x86::*},
4    intrinsics::simd::*,
5    intrinsics::{fmaf32, fmaf64},
6    mem, ptr,
7};
8
9use core::hint::unreachable_unchecked;
10#[cfg(test)]
11use stdarch_test::assert_instr;
12
13/// Computes the absolute values of packed 32-bit integers in `a`.
14///
15/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
16#[inline]
17#[target_feature(enable = "avx512f")]
18#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19#[cfg_attr(test, assert_instr(vpabsd))]
20pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
21    unsafe {
22        let a = a.as_i32x16();
23        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
24        transmute(r)
25    }
26}
27
28/// Computes the absolute value of packed 32-bit integers in `a`, and store the
29/// unsigned results in `dst` using writemask `k` (elements are copied from
30/// `src` when the corresponding mask bit is not set).
31///
32/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
33#[inline]
34#[target_feature(enable = "avx512f")]
35#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36#[cfg_attr(test, assert_instr(vpabsd))]
37pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
38    unsafe {
39        let abs = _mm512_abs_epi32(a).as_i32x16();
40        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
41    }
42}
43
44/// Computes the absolute value of packed 32-bit integers in `a`, and store the
45/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
46/// the corresponding mask bit is not set).
47///
48/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
49#[inline]
50#[target_feature(enable = "avx512f")]
51#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
52#[cfg_attr(test, assert_instr(vpabsd))]
53pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
54    unsafe {
55        let abs = _mm512_abs_epi32(a).as_i32x16();
56        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
57    }
58}
59
60/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
61///
62/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
63#[inline]
64#[target_feature(enable = "avx512f,avx512vl")]
65#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
66#[cfg_attr(test, assert_instr(vpabsd))]
67pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
68    unsafe {
69        let abs = _mm256_abs_epi32(a).as_i32x8();
70        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
71    }
72}
73
74/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
75///
76/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
77#[inline]
78#[target_feature(enable = "avx512f,avx512vl")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpabsd))]
81pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
82    unsafe {
83        let abs = _mm256_abs_epi32(a).as_i32x8();
84        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
85    }
86}
87
88/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
89///
90/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
91#[inline]
92#[target_feature(enable = "avx512f,avx512vl")]
93#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
94#[cfg_attr(test, assert_instr(vpabsd))]
95pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
96    unsafe {
97        let abs = _mm_abs_epi32(a).as_i32x4();
98        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
99    }
100}
101
102/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
103///
104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
105#[inline]
106#[target_feature(enable = "avx512f,avx512vl")]
107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
108#[cfg_attr(test, assert_instr(vpabsd))]
109pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
110    unsafe {
111        let abs = _mm_abs_epi32(a).as_i32x4();
112        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
113    }
114}
115
116/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
117///
118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
119#[inline]
120#[target_feature(enable = "avx512f")]
121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
122#[cfg_attr(test, assert_instr(vpabsq))]
123pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
124    unsafe {
125        let a = a.as_i64x8();
126        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
127        transmute(r)
128    }
129}
130
131/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
132///
133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
134#[inline]
135#[target_feature(enable = "avx512f")]
136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
137#[cfg_attr(test, assert_instr(vpabsq))]
138pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
139    unsafe {
140        let abs = _mm512_abs_epi64(a).as_i64x8();
141        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
142    }
143}
144
145/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
146///
147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
148#[inline]
149#[target_feature(enable = "avx512f")]
150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
151#[cfg_attr(test, assert_instr(vpabsq))]
152pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
153    unsafe {
154        let abs = _mm512_abs_epi64(a).as_i64x8();
155        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
156    }
157}
158
159/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
160///
161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
162#[inline]
163#[target_feature(enable = "avx512f,avx512vl")]
164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
165#[cfg_attr(test, assert_instr(vpabsq))]
166pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
167    unsafe {
168        let a = a.as_i64x4();
169        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
170        transmute(r)
171    }
172}
173
174/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
175///
176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
177#[inline]
178#[target_feature(enable = "avx512f,avx512vl")]
179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
180#[cfg_attr(test, assert_instr(vpabsq))]
181pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
182    unsafe {
183        let abs = _mm256_abs_epi64(a).as_i64x4();
184        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
185    }
186}
187
188/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
189///
190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
191#[inline]
192#[target_feature(enable = "avx512f,avx512vl")]
193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
194#[cfg_attr(test, assert_instr(vpabsq))]
195pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
196    unsafe {
197        let abs = _mm256_abs_epi64(a).as_i64x4();
198        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
199    }
200}
201
202/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
203///
204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
205#[inline]
206#[target_feature(enable = "avx512f,avx512vl")]
207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
208#[cfg_attr(test, assert_instr(vpabsq))]
209pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
210    unsafe {
211        let a = a.as_i64x2();
212        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
213        transmute(r)
214    }
215}
216
217/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
218///
219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
220#[inline]
221#[target_feature(enable = "avx512f,avx512vl")]
222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
223#[cfg_attr(test, assert_instr(vpabsq))]
224pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
225    unsafe {
226        let abs = _mm_abs_epi64(a).as_i64x2();
227        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
228    }
229}
230
231/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
232///
233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
234#[inline]
235#[target_feature(enable = "avx512f,avx512vl")]
236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
237#[cfg_attr(test, assert_instr(vpabsq))]
238pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
239    unsafe {
240        let abs = _mm_abs_epi64(a).as_i64x2();
241        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
242    }
243}
244
245/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
246///
247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
248#[inline]
249#[target_feature(enable = "avx512f")]
250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
251#[cfg_attr(test, assert_instr(vpandd))]
252pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
253    unsafe { simd_fabs(v2) }
254}
255
256/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
257///
258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
259#[inline]
260#[target_feature(enable = "avx512f")]
261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
262#[cfg_attr(test, assert_instr(vpandd))]
263pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
264    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
265}
266
267/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
268///
269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
270#[inline]
271#[target_feature(enable = "avx512f")]
272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
273#[cfg_attr(test, assert_instr(vpandq))]
274pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
275    unsafe { simd_fabs(v2) }
276}
277
278/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
279///
280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
281#[inline]
282#[target_feature(enable = "avx512f")]
283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
284#[cfg_attr(test, assert_instr(vpandq))]
285pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
286    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
287}
288
289/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
290///
291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
292#[inline]
293#[target_feature(enable = "avx512f")]
294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
295#[cfg_attr(test, assert_instr(vmovdqa32))]
296pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
297    unsafe {
298        let mov = a.as_i32x16();
299        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
300    }
301}
302
303/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
304///
305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
306#[inline]
307#[target_feature(enable = "avx512f")]
308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
309#[cfg_attr(test, assert_instr(vmovdqa32))]
310pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
311    unsafe {
312        let mov = a.as_i32x16();
313        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
314    }
315}
316
317/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
318///
319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
320#[inline]
321#[target_feature(enable = "avx512f,avx512vl")]
322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
323#[cfg_attr(test, assert_instr(vmovdqa32))]
324pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
325    unsafe {
326        let mov = a.as_i32x8();
327        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
328    }
329}
330
331/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
332///
333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
334#[inline]
335#[target_feature(enable = "avx512f,avx512vl")]
336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
337#[cfg_attr(test, assert_instr(vmovdqa32))]
338pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
339    unsafe {
340        let mov = a.as_i32x8();
341        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
342    }
343}
344
345/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
346///
347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
348#[inline]
349#[target_feature(enable = "avx512f,avx512vl")]
350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
351#[cfg_attr(test, assert_instr(vmovdqa32))]
352pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
353    unsafe {
354        let mov = a.as_i32x4();
355        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
356    }
357}
358
359/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
360///
361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
362#[inline]
363#[target_feature(enable = "avx512f,avx512vl")]
364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
365#[cfg_attr(test, assert_instr(vmovdqa32))]
366pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
367    unsafe {
368        let mov = a.as_i32x4();
369        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
370    }
371}
372
373/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
374///
375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
376#[inline]
377#[target_feature(enable = "avx512f")]
378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
379#[cfg_attr(test, assert_instr(vmovdqa64))]
380pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
381    unsafe {
382        let mov = a.as_i64x8();
383        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
384    }
385}
386
387/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
388///
389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
390#[inline]
391#[target_feature(enable = "avx512f")]
392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
393#[cfg_attr(test, assert_instr(vmovdqa64))]
394pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
395    unsafe {
396        let mov = a.as_i64x8();
397        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
398    }
399}
400
401/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
402///
403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
404#[inline]
405#[target_feature(enable = "avx512f,avx512vl")]
406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
407#[cfg_attr(test, assert_instr(vmovdqa64))]
408pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
409    unsafe {
410        let mov = a.as_i64x4();
411        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
412    }
413}
414
415/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
416///
417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
418#[inline]
419#[target_feature(enable = "avx512f,avx512vl")]
420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
421#[cfg_attr(test, assert_instr(vmovdqa64))]
422pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
423    unsafe {
424        let mov = a.as_i64x4();
425        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
426    }
427}
428
429/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
430///
431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
432#[inline]
433#[target_feature(enable = "avx512f,avx512vl")]
434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
435#[cfg_attr(test, assert_instr(vmovdqa64))]
436pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
437    unsafe {
438        let mov = a.as_i64x2();
439        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
440    }
441}
442
443/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
444///
445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
446#[inline]
447#[target_feature(enable = "avx512f,avx512vl")]
448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
449#[cfg_attr(test, assert_instr(vmovdqa64))]
450pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
451    unsafe {
452        let mov = a.as_i64x2();
453        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
454    }
455}
456
457/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
458///
459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
460#[inline]
461#[target_feature(enable = "avx512f")]
462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
463#[cfg_attr(test, assert_instr(vmovaps))]
464pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
465    unsafe {
466        let mov = a.as_f32x16();
467        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
468    }
469}
470
471/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
472///
473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
474#[inline]
475#[target_feature(enable = "avx512f")]
476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
477#[cfg_attr(test, assert_instr(vmovaps))]
478pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
479    unsafe {
480        let mov = a.as_f32x16();
481        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
482    }
483}
484
485/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
486///
487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
488#[inline]
489#[target_feature(enable = "avx512f,avx512vl")]
490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
491#[cfg_attr(test, assert_instr(vmovaps))]
492pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
493    unsafe {
494        let mov = a.as_f32x8();
495        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
496    }
497}
498
499/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
502#[inline]
503#[target_feature(enable = "avx512f,avx512vl")]
504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
505#[cfg_attr(test, assert_instr(vmovaps))]
506pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
507    unsafe {
508        let mov = a.as_f32x8();
509        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
510    }
511}
512
513/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
514///
515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
516#[inline]
517#[target_feature(enable = "avx512f,avx512vl")]
518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
519#[cfg_attr(test, assert_instr(vmovaps))]
520pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
521    unsafe {
522        let mov = a.as_f32x4();
523        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
524    }
525}
526
527/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
528///
529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
530#[inline]
531#[target_feature(enable = "avx512f,avx512vl")]
532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
533#[cfg_attr(test, assert_instr(vmovaps))]
534pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
535    unsafe {
536        let mov = a.as_f32x4();
537        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
538    }
539}
540
541/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
542///
543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
544#[inline]
545#[target_feature(enable = "avx512f")]
546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
547#[cfg_attr(test, assert_instr(vmovapd))]
548pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
549    unsafe {
550        let mov = a.as_f64x8();
551        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
552    }
553}
554
555/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
556///
557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
558#[inline]
559#[target_feature(enable = "avx512f")]
560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
561#[cfg_attr(test, assert_instr(vmovapd))]
562pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
563    unsafe {
564        let mov = a.as_f64x8();
565        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
566    }
567}
568
569/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
570///
571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
572#[inline]
573#[target_feature(enable = "avx512f,avx512vl")]
574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
575#[cfg_attr(test, assert_instr(vmovapd))]
576pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
577    unsafe {
578        let mov = a.as_f64x4();
579        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
580    }
581}
582
583/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
584///
585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
586#[inline]
587#[target_feature(enable = "avx512f,avx512vl")]
588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
589#[cfg_attr(test, assert_instr(vmovapd))]
590pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
591    unsafe {
592        let mov = a.as_f64x4();
593        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
594    }
595}
596
597/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
598///
599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
600#[inline]
601#[target_feature(enable = "avx512f,avx512vl")]
602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
603#[cfg_attr(test, assert_instr(vmovapd))]
604pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
605    unsafe {
606        let mov = a.as_f64x2();
607        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
608    }
609}
610
611/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
612///
613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
614#[inline]
615#[target_feature(enable = "avx512f,avx512vl")]
616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
617#[cfg_attr(test, assert_instr(vmovapd))]
618pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
619    unsafe {
620        let mov = a.as_f64x2();
621        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
622    }
623}
624
625/// Add packed 32-bit integers in a and b, and store the results in dst.
626///
627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
628#[inline]
629#[target_feature(enable = "avx512f")]
630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
631#[cfg_attr(test, assert_instr(vpaddd))]
632pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
633    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
634}
635
636/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
637///
638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
639#[inline]
640#[target_feature(enable = "avx512f")]
641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
642#[cfg_attr(test, assert_instr(vpaddd))]
643pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
644    unsafe {
645        let add = _mm512_add_epi32(a, b).as_i32x16();
646        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
647    }
648}
649
650/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
651///
652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
653#[inline]
654#[target_feature(enable = "avx512f")]
655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
656#[cfg_attr(test, assert_instr(vpaddd))]
657pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
658    unsafe {
659        let add = _mm512_add_epi32(a, b).as_i32x16();
660        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
661    }
662}
663
664/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
665///
666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
667#[inline]
668#[target_feature(enable = "avx512f,avx512vl")]
669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
670#[cfg_attr(test, assert_instr(vpaddd))]
671pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
672    unsafe {
673        let add = _mm256_add_epi32(a, b).as_i32x8();
674        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
675    }
676}
677
678/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
679///
680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
681#[inline]
682#[target_feature(enable = "avx512f,avx512vl")]
683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
684#[cfg_attr(test, assert_instr(vpaddd))]
685pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
686    unsafe {
687        let add = _mm256_add_epi32(a, b).as_i32x8();
688        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
689    }
690}
691
692/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
693///
694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
695#[inline]
696#[target_feature(enable = "avx512f,avx512vl")]
697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
698#[cfg_attr(test, assert_instr(vpaddd))]
699pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
700    unsafe {
701        let add = _mm_add_epi32(a, b).as_i32x4();
702        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
703    }
704}
705
706/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
707///
708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
709#[inline]
710#[target_feature(enable = "avx512f,avx512vl")]
711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
712#[cfg_attr(test, assert_instr(vpaddd))]
713pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
714    unsafe {
715        let add = _mm_add_epi32(a, b).as_i32x4();
716        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
717    }
718}
719
720/// Add packed 64-bit integers in a and b, and store the results in dst.
721///
722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
723#[inline]
724#[target_feature(enable = "avx512f")]
725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
726#[cfg_attr(test, assert_instr(vpaddq))]
727pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
728    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
729}
730
731/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
732///
733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
734#[inline]
735#[target_feature(enable = "avx512f")]
736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
737#[cfg_attr(test, assert_instr(vpaddq))]
738pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
739    unsafe {
740        let add = _mm512_add_epi64(a, b).as_i64x8();
741        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
742    }
743}
744
745/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
746///
747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
748#[inline]
749#[target_feature(enable = "avx512f")]
750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
751#[cfg_attr(test, assert_instr(vpaddq))]
752pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
753    unsafe {
754        let add = _mm512_add_epi64(a, b).as_i64x8();
755        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
756    }
757}
758
759/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
762#[inline]
763#[target_feature(enable = "avx512f,avx512vl")]
764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
765#[cfg_attr(test, assert_instr(vpaddq))]
766pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
767    unsafe {
768        let add = _mm256_add_epi64(a, b).as_i64x4();
769        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
770    }
771}
772
773/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
774///
775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
776#[inline]
777#[target_feature(enable = "avx512f,avx512vl")]
778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
779#[cfg_attr(test, assert_instr(vpaddq))]
780pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
781    unsafe {
782        let add = _mm256_add_epi64(a, b).as_i64x4();
783        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
784    }
785}
786
787/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
788///
789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
790#[inline]
791#[target_feature(enable = "avx512f,avx512vl")]
792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
793#[cfg_attr(test, assert_instr(vpaddq))]
794pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
795    unsafe {
796        let add = _mm_add_epi64(a, b).as_i64x2();
797        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
798    }
799}
800
801/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
802///
803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
804#[inline]
805#[target_feature(enable = "avx512f,avx512vl")]
806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
807#[cfg_attr(test, assert_instr(vpaddq))]
808pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
809    unsafe {
810        let add = _mm_add_epi64(a, b).as_i64x2();
811        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
812    }
813}
814
815/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
816///
817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
818#[inline]
819#[target_feature(enable = "avx512f")]
820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
821#[cfg_attr(test, assert_instr(vaddps))]
822pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
823    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
824}
825
826/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
827///
828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
829#[inline]
830#[target_feature(enable = "avx512f")]
831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
832#[cfg_attr(test, assert_instr(vaddps))]
833pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
834    unsafe {
835        let add = _mm512_add_ps(a, b).as_f32x16();
836        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
837    }
838}
839
840/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
841///
842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
843#[inline]
844#[target_feature(enable = "avx512f")]
845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
846#[cfg_attr(test, assert_instr(vaddps))]
847pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
848    unsafe {
849        let add = _mm512_add_ps(a, b).as_f32x16();
850        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
851    }
852}
853
854/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
855///
856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
857#[inline]
858#[target_feature(enable = "avx512f,avx512vl")]
859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
860#[cfg_attr(test, assert_instr(vaddps))]
861pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
862    unsafe {
863        let add = _mm256_add_ps(a, b).as_f32x8();
864        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
865    }
866}
867
868/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
869///
870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
871#[inline]
872#[target_feature(enable = "avx512f,avx512vl")]
873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
874#[cfg_attr(test, assert_instr(vaddps))]
875pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
876    unsafe {
877        let add = _mm256_add_ps(a, b).as_f32x8();
878        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
879    }
880}
881
882/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
883///
884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
885#[inline]
886#[target_feature(enable = "avx512f,avx512vl")]
887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
888#[cfg_attr(test, assert_instr(vaddps))]
889pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
890    unsafe {
891        let add = _mm_add_ps(a, b).as_f32x4();
892        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
893    }
894}
895
896/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
897///
898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
899#[inline]
900#[target_feature(enable = "avx512f,avx512vl")]
901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
902#[cfg_attr(test, assert_instr(vaddps))]
903pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
904    unsafe {
905        let add = _mm_add_ps(a, b).as_f32x4();
906        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
907    }
908}
909
910/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
911///
912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
913#[inline]
914#[target_feature(enable = "avx512f")]
915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
916#[cfg_attr(test, assert_instr(vaddpd))]
917pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
918    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
919}
920
921/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
924#[inline]
925#[target_feature(enable = "avx512f")]
926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
927#[cfg_attr(test, assert_instr(vaddpd))]
928pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
929    unsafe {
930        let add = _mm512_add_pd(a, b).as_f64x8();
931        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
932    }
933}
934
935/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
938#[inline]
939#[target_feature(enable = "avx512f")]
940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
941#[cfg_attr(test, assert_instr(vaddpd))]
942pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
943    unsafe {
944        let add = _mm512_add_pd(a, b).as_f64x8();
945        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
946    }
947}
948
949/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
950///
951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
952#[inline]
953#[target_feature(enable = "avx512f,avx512vl")]
954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
955#[cfg_attr(test, assert_instr(vaddpd))]
956pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
957    unsafe {
958        let add = _mm256_add_pd(a, b).as_f64x4();
959        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
960    }
961}
962
963/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
966#[inline]
967#[target_feature(enable = "avx512f,avx512vl")]
968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
969#[cfg_attr(test, assert_instr(vaddpd))]
970pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
971    unsafe {
972        let add = _mm256_add_pd(a, b).as_f64x4();
973        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
974    }
975}
976
977/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
978///
979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
980#[inline]
981#[target_feature(enable = "avx512f,avx512vl")]
982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
983#[cfg_attr(test, assert_instr(vaddpd))]
984pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
985    unsafe {
986        let add = _mm_add_pd(a, b).as_f64x2();
987        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
988    }
989}
990
991/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
992///
993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
994#[inline]
995#[target_feature(enable = "avx512f,avx512vl")]
996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
997#[cfg_attr(test, assert_instr(vaddpd))]
998pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
999    unsafe {
1000        let add = _mm_add_pd(a, b).as_f64x2();
1001        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
1002    }
1003}
1004
1005/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1006///
1007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1008#[inline]
1009#[target_feature(enable = "avx512f")]
1010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1011#[cfg_attr(test, assert_instr(vpsubd))]
1012pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1013    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
1014}
1015
1016/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1017///
1018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1019#[inline]
1020#[target_feature(enable = "avx512f")]
1021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1022#[cfg_attr(test, assert_instr(vpsubd))]
1023pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1024    unsafe {
1025        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1026        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
1027    }
1028}
1029
1030/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1031///
1032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1033#[inline]
1034#[target_feature(enable = "avx512f")]
1035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1036#[cfg_attr(test, assert_instr(vpsubd))]
1037pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1038    unsafe {
1039        let sub = _mm512_sub_epi32(a, b).as_i32x16();
1040        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
1041    }
1042}
1043
1044/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1045///
1046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1047#[inline]
1048#[target_feature(enable = "avx512f,avx512vl")]
1049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1050#[cfg_attr(test, assert_instr(vpsubd))]
1051pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1052    unsafe {
1053        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1054        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
1055    }
1056}
1057
1058/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1059///
1060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1061#[inline]
1062#[target_feature(enable = "avx512f,avx512vl")]
1063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1064#[cfg_attr(test, assert_instr(vpsubd))]
1065pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1066    unsafe {
1067        let sub = _mm256_sub_epi32(a, b).as_i32x8();
1068        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
1069    }
1070}
1071
1072/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1073///
1074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1075#[inline]
1076#[target_feature(enable = "avx512f,avx512vl")]
1077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1078#[cfg_attr(test, assert_instr(vpsubd))]
1079pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1080    unsafe {
1081        let sub = _mm_sub_epi32(a, b).as_i32x4();
1082        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
1083    }
1084}
1085
1086/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1089#[inline]
1090#[target_feature(enable = "avx512f,avx512vl")]
1091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1092#[cfg_attr(test, assert_instr(vpsubd))]
1093pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1094    unsafe {
1095        let sub = _mm_sub_epi32(a, b).as_i32x4();
1096        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
1097    }
1098}
1099
1100/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1101///
1102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1103#[inline]
1104#[target_feature(enable = "avx512f")]
1105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1106#[cfg_attr(test, assert_instr(vpsubq))]
1107pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1108    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
1109}
1110
1111/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1112///
1113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1114#[inline]
1115#[target_feature(enable = "avx512f")]
1116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1117#[cfg_attr(test, assert_instr(vpsubq))]
1118pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1119    unsafe {
1120        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
1122    }
1123}
1124
1125/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1128#[inline]
1129#[target_feature(enable = "avx512f")]
1130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1131#[cfg_attr(test, assert_instr(vpsubq))]
1132pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1133    unsafe {
1134        let sub = _mm512_sub_epi64(a, b).as_i64x8();
1135        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
1136    }
1137}
1138
1139/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1142#[inline]
1143#[target_feature(enable = "avx512f,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpsubq))]
1146pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1147    unsafe {
1148        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1149        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1150    }
1151}
1152
1153/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1154///
1155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1156#[inline]
1157#[target_feature(enable = "avx512f,avx512vl")]
1158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1159#[cfg_attr(test, assert_instr(vpsubq))]
1160pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1161    unsafe {
1162        let sub = _mm256_sub_epi64(a, b).as_i64x4();
1163        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
1164    }
1165}
1166
1167/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1170#[inline]
1171#[target_feature(enable = "avx512f,avx512vl")]
1172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1173#[cfg_attr(test, assert_instr(vpsubq))]
1174pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1175    unsafe {
1176        let sub = _mm_sub_epi64(a, b).as_i64x2();
1177        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1178    }
1179}
1180
1181/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1184#[inline]
1185#[target_feature(enable = "avx512f,avx512vl")]
1186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189    unsafe {
1190        let sub = _mm_sub_epi64(a, b).as_i64x2();
1191        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192    }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
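///
/// # Example
///
/// A minimal usage sketch (illustrative only; nightly Rust with the unstable
/// `stdarch_x86_avx512` feature and AVX-512F hardware assumed, as in the earlier
/// examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(0.25);
///     let r = _mm512_sub_ps(a, b);
///     // Every lane holds 1.5 - 0.25 = 1.25.
///     assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(1.25)), 0xffff);
/// }
/// ```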
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214    unsafe {
1215        let sub = _mm512_sub_ps(a, b).as_f32x16();
1216        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217    }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228    unsafe {
1229        let sub = _mm512_sub_ps(a, b).as_f32x16();
1230        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231    }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242    unsafe {
1243        let sub = _mm256_sub_ps(a, b).as_f32x8();
1244        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245    }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256    unsafe {
1257        let sub = _mm256_sub_ps(a, b).as_f32x8();
1258        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259    }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270    unsafe {
1271        let sub = _mm_sub_ps(a, b).as_f32x4();
1272        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273    }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284    unsafe {
1285        let sub = _mm_sub_ps(a, b).as_f32x4();
1286        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287    }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
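///
/// # Example
///
/// A minimal sketch of the double-precision variant (illustrative only; nightly
/// and AVX-512F assumed as in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(2.5);
///     let b = _mm512_set1_pd(1.0);
///     let r = _mm512_sub_pd(a, b);
///     // Each of the eight lanes holds 2.5 - 1.0 = 1.5.
///     assert_eq!(_mm512_cmpeq_pd_mask(r, _mm512_set1_pd(1.5)), 0xff);
/// }
/// ```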
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309    unsafe {
1310        let sub = _mm512_sub_pd(a, b).as_f64x8();
1311        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312    }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323    unsafe {
1324        let sub = _mm512_sub_pd(a, b).as_f64x8();
1325        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326    }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337    unsafe {
1338        let sub = _mm256_sub_pd(a, b).as_f64x4();
1339        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340    }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351    unsafe {
1352        let sub = _mm256_sub_pd(a, b).as_f64x4();
1353        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354    }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365    unsafe {
1366        let sub = _mm_sub_pd(a, b).as_f64x2();
1367        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368    }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379    unsafe {
1380        let sub = _mm_sub_pd(a, b).as_f64x2();
1381        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382    }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
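///
/// # Example
///
/// A minimal sketch showing that only the low, sign-extended 32 bits of each
/// 64-bit lane participate (illustrative only; nightly and AVX-512F assumed as
/// in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64((7_i64 << 32) | 4); // low 32 bits = 4
///     let b = _mm512_set1_epi64(0xffff_fffd);       // low 32 bits = -3 as an i32
///     let r = _mm512_mul_epi32(a, b);
///     // 4 * -3 = -12 in every 64-bit lane; the upper halves of a and b are ignored.
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-12)), 0xff);
/// }
/// ```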
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393    unsafe {
1394        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1396        transmute(simd_mul(a, b))
1397    }
1398}
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408    unsafe {
1409        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1410        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411    }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422    unsafe {
1423        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1424        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425    }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436    unsafe {
1437        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1438        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439    }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450    unsafe {
1451        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1452        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453    }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464    unsafe {
1465        let mul = _mm_mul_epi32(a, b).as_i64x2();
1466        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467    }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478    unsafe {
1479        let mul = _mm_mul_epi32(a, b).as_i64x2();
1480        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481    }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
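///
/// # Example
///
/// A minimal sketch of the truncating behaviour (illustrative only; nightly and
/// AVX-512F assumed as in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(100_000);
///     let b = _mm512_set1_epi32(100_000);
///     let r = _mm512_mullo_epi32(a, b);
///     // The full product 10_000_000_000 does not fit in 32 bits; only its low
///     // 32 bits (10_000_000_000 mod 2^32 = 1_410_065_408) are stored.
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(1_410_065_408)), 0xffff);
/// }
/// ```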
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1492    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503    unsafe {
1504        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1505        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506    }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517    unsafe {
1518        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1519        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520    }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531    unsafe {
1532        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1533        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534    }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545    unsafe {
1546        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1547        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548    }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559    unsafe {
1560        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1561        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562    }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573    unsafe {
1574        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1575        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576    }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
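///
/// # Example
///
/// A minimal sketch of the low-64-bit (wrapping) multiply (illustrative only;
/// nightly and AVX-512F assumed as in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64(1_i64 << 40);
///     let b = _mm512_set1_epi64(1_i64 << 30);
///     let r = _mm512_mullox_epi64(a, b);
///     // The mathematical product is 2^70; only its low 64 bits remain,
///     // i.e. 2^70 mod 2^64 = 2^6 = 64 in every lane.
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(64)), 0xff);
/// }
/// ```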
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1588    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600    unsafe {
1601        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1602        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603    }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
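///
/// # Example
///
/// A minimal sketch showing that only the low, zero-extended 32 bits of each
/// 64-bit lane participate (illustrative only; nightly and AVX-512F assumed as
/// in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64(0xffff_ffff);        // low 32 bits = u32::MAX
///     let b = _mm512_set1_epi64((1_i64 << 40) | 2);  // low 32 bits = 2
///     let r = _mm512_mul_epu32(a, b);
///     // u32::MAX * 2 = 8_589_934_590; the high halves of a and b are ignored.
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(8_589_934_590)), 0xff);
/// }
/// ```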
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614    unsafe {
1615        let a = a.as_u64x8();
1616        let b = b.as_u64x8();
1617        let mask = u64x8::splat(u32::MAX.into());
1618        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619    }
1620}
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630    unsafe {
1631        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1632        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633    }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644    unsafe {
1645        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1646        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647    }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658    unsafe {
1659        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1660        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661    }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672    unsafe {
1673        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1674        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675    }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686    unsafe {
1687        let mul = _mm_mul_epu32(a, b).as_u64x2();
1688        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689    }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700    unsafe {
1701        let mul = _mm_mul_epu32(a, b).as_u64x2();
1702        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703    }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
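///
/// # Example
///
/// A minimal usage sketch (illustrative only; nightly and AVX-512F assumed as in
/// the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(0.5);
///     let r = _mm512_mul_ps(a, b);
///     // Every lane holds 3.0 * 0.5 = 1.5.
///     assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(1.5)), 0xffff);
/// }
/// ```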
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1714    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725    unsafe {
1726        let mul = _mm512_mul_ps(a, b).as_f32x16();
1727        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728    }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739    unsafe {
1740        let mul = _mm512_mul_ps(a, b).as_f32x16();
1741        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742    }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753    unsafe {
1754        let mul = _mm256_mul_ps(a, b).as_f32x8();
1755        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756    }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767    unsafe {
1768        let mul = _mm256_mul_ps(a, b).as_f32x8();
1769        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770    }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781    unsafe {
1782        let mul = _mm_mul_ps(a, b).as_f32x4();
1783        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784    }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795    unsafe {
1796        let mul = _mm_mul_ps(a, b).as_f32x4();
1797        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798    }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1809    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820    unsafe {
1821        let mul = _mm512_mul_pd(a, b).as_f64x8();
1822        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823    }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834    unsafe {
1835        let mul = _mm512_mul_pd(a, b).as_f64x8();
1836        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837    }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848    unsafe {
1849        let mul = _mm256_mul_pd(a, b).as_f64x4();
1850        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851    }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862    unsafe {
1863        let mul = _mm256_mul_pd(a, b).as_f64x4();
1864        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865    }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876    unsafe {
1877        let mul = _mm_mul_pd(a, b).as_f64x2();
1878        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879    }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890    unsafe {
1891        let mul = _mm_mul_pd(a, b).as_f64x2();
1892        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893    }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
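///
/// # Example
///
/// A minimal usage sketch (illustrative only; nightly and AVX-512F assumed as in
/// the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(4.0);
///     let r = _mm512_div_ps(a, b);
///     // Every lane holds 1.0 / 4.0 = 0.25.
///     assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(0.25)), 0xffff);
/// }
/// ```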
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1904    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915    unsafe {
1916        let div = _mm512_div_ps(a, b).as_f32x16();
1917        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918    }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929    unsafe {
1930        let div = _mm512_div_ps(a, b).as_f32x16();
1931        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932    }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943    unsafe {
1944        let div = _mm256_div_ps(a, b).as_f32x8();
1945        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946    }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957    unsafe {
1958        let div = _mm256_div_ps(a, b).as_f32x8();
1959        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960    }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971    unsafe {
1972        let div = _mm_div_ps(a, b).as_f32x4();
1973        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974    }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985    unsafe {
1986        let div = _mm_div_ps(a, b).as_f32x4();
1987        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988    }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1999    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010    unsafe {
2011        let div = _mm512_div_pd(a, b).as_f64x8();
2012        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013    }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024    unsafe {
2025        let div = _mm512_div_pd(a, b).as_f64x8();
2026        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027    }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038    unsafe {
2039        let div = _mm256_div_pd(a, b).as_f64x4();
2040        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041    }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052    unsafe {
2053        let div = _mm256_div_pd(a, b).as_f64x4();
2054        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055    }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066    unsafe {
2067        let div = _mm_div_pd(a, b).as_f64x2();
2068        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069    }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080    unsafe {
2081        let div = _mm_div_pd(a, b).as_f64x2();
2082        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083    }
2084}
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
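///
/// # Example
///
/// A minimal sketch of the signed comparison (illustrative only; nightly and
/// AVX-512F assumed as in the earlier examples):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(7);
///     let r = _mm512_max_epi32(a, b);
///     // The signed maximum of -1 and 7 is 7 in every lane (an unsigned
///     // comparison would instead pick -1, i.e. 0xffff_ffff).
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7)), 0xffff);
/// }
/// ```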
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094    unsafe {
2095        let a = a.as_i32x16();
2096        let b = b.as_i32x16();
2097        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098    }
2099}
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109    unsafe {
2110        let max = _mm512_max_epi32(a, b).as_i32x16();
2111        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112    }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123    unsafe {
2124        let max = _mm512_max_epi32(a, b).as_i32x16();
2125        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126    }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137    unsafe {
2138        let max = _mm256_max_epi32(a, b).as_i32x8();
2139        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
2140    }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151    unsafe {
2152        let max = _mm256_max_epi32(a, b).as_i32x8();
2153        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
2154    }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165    unsafe {
2166        let max = _mm_max_epi32(a, b).as_i32x4();
2167        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
2168    }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179    unsafe {
2180        let max = _mm_max_epi32(a, b).as_i32x4();
2181        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
2182    }
2183}
2184
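// Usage sketch (illustrative only; assumes AVX-512F). With the 512-bit form each of
// the 16 mask bits selects one 32-bit lane, so the writemask/zeromask variants can
// merge a lane-wise maximum into an existing vector:
//
//     let a = _mm512_set1_epi32(-1);
//     let b = _mm512_set1_epi32(7);
//     let max = _mm512_max_epi32(a, b);                 // every lane = 7 (signed compare)
//     let lo8 = _mm512_mask_max_epi32(a, 0x00FF, a, b); // lanes 0..=7 -> 7, lanes 8..=15 copied from `a` (-1)
//     let z   = _mm512_maskz_max_epi32(0x00FF, a, b);   // lanes 0..=7 -> 7, lanes 8..=15 -> 0
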
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193    unsafe {
2194        let a = a.as_i64x8();
2195        let b = b.as_i64x8();
2196        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2197    }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208    unsafe {
2209        let max = _mm512_max_epi64(a, b).as_i64x8();
2210        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
2211    }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222    unsafe {
2223        let max = _mm512_max_epi64(a, b).as_i64x8();
2224        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
2225    }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236    unsafe {
2237        let a = a.as_i64x4();
2238        let b = b.as_i64x4();
2239        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2240    }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251    unsafe {
2252        let max = _mm256_max_epi64(a, b).as_i64x4();
2253        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
2254    }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265    unsafe {
2266        let max = _mm256_max_epi64(a, b).as_i64x4();
2267        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
2268    }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279    unsafe {
2280        let a = a.as_i64x2();
2281        let b = b.as_i64x2();
2282        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2283    }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294    unsafe {
2295        let max = _mm_max_epi64(a, b).as_i64x2();
2296        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2297    }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308    unsafe {
2309        let max = _mm_max_epi64(a, b).as_i64x2();
2310        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2311    }
2312}
2313
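// Usage sketch (illustrative only). There is no packed signed 64-bit maximum below
// AVX-512 (VPMAXSQ is new with AVX-512F/VL), which is why even the 128/256-bit forms
// above require `avx512f,avx512vl`:
//
//     let a = _mm256_setr_epi64x(-5, 3, i64::MIN, 9);
//     let b = _mm256_set1_epi64x(0);
//     let m = _mm256_max_epi64(a, b);               // [0, 3, 0, 9] -- signed comparison
//     let z = _mm256_maskz_max_epi64(0b0011, a, b); // [0, 3, 0, 0] -- upper two lanes zeroed
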
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322    unsafe {
2323        transmute(vmaxps(
2324            a.as_f32x16(),
2325            b.as_f32x16(),
2326            _MM_FROUND_CUR_DIRECTION,
2327        ))
2328    }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339    unsafe {
2340        let max = _mm512_max_ps(a, b).as_f32x16();
2341        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2342    }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353    unsafe {
2354        let max = _mm512_max_ps(a, b).as_f32x16();
2355        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2356    }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367    unsafe {
2368        let max = _mm256_max_ps(a, b).as_f32x8();
2369        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2370    }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381    unsafe {
2382        let max = _mm256_max_ps(a, b).as_f32x8();
2383        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2384    }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395    unsafe {
2396        let max = _mm_max_ps(a, b).as_f32x4();
2397        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2398    }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409    unsafe {
2410        let max = _mm_max_ps(a, b).as_f32x4();
2411        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2412    }
2413}
2414
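// Usage sketch (illustrative only; assumes AVX-512F). Like the legacy MAXPS
// instruction, `vmaxps` is not symmetric: when one source element is NaN (or when
// both are zeros of either sign), the element from the second operand `b` is returned:
//
//     let a = _mm512_set1_ps(f32::NAN);
//     let b = _mm512_set1_ps(1.0);
//     let r = _mm512_max_ps(a, b);   // every lane = 1.0  (the element from `b`)
//     let s = _mm512_max_ps(b, a);   // every lane = NaN  (again, the element from `b`)
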
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434    unsafe {
2435        let max = _mm512_max_pd(a, b).as_f64x8();
2436        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2437    }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448    unsafe {
2449        let max = _mm512_max_pd(a, b).as_f64x8();
2450        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2451    }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462    unsafe {
2463        let max = _mm256_max_pd(a, b).as_f64x4();
2464        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2465    }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476    unsafe {
2477        let max = _mm256_max_pd(a, b).as_f64x4();
2478        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2479    }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490    unsafe {
2491        let max = _mm_max_pd(a, b).as_f64x2();
2492        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2493    }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504    unsafe {
2505        let max = _mm_max_pd(a, b).as_f64x2();
2506        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2507    }
2508}
2509
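// Usage sketch (illustrative only; assumes AVX-512F + AVX-512VL). The masked forms
// make it easy to apply a lower bound to selected lanes only:
//
//     let x     = _mm256_setr_pd(-1.5, 0.25, -3.0, 2.0);
//     let floor = _mm256_set1_pd(0.0);
//     // Clamp only lanes 0 and 2 from below; lanes 1 and 3 keep their original value.
//     let r = _mm256_mask_max_pd(x, 0b0101, x, floor);   // [0.0, 0.25, 0.0, 2.0]
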
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518    unsafe {
2519        let a = a.as_u32x16();
2520        let b = b.as_u32x16();
2521        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2522    }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533    unsafe {
2534        let max = _mm512_max_epu32(a, b).as_u32x16();
2535        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2536    }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547    unsafe {
2548        let max = _mm512_max_epu32(a, b).as_u32x16();
2549        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2550    }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561    unsafe {
2562        let max = _mm256_max_epu32(a, b).as_u32x8();
2563        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2564    }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575    unsafe {
2576        let max = _mm256_max_epu32(a, b).as_u32x8();
2577        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2578    }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589    unsafe {
2590        let max = _mm_max_epu32(a, b).as_u32x4();
2591        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2592    }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603    unsafe {
2604        let max = _mm_max_epu32(a, b).as_u32x4();
2605        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2606    }
2607}
2608
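// Usage sketch (illustrative only; assumes AVX-512F). The `epu32` forms compare lanes
// as unsigned integers, so an all-ones bit pattern (-1 as i32, u32::MAX as u32) wins
// here while it loses under `_mm512_max_epi32`:
//
//     let a = _mm512_set1_epi32(-1);             // 0xFFFF_FFFF in every lane
//     let b = _mm512_set1_epi32(7);
//     let unsigned_max = _mm512_max_epu32(a, b); // every lane = 0xFFFF_FFFF
//     let signed_max   = _mm512_max_epi32(a, b); // every lane = 7
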
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617    unsafe {
2618        let a = a.as_u64x8();
2619        let b = b.as_u64x8();
2620        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2621    }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632    unsafe {
2633        let max = _mm512_max_epu64(a, b).as_u64x8();
2634        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2635    }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646    unsafe {
2647        let max = _mm512_max_epu64(a, b).as_u64x8();
2648        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2649    }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660    unsafe {
2661        let a = a.as_u64x4();
2662        let b = b.as_u64x4();
2663        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2664    }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675    unsafe {
2676        let max = _mm256_max_epu64(a, b).as_u64x4();
2677        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2678    }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689    unsafe {
2690        let max = _mm256_max_epu64(a, b).as_u64x4();
2691        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2692    }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703    unsafe {
2704        let a = a.as_u64x2();
2705        let b = b.as_u64x2();
2706        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2707    }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718    unsafe {
2719        let max = _mm_max_epu64(a, b).as_u64x2();
2720        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2721    }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732    unsafe {
2733        let max = _mm_max_epu64(a, b).as_u64x2();
2734        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2735    }
2736}
2737
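// Usage sketch (illustrative only). As with the signed variant, packed unsigned 64-bit
// maxima (VPMAXUQ) only exist with AVX-512, so even the 128-bit form needs
// `avx512f,avx512vl`:
//
//     let a = _mm_set1_epi64x(-1);              // u64::MAX in both lanes when viewed unsigned
//     let b = _mm_set1_epi64x(42);
//     let r = _mm_max_epu64(a, b);              // both lanes = u64::MAX
//     let z = _mm_maskz_max_epu64(0b10, a, b);  // lane 0 zeroed, lane 1 = u64::MAX
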
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746    unsafe {
2747        let a = a.as_i32x16();
2748        let b = b.as_i32x16();
2749        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2750    }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761    unsafe {
2762        let min = _mm512_min_epi32(a, b).as_i32x16();
2763        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2764    }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775    unsafe {
2776        let min = _mm512_min_epi32(a, b).as_i32x16();
2777        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2778    }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789    unsafe {
2790        let min = _mm256_min_epi32(a, b).as_i32x8();
2791        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2792    }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803    unsafe {
2804        let min = _mm256_min_epi32(a, b).as_i32x8();
2805        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2806    }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817    unsafe {
2818        let min = _mm_min_epi32(a, b).as_i32x4();
2819        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2820    }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831    unsafe {
2832        let min = _mm_min_epi32(a, b).as_i32x4();
2833        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2834    }
2835}
2836
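// Usage sketch (illustrative only; assumes AVX-512F). The `min` family mirrors the
// `max` family above, selecting the smaller signed value per lane:
//
//     let a = _mm512_set1_epi32(-3);
//     let b = _mm512_set1_epi32(5);
//     let r = _mm512_min_epi32(a, b);                 // every lane = -3
//     let m = _mm512_mask_min_epi32(b, 0xFFFF, a, b); // all mask bits set: same result as the plain form
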
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845    unsafe {
2846        let a = a.as_i64x8();
2847        let b = b.as_i64x8();
2848        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2849    }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860    unsafe {
2861        let min = _mm512_min_epi64(a, b).as_i64x8();
2862        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
2863    }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874    unsafe {
2875        let min = _mm512_min_epi64(a, b).as_i64x8();
2876        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
2877    }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888    unsafe {
2889        let a = a.as_i64x4();
2890        let b = b.as_i64x4();
2891        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2892    }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903    unsafe {
2904        let min = _mm256_min_epi64(a, b).as_i64x4();
2905        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
2906    }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917    unsafe {
2918        let min = _mm256_min_epi64(a, b).as_i64x4();
2919        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
2920    }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931    unsafe {
2932        let a = a.as_i64x2();
2933        let b = b.as_i64x2();
2934        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2935    }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946    unsafe {
2947        let min = _mm_min_epi64(a, b).as_i64x2();
2948        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
2949    }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960    unsafe {
2961        let min = _mm_min_epi64(a, b).as_i64x2();
2962        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
2963    }
2964}
2965
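// Usage sketch (illustrative only). `_mm_min_epi64` / `_mm256_min_epi64` have no
// SSE/AVX2 counterpart, which is why they carry the `avx512f,avx512vl` target features:
//
//     let a = _mm_set_epi64x(7, -7);             // element 1 = 7, element 0 = -7
//     let b = _mm_set1_epi64x(0);
//     let r = _mm_min_epi64(a, b);               // element 0 = -7, element 1 = 0
//     let m = _mm_mask_min_epi64(a, 0b01, a, b); // element 0 = -7, element 1 copied from `a` (7)
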
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974    unsafe {
2975        transmute(vminps(
2976            a.as_f32x16(),
2977            b.as_f32x16(),
2978            _MM_FROUND_CUR_DIRECTION,
2979        ))
2980    }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991    unsafe {
2992        let min = _mm512_min_ps(a, b).as_f32x16();
2993        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
2994    }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005    unsafe {
3006        let min = _mm512_min_ps(a, b).as_f32x16();
3007        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3008    }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019    unsafe {
3020        let min = _mm256_min_ps(a, b).as_f32x8();
3021        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3022    }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033    unsafe {
3034        let min = _mm256_min_ps(a, b).as_f32x8();
3035        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3036    }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047    unsafe {
3048        let min = _mm_min_ps(a, b).as_f32x4();
3049        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3050    }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061    unsafe {
3062        let min = _mm_min_ps(a, b).as_f32x4();
3063        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3064    }
3065}
3066
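// Usage sketch (illustrative only; assumes AVX-512F). `min`/`max` pairs are the usual
// way to clamp packed floats; like `vmaxps` above, `vminps` returns the element from
// `b` when either input is NaN:
//
//     let x  = _mm512_set1_ps(3.5);
//     let lo = _mm512_set1_ps(0.0);
//     let hi = _mm512_set1_ps(1.0);
//     let clamped = _mm512_min_ps(_mm512_max_ps(x, lo), hi); // every lane = 1.0
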
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086    unsafe {
3087        let min = _mm512_min_pd(a, b).as_f64x8();
3088        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3089    }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100    unsafe {
3101        let min = _mm512_min_pd(a, b).as_f64x8();
3102        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3103    }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114    unsafe {
3115        let min = _mm256_min_pd(a, b).as_f64x4();
3116        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3117    }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128    unsafe {
3129        let min = _mm256_min_pd(a, b).as_f64x4();
3130        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3131    }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142    unsafe {
3143        let min = _mm_min_pd(a, b).as_f64x2();
3144        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3145    }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156    unsafe {
3157        let min = _mm_min_pd(a, b).as_f64x2();
3158        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3159    }
3160}
3161
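// Usage sketch (illustrative only; assumes AVX-512F + AVX-512VL). Merging a per-lane
// upper bound into selected lanes only:
//
//     let x   = _mm_setr_pd(2.5, -1.0);
//     let cap = _mm_set1_pd(1.0);
//     let r = _mm_mask_min_pd(x, 0b01, x, cap); // element 0 capped to 1.0, element 1 kept at -1.0
//     let z = _mm_maskz_min_pd(0b01, x, cap);   // element 0 = 1.0, element 1 = 0.0
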
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170    unsafe {
3171        let a = a.as_u32x16();
3172        let b = b.as_u32x16();
3173        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
3174    }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185    unsafe {
3186        let min = _mm512_min_epu32(a, b).as_u32x16();
3187        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3188    }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199    unsafe {
3200        let min = _mm512_min_epu32(a, b).as_u32x16();
3201        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3202    }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213    unsafe {
3214        let min = _mm256_min_epu32(a, b).as_u32x8();
3215        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216    }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227    unsafe {
3228        let min = _mm256_min_epu32(a, b).as_u32x8();
3229        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230    }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241    unsafe {
3242        let min = _mm_min_epu32(a, b).as_u32x4();
3243        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244    }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255    unsafe {
3256        let min = _mm_min_epu32(a, b).as_u32x4();
3257        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258    }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269    unsafe {
3270        let a = a.as_u64x8();
3271        let b = b.as_u64x8();
3272        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273    }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284    unsafe {
3285        let min = _mm512_min_epu64(a, b).as_u64x8();
3286        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287    }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298    unsafe {
3299        let min = _mm512_min_epu64(a, b).as_u64x8();
3300        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301    }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312    unsafe {
3313        let a = a.as_u64x4();
3314        let b = b.as_u64x4();
3315        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316    }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327    unsafe {
3328        let min = _mm256_min_epu64(a, b).as_u64x4();
3329        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330    }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341    unsafe {
3342        let min = _mm256_min_epu64(a, b).as_u64x4();
3343        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344    }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355    unsafe {
3356        let a = a.as_u64x2();
3357        let b = b.as_u64x2();
3358        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359    }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370    unsafe {
3371        let min = _mm_min_epu64(a, b).as_u64x2();
3372        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373    }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384    unsafe {
3385        let min = _mm_min_epu64(a, b).as_u64x2();
3386        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387    }
3388}
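
// Editorial note, not upstream text: unlike the 32-bit case, the unmasked
// `_mm256_min_epu64`/`_mm_min_epu64` are declared in this module because VPMINUQ
// has no AVX2/SSE predecessor; the 256-bit and 128-bit forms only exist with
// AVX-512VL. A minimal sketch of the unsigned ordering, using a hypothetical
// helper that requires AVX-512F at runtime:
#[target_feature(enable = "avx512f")]
fn min_epu64_sketch() {
    let ones = _mm512_set1_epi64(-1); // u64::MAX when viewed as unsigned
    let two = _mm512_set1_epi64(2);
    // Unsigned comparison: 2 < u64::MAX, so every lane of the result is 2.
    let r = _mm512_min_epu64(ones, two);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, two), 0xFF);
}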
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398    unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3409    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3420    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3431    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3442    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3453    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3464    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
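
// Editorial sketch, not part of the upstream source: the writemask form keeps
// `src` in lanes whose mask bit is clear, while the zeromask form writes +0.0
// there. Hypothetical helper for exposition only; requires AVX-512F at runtime.
#[target_feature(enable = "avx512f")]
fn masked_sqrt_ps_sketch() -> ([f32; 16], [f32; 16]) {
    let a = _mm512_set1_ps(4.0);
    let src = _mm512_set1_ps(-1.0);
    // Only the low four lanes compute sqrt(4.0) == 2.0.
    let kept = _mm512_mask_sqrt_ps(src, 0x000F, a); // lanes 4..16 stay -1.0
    let zeroed = _mm512_maskz_sqrt_ps(0x000F, a); // lanes 4..16 become 0.0
    let (mut k, mut z) = ([0.0f32; 16], [0.0f32; 16]);
    unsafe {
        _mm512_storeu_ps(k.as_mut_ptr(), kept);
        _mm512_storeu_ps(z.as_mut_ptr(), zeroed);
    }
    (k, z)
}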
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475    unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3486    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3497    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3508    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3519    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3530    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3541    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
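
// Editorial sketch, not part of the upstream source: a zeromask derived from a
// comparison lets sqrt run only on non-negative lanes, so negative inputs come
// back as +0.0 instead of NaN. `_mm512_cmp_pd_mask` and `_CMP_GE_OQ` are assumed
// to be this crate's existing AVX-512F/AVX definitions; the helper itself is
// hypothetical and requires AVX-512F at runtime.
#[target_feature(enable = "avx512f")]
fn sqrt_nonnegative_only(a: __m512d) -> __m512d {
    let nonneg = _mm512_cmp_pd_mask::<_CMP_GE_OQ>(a, _mm512_setzero_pd());
    _mm512_maskz_sqrt_pd(nonneg, a)
}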
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3552    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3563    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3574    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3585    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3596    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3607    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3618    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3629    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3640    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3651    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
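
// Editorial sketch, not part of the upstream source: the three masked fmadd
// flavours differ only in what lands in lanes whose mask bit is clear: `mask_`
// copies `a`, `mask3_` copies `c`, and `maskz_` writes +0.0. Note also where the
// mask argument sits in each signature. Hypothetical helper for exposition;
// requires AVX-512F at runtime.
#[target_feature(enable = "avx512f")]
fn fmadd_mask_flavours(a: __m512, b: __m512, c: __m512, k: __mmask16) -> (__m512, __m512, __m512) {
    (
        _mm512_mask_fmadd_ps(a, k, b, c),  // unselected lanes keep a
        _mm512_mask3_fmadd_ps(a, b, c, k), // unselected lanes keep c
        _mm512_maskz_fmadd_ps(k, a, b, c), // unselected lanes are zeroed
    )
}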
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3662    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3673    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3684    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3695    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3706    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3717    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3728    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3739    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3750    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3761    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
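
// Editorial sketch, not part of the upstream source: a fused multiply-add rounds
// only once, so it can differ from a separate multiply followed by an add. With
// a = 1 + 2^-30, b = 1 - 2^-30 and c = -1 the exact product is 1 - 2^-60, which a
// plain multiply rounds to 1.0 (yielding 0.0 after the add), while the fused form
// returns -2^-60. Hypothetical helper; requires AVX-512F at runtime.
#[target_feature(enable = "avx512f")]
fn fmadd_single_rounding_sketch() -> (f64, f64) {
    let eps = 1.0 / 1073741824.0; // 2^-30, exactly representable
    let a = _mm512_set1_pd(1.0 + eps);
    let b = _mm512_set1_pd(1.0 - eps);
    let c = _mm512_set1_pd(-1.0);
    let fused = _mm512_fmadd_pd(a, b, c);
    let separate = _mm512_add_pd(_mm512_mul_pd(a, b), c);
    let (mut f, mut s) = ([0.0f64; 8], [0.0f64; 8]);
    unsafe {
        _mm512_storeu_pd(f.as_mut_ptr(), fused);
        _mm512_storeu_pd(s.as_mut_ptr(), separate);
    }
    (f[0], s[0]) // (-2^-60, 0.0)
}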
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3770#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3772    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3781#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3783    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3792#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3794    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3803#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3805    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3814#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3816    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3825#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3827    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3836#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3838    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3847#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3849    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3858#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3860    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3869#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3871    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
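
// Editorial sketch, not part of the upstream source: as the implementation above
// shows (`simd_fma(a, b, simd_neg(c))`), fmsub is simply fmadd with a negated
// addend; 3 * 4 - 5 == 7 in every lane below, and both spellings agree.
// Hypothetical helper for exposition; requires AVX-512F at runtime.
#[target_feature(enable = "avx512f")]
fn fmsub_ps_sketch() -> [f32; 16] {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(4.0);
    let c = _mm512_set1_ps(5.0);
    let fused_sub = _mm512_fmsub_ps(a, b, c);
    let via_fmadd = _mm512_fmadd_ps(a, b, _mm512_set1_ps(-5.0));
    assert_eq!(
        _mm512_cmpeq_epi32_mask(
            _mm512_castps_si512(fused_sub),
            _mm512_castps_si512(via_fmadd),
        ),
        0xFFFF
    );
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), fused_sub) };
    out // 7.0 in every lane
}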
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3882    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3893    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3904    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3915    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3926    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3937    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3948    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3959    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3970    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3981    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992    unsafe {
3993        let add = simd_fma(a, b, c);
3994        let sub = simd_fma(a, b, simd_neg(c));
3995        simd_shuffle!(
3996            add,
3997            sub,
3998            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999        )
4000    }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4044    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4055    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
4082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4088    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4099    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
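///
/// # Example
///
/// A minimal sketch of the alternating lane pattern, assuming a nightly
/// toolchain with `stdarch_x86_avx512` enabled and a CPU supporting AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(2.0);
/// let c = _mm512_set1_pd(0.5);
/// let r = _mm512_fmaddsub_pd(a, b, c);
/// // even lanes (0, 2, 4, 6): 1.0 * 2.0 - 0.5 = 1.5
/// // odd lanes  (1, 3, 5, 7): 1.0 * 2.0 + 0.5 = 2.5
/// ```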
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110    unsafe {
4111        let add = simd_fma(a, b, c);
4112        let sub = simd_fma(a, b, simd_neg(c));
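        // In `simd_shuffle!`, indices 0..8 select lanes from `add` and 8..16 select
        // lanes from `sub`, so even result lanes take `a * b - c` and odd lanes take
        // `a * b + c`, matching the fmaddsub lane pattern.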
4113        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114    }
4115}
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4125    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
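///
/// # Example
///
/// A minimal sketch of the zeromask behaviour, assuming a nightly toolchain
/// with `stdarch_x86_avx512` enabled and a CPU supporting AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(2.0);
/// let c = _mm512_set1_pd(0.5);
/// // Only the low four mask bits are set, so lanes 4..8 are zeroed.
/// let k: __mmask8 = 0b0000_1111;
/// let r = _mm512_maskz_fmaddsub_pd(k, a, b, c);
/// // lanes 0, 2: 1.5   lanes 1, 3: 2.5   lanes 4..8: 0.0
/// ```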
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4136    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
4139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4147    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4158    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4169    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
4172/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4180    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4191    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4202    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
4205/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4213    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
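///
/// # Example
///
/// A minimal sketch showing that the lane pattern is the opposite of
/// `_mm512_fmaddsub_ps`, assuming a nightly toolchain with
/// `stdarch_x86_avx512` enabled and a CPU supporting AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let c = _mm512_set1_ps(0.5);
/// let r = _mm512_fmsubadd_ps(a, b, c);
/// // even lanes: 1.0 * 2.0 + 0.5 = 2.5
/// // odd lanes:  1.0 * 2.0 - 0.5 = 1.5
/// ```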
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224    unsafe {
4225        let add = simd_fma(a, b, c);
4226        let sub = simd_fma(a, b, simd_neg(c));
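        // In `simd_shuffle!`, indices 0..16 select lanes from `add` and 16..32 select
        // lanes from `sub`, so even result lanes take `a * b + c` and odd lanes take
        // `a * b - c`, the opposite of the fmaddsub pattern.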
4227        simd_shuffle!(
4228            add,
4229            sub,
4230            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231        )
4232    }
4233}
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4243    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4254    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4265    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4276    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4287    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4298    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4309    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4320    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4331    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342    unsafe {
4343        let add = simd_fma(a, b, c);
4344        let sub = simd_fma(a, b, simd_neg(c));
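        // Indices 0..8 pick from `add` and 8..16 pick from `sub`: even lanes add `c`,
        // odd lanes subtract it.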
4345        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346    }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4357    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
4360/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4368    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4379    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4390    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
4393/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4401    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4412    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4423    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
4426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4445    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
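///
/// # Example
///
/// Each lane computes `-(a * b) + c` with a single rounding. A minimal sketch,
/// assuming a nightly toolchain with `stdarch_x86_avx512` enabled and a CPU
/// supporting AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(3.0);
/// let b = _mm512_set1_ps(2.0);
/// let c = _mm512_set1_ps(1.0);
/// let r = _mm512_fnmadd_ps(a, b, c);
/// // every lane: -(3.0 * 2.0) + 1.0 = -5.0
/// ```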
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4456    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4467    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4489    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4500    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4511    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4522    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4533    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4544    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4555    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4566    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4577    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4588    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4599    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4610    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4621    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4632    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4643    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4654    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4665    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
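///
/// # Example
///
/// Each lane computes `-(a * b) - c` with a single rounding. A minimal sketch,
/// assuming a nightly toolchain with `stdarch_x86_avx512` enabled and a CPU
/// supporting AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_pd(3.0);
/// let b = _mm512_set1_pd(2.0);
/// let c = _mm512_set1_pd(1.0);
/// let r = _mm512_fnmsub_pd(a, b, c);
/// // every lane: -(3.0 * 2.0) - 1.0 = -7.0
/// ```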
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
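
// Illustrative sketch (not part of the upstream source or test suite): the fnmsub family
// above computes -(a * b) - c per lane, and the mask, maskz and mask3 variants differ only
// in what fills lanes whose mask bit is clear (a, zero, or c respectively). The constants
// and mask value below are arbitrary and chosen only to make the fallbacks visible.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _fnmsub_pd_mask_sketch() {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(3.0);
    let c = _mm512_set1_pd(1.0);
    // Every lane of the unmasked result is -(2.0 * 3.0) - 1.0 = -7.0.
    let full = _mm512_fnmsub_pd(a, b, c);
    // Only the low four lanes are computed; the high lanes fall back per variant.
    let keep_a = _mm512_mask_fnmsub_pd(a, 0b0000_1111, b, c); // high lanes stay 2.0
    let keep_0 = _mm512_maskz_fnmsub_pd(0b0000_1111, a, b, c); // high lanes become 0.0
    let keep_c = _mm512_mask3_fnmsub_pd(a, b, c, 0b0000_1111); // high lanes stay 1.0
    let _ = (full, keep_a, keep_0, keep_c);
}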
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
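
// Usage sketch for the rcp14_ps family (illustrative only, not part of the upstream tests):
// each lane of the result approximates 1.0 / x with relative error below 2^-14, and the
// masked forms follow the usual src/zero fallback rules.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _rcp14_ps_sketch() {
    let x = _mm512_set1_ps(4.0);
    let approx = _mm512_rcp14_ps(x); // every lane is ~0.25 (within 2^-14 relative error)
    let low_half = _mm512_maskz_rcp14_ps(0b00000000_11111111, x); // upper 8 lanes zeroed
    let _ = (approx, low_half);
}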
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
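
// A common follow-up to rcp14 (not something this module provides) is one Newton-Raphson
// refinement step, r' = r * (2 - x * r), which roughly squares the 2^-14 relative error of
// the initial approximation. Illustrative sketch only, using the plain mul/sub intrinsics
// defined elsewhere in this module.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _rcp14_pd_refine_sketch(x: __m512d) -> __m512d {
    let r = _mm512_rcp14_pd(x);
    // One Newton-Raphson iteration for the reciprocal: r' = r * (2 - x * r).
    let two = _mm512_set1_pd(2.0);
    _mm512_mul_pd(r, _mm512_sub_pd(two, _mm512_mul_pd(x, r)))
}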
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
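
// Usage sketch for the rsqrt14_ps family (illustrative only): each lane approximates
// 1.0 / sqrt(x) to within 2^-14 relative error; the writemask variant copies lanes from
// `src` where the mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _rsqrt14_ps_sketch() {
    let x = _mm512_set1_ps(4.0);
    let src = _mm512_set1_ps(-1.0);
    let approx = _mm512_rsqrt14_ps(x); // every lane is ~0.5
    // Lanes with a clear mask bit are copied from `src` (-1.0 here).
    let merged = _mm512_mask_rsqrt14_ps(src, 0b11110000_00001111, x);
    let _ = (approx, merged);
}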
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
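
// A cheap square-root approximation (not part of this module's API) multiplies the input by
// its approximate reciprocal square root: sqrt(x) = x / sqrt(x) ~= x * rsqrt14(x), accurate
// to roughly 2^-14. Illustrative sketch only; note the zero-input caveat in the comment.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _rsqrt14_pd_sqrt_sketch(x: __m512d) -> __m512d {
    // For x == 0.0 this yields 0.0 * +inf = NaN, unlike a true sqrt, so callers would need
    // to handle zero lanes separately.
    _mm512_mul_pd(x, _mm512_rsqrt14_pd(x))
}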
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292    unsafe {
5293        transmute(vgetexpps(
5294            a.as_f32x16(),
5295            f32x16::ZERO,
5296            0b11111111_11111111,
5297            _MM_FROUND_CUR_DIRECTION,
5298        ))
5299    }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310    unsafe {
5311        transmute(vgetexpps(
5312            a.as_f32x16(),
5313            src.as_f32x16(),
5314            k,
5315            _MM_FROUND_CUR_DIRECTION,
5316        ))
5317    }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328    unsafe {
5329        transmute(vgetexpps(
5330            a.as_f32x16(),
5331            f32x16::ZERO,
5332            k,
5333            _MM_FROUND_CUR_DIRECTION,
5334        ))
5335    }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
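
// Concrete illustration of the getexp_ps family (sketch only, not an upstream test): the
// result lane holds floor(log2(|x|)) as a floating-point value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _getexp_ps_sketch() {
    // floor(log2(8.5)) == 3, so every lane of the result holds 3.0.
    let e = _mm512_getexp_ps(_mm512_set1_ps(8.5));
    // floor(log2(0.25)) == -2, so every lane holds -2.0.
    let e_small = _mm512_getexp_ps(_mm512_set1_ps(0.25));
    let _ = (e, e_small);
}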
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412    unsafe {
5413        transmute(vgetexppd(
5414            a.as_f64x8(),
5415            f64x8::ZERO,
5416            0b11111111,
5417            _MM_FROUND_CUR_DIRECTION,
5418        ))
5419    }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430    unsafe {
5431        transmute(vgetexppd(
5432            a.as_f64x8(),
5433            src.as_f64x8(),
5434            k,
5435            _MM_FROUND_CUR_DIRECTION,
5436        ))
5437    }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448    unsafe {
5449        transmute(vgetexppd(
5450            a.as_f64x8(),
5451            f64x8::ZERO,
5452            k,
5453            _MM_FROUND_CUR_DIRECTION,
5454        ))
5455    }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
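
// One way to use getexp (illustrative sketch, assuming finite, nonzero inputs): scaling x by
// 2^(-getexp(x)) strips the exponent and leaves each lane in [1.0, 2.0). This assumes the
// `_mm512_scalef_pd` intrinsic (a * 2^floor(b)) defined elsewhere in this module.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _getexp_pd_normalize_sketch(x: __m512d) -> __m512d {
    let e = _mm512_getexp_pd(x);
    // Negate the exponent and scale: x * 2^(-e) lands in [1.0, 2.0) for finite, nonzero x.
    let neg_e = _mm512_sub_pd(_mm512_setzero_pd(), e);
    _mm512_scalef_pd(x, neg_e)
}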
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539    unsafe {
5540        static_assert_uimm_bits!(IMM8, 8);
5541        let a = a.as_f32x16();
5542        let r = vrndscaleps(
5543            a,
5544            IMM8,
5545            f32x16::ZERO,
5546            0b11111111_11111111,
5547            _MM_FROUND_CUR_DIRECTION,
5548        );
5549        transmute(r)
5550    }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568    unsafe {
5569        static_assert_uimm_bits!(IMM8, 8);
5570        let a = a.as_f32x16();
5571        let src = src.as_f32x16();
5572        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573        transmute(r)
5574    }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592    unsafe {
5593        static_assert_uimm_bits!(IMM8, 8);
5594        let a = a.as_f32x16();
5595        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596        transmute(r)
5597    }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615    unsafe {
5616        static_assert_uimm_bits!(IMM8, 8);
5617        let a = a.as_f32x8();
5618        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619        transmute(r)
5620    }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe {
5639        static_assert_uimm_bits!(IMM8, 8);
5640        let a = a.as_f32x8();
5641        let src = src.as_f32x8();
5642        let r = vrndscaleps256(a, IMM8, src, k);
5643        transmute(r)
5644    }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662    unsafe {
5663        static_assert_uimm_bits!(IMM8, 8);
5664        let a = a.as_f32x8();
5665        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666        transmute(r)
5667    }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685    unsafe {
5686        static_assert_uimm_bits!(IMM8, 8);
5687        let a = a.as_f32x4();
5688        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689        transmute(r)
5690    }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708    unsafe {
5709        static_assert_uimm_bits!(IMM8, 8);
5710        let a = a.as_f32x4();
5711        let src = src.as_f32x4();
5712        let r = vrndscaleps128(a, IMM8, src, k);
5713        transmute(r)
5714    }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732    unsafe {
5733        static_assert_uimm_bits!(IMM8, 8);
5734        let a = a.as_f32x4();
5735        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736        transmute(r)
5737    }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
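///
/// A minimal illustrative sketch (not part of the upstream documentation); the `demo`
/// harness and values are arbitrary, and it assumes a nightly toolchain plus an
/// AVX-512F-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.5);
///     // imm8 = 0: zero fraction bits, round to nearest with ties to even, so 2.5 -> 2.0.
///     let r = _mm512_roundscale_pd::<{ _MM_FROUND_TO_NEAREST_INT }>(a);
///     assert_eq!(_mm512_cvtsd_f64(r), 2.0);
/// }
/// ```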
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755    unsafe {
5756        static_assert_uimm_bits!(IMM8, 8);
5757        let a = a.as_f64x8();
5758        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759        transmute(r)
5760    }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
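///
/// Sketch of the writemask behaviour (illustrative, not part of the upstream
/// documentation; the `demo` harness and values are arbitrary):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_pd(-9.0);
///     let a = _mm512_set1_pd(1.75);
///     // Only lane 0 is rounded (truncated to 1.0); lanes 1..=7 are copied from `src`.
///     let r = _mm512_mask_roundscale_pd::<{ _MM_FROUND_TO_ZERO }>(src, 0b0000_0001, a);
///     assert_eq!(_mm512_cvtsd_f64(r), 1.0);
/// }
/// ```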
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778    src: __m512d,
5779    k: __mmask8,
5780    a: __m512d,
5781) -> __m512d {
5782    unsafe {
5783        static_assert_uimm_bits!(IMM8, 8);
5784        let a = a.as_f64x8();
5785        let src = src.as_f64x8();
5786        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787        transmute(r)
5788    }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806    unsafe {
5807        static_assert_uimm_bits!(IMM8, 8);
5808        let a = a.as_f64x8();
5809        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810        transmute(r)
5811    }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829    unsafe {
5830        static_assert_uimm_bits!(IMM8, 8);
5831        let a = a.as_f64x4();
5832        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833        transmute(r)
5834    }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852    src: __m256d,
5853    k: __mmask8,
5854    a: __m256d,
5855) -> __m256d {
5856    unsafe {
5857        static_assert_uimm_bits!(IMM8, 8);
5858        let a = a.as_f64x4();
5859        let src = src.as_f64x4();
5860        let r = vrndscalepd256(a, IMM8, src, k);
5861        transmute(r)
5862    }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880    unsafe {
5881        static_assert_uimm_bits!(IMM8, 8);
5882        let a = a.as_f64x4();
5883        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884        transmute(r)
5885    }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903    unsafe {
5904        static_assert_uimm_bits!(IMM8, 8);
5905        let a = a.as_f64x2();
5906        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907        transmute(r)
5908    }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926    unsafe {
5927        static_assert_uimm_bits!(IMM8, 8);
5928        let a = a.as_f64x2();
5929        let src = src.as_f64x2();
5930        let r = vrndscalepd128(a, IMM8, src, k);
5931        transmute(r)
5932    }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950    unsafe {
5951        static_assert_uimm_bits!(IMM8, 8);
5952        let a = a.as_f64x2();
5953        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954        transmute(r)
5955    }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b (each element is computed as a * 2^floor(b)), and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
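///
/// A minimal illustrative sketch (not part of the upstream documentation); the `demo`
/// harness and values are arbitrary, and it assumes a nightly toolchain plus an
/// AVX-512F-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.7);
///     // Each lane becomes 1.5 * 2^floor(2.7) = 1.5 * 4 = 6.0.
///     let r = _mm512_scalef_ps(a, b);
///     assert_eq!(_mm512_cvtss_f32(r), 6.0);
/// }
/// ```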
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966    unsafe {
5967        transmute(vscalefps(
5968            a.as_f32x16(),
5969            b.as_f32x16(),
5970            f32x16::ZERO,
5971            0b11111111_11111111,
5972            _MM_FROUND_CUR_DIRECTION,
5973        ))
5974    }
5975}
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985    unsafe {
5986        transmute(vscalefps(
5987            a.as_f32x16(),
5988            b.as_f32x16(),
5989            src.as_f32x16(),
5990            k,
5991            _MM_FROUND_CUR_DIRECTION,
5992        ))
5993    }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
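///
/// Sketch of the zeromask behaviour (illustrative, not part of the upstream
/// documentation; the `demo` harness and values are arbitrary):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(1.0);
///     // Even lanes become 3.0 * 2^1 = 6.0; odd lanes (mask bit clear) are zeroed.
///     let r = _mm512_maskz_scalef_ps(0b0101_0101_0101_0101, a, b);
///     assert_eq!(_mm512_cvtss_f32(r), 6.0); // lane 0 has its mask bit set
/// }
/// ```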
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004    unsafe {
6005        transmute(vscalefps(
6006            a.as_f32x16(),
6007            b.as_f32x16(),
6008            f32x16::ZERO,
6009            k,
6010            _MM_FROUND_CUR_DIRECTION,
6011        ))
6012    }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023    unsafe {
6024        transmute(vscalefps256(
6025            a.as_f32x8(),
6026            b.as_f32x8(),
6027            f32x8::ZERO,
6028            0b11111111,
6029        ))
6030    }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063    unsafe {
6064        transmute(vscalefps128(
6065            a.as_f32x4(),
6066            b.as_f32x4(),
6067            f32x4::ZERO,
6068            0b00001111,
6069        ))
6070    }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b (each element is computed as a * 2^floor(b)), and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
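///
/// A minimal illustrative sketch (not part of the upstream documentation; the `demo`
/// harness and values are arbitrary). Negative exponents in `b` scale downwards:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(8.0);
///     let b = _mm512_set1_pd(-2.0);
///     // Each lane becomes 8.0 * 2^-2 = 2.0.
///     let r = _mm512_scalef_pd(a, b);
///     assert_eq!(_mm512_cvtsd_f64(r), 2.0);
/// }
/// ```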
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103    unsafe {
6104        transmute(vscalefpd(
6105            a.as_f64x8(),
6106            b.as_f64x8(),
6107            f64x8::ZERO,
6108            0b11111111,
6109            _MM_FROUND_CUR_DIRECTION,
6110        ))
6111    }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122    unsafe {
6123        transmute(vscalefpd(
6124            a.as_f64x8(),
6125            b.as_f64x8(),
6126            src.as_f64x8(),
6127            k,
6128            _MM_FROUND_CUR_DIRECTION,
6129        ))
6130    }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141    unsafe {
6142        transmute(vscalefpd(
6143            a.as_f64x8(),
6144            b.as_f64x8(),
6145            f64x8::ZERO,
6146            k,
6147            _MM_FROUND_CUR_DIRECTION,
6148        ))
6149    }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160    unsafe {
6161        transmute(vscalefpd256(
6162            a.as_f64x4(),
6163            b.as_f64x4(),
6164            f64x4::ZERO,
6165            0b00001111,
6166        ))
6167    }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200    unsafe {
6201        transmute(vscalefpd128(
6202            a.as_f64x2(),
6203            b.as_f64x2(),
6204            f64x2::ZERO,
6205            0b00000011,
6206        ))
6207    }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
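///
/// A hedged illustrative sketch (not part of the upstream documentation): following
/// Intel's pseudocode, the fix-up table in `c` is read one nibble per input class,
/// with nibble 0 selecting the response for quiet NaNs and nibble 1 for signaling
/// NaNs; response `0x8` writes `+0.0` and response `0x0` keeps the element from `a`.
/// The `demo` harness and values are arbitrary:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     // Replace NaNs with +0.0 and pass every other value through unchanged.
///     let table = _mm512_set1_epi32(0x88);
///     let x = _mm512_set1_ps(f32::NAN);
///     let r = _mm512_fixupimm_ps::<0>(x, x, table);
///     assert_eq!(_mm512_cvtss_f32(r), 0.0);
/// }
/// ```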
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241    unsafe {
6242        static_assert_uimm_bits!(IMM8, 8);
6243        let a = a.as_f32x16();
6244        let b = b.as_f32x16();
6245        let c = c.as_i32x16();
6246        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247        transmute(r)
6248    }
6249}
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260    a: __m512,
6261    k: __mmask16,
6262    b: __m512,
6263    c: __m512i,
6264) -> __m512 {
6265    unsafe {
6266        static_assert_uimm_bits!(IMM8, 8);
6267        let a = a.as_f32x16();
6268        let b = b.as_f32x16();
6269        let c = c.as_i32x16();
6270        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271        transmute(r)
6272    }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284    k: __mmask16,
6285    a: __m512,
6286    b: __m512,
6287    c: __m512i,
6288) -> __m512 {
6289    unsafe {
6290        static_assert_uimm_bits!(IMM8, 8);
6291        let a = a.as_f32x16();
6292        let b = b.as_f32x16();
6293        let c = c.as_i32x16();
6294        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295        transmute(r)
6296    }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308    unsafe {
6309        static_assert_uimm_bits!(IMM8, 8);
6310        let a = a.as_f32x8();
6311        let b = b.as_f32x8();
6312        let c = c.as_i32x8();
6313        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314        transmute(r)
6315    }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327    a: __m256,
6328    k: __mmask8,
6329    b: __m256,
6330    c: __m256i,
6331) -> __m256 {
6332    unsafe {
6333        static_assert_uimm_bits!(IMM8, 8);
6334        let a = a.as_f32x8();
6335        let b = b.as_f32x8();
6336        let c = c.as_i32x8();
6337        let r = vfixupimmps256(a, b, c, IMM8, k);
6338        transmute(r)
6339    }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351    k: __mmask8,
6352    a: __m256,
6353    b: __m256,
6354    c: __m256i,
6355) -> __m256 {
6356    unsafe {
6357        static_assert_uimm_bits!(IMM8, 8);
6358        let a = a.as_f32x8();
6359        let b = b.as_f32x8();
6360        let c = c.as_i32x8();
6361        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6362        transmute(r)
6363    }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375    unsafe {
6376        static_assert_uimm_bits!(IMM8, 8);
6377        let a = a.as_f32x4();
6378        let b = b.as_f32x4();
6379        let c = c.as_i32x4();
6380        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381        transmute(r)
6382    }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394    a: __m128,
6395    k: __mmask8,
6396    b: __m128,
6397    c: __m128i,
6398) -> __m128 {
6399    unsafe {
6400        static_assert_uimm_bits!(IMM8, 8);
6401        let a = a.as_f32x4();
6402        let b = b.as_f32x4();
6403        let c = c.as_i32x4();
6404        let r = vfixupimmps128(a, b, c, IMM8, k);
6405        transmute(r)
6406    }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418    k: __mmask8,
6419    a: __m128,
6420    b: __m128,
6421    c: __m128i,
6422) -> __m128 {
6423    unsafe {
6424        static_assert_uimm_bits!(IMM8, 8);
6425        let a = a.as_f32x4();
6426        let b = b.as_f32x4();
6427        let c = c.as_i32x4();
6428        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6429        transmute(r)
6430    }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442    unsafe {
6443        static_assert_uimm_bits!(IMM8, 8);
6444        let a = a.as_f64x8();
6445        let b = b.as_f64x8();
6446        let c = c.as_i64x8();
6447        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448        transmute(r)
6449    }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461    a: __m512d,
6462    k: __mmask8,
6463    b: __m512d,
6464    c: __m512i,
6465) -> __m512d {
6466    unsafe {
6467        static_assert_uimm_bits!(IMM8, 8);
6468        let a = a.as_f64x8();
6469        let b = b.as_f64x8();
6470        let c = c.as_i64x8();
6471        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472        transmute(r)
6473    }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485    k: __mmask8,
6486    a: __m512d,
6487    b: __m512d,
6488    c: __m512i,
6489) -> __m512d {
6490    unsafe {
6491        static_assert_uimm_bits!(IMM8, 8);
6492        let a = a.as_f64x8();
6493        let b = b.as_f64x8();
6494        let c = c.as_i64x8();
6495        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496        transmute(r)
6497    }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509    unsafe {
6510        static_assert_uimm_bits!(IMM8, 8);
6511        let a = a.as_f64x4();
6512        let b = b.as_f64x4();
6513        let c = c.as_i64x4();
6514        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515        transmute(r)
6516    }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528    a: __m256d,
6529    k: __mmask8,
6530    b: __m256d,
6531    c: __m256i,
6532) -> __m256d {
6533    unsafe {
6534        static_assert_uimm_bits!(IMM8, 8);
6535        let a = a.as_f64x4();
6536        let b = b.as_f64x4();
6537        let c = c.as_i64x4();
6538        let r = vfixupimmpd256(a, b, c, IMM8, k);
6539        transmute(r)
6540    }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552    k: __mmask8,
6553    a: __m256d,
6554    b: __m256d,
6555    c: __m256i,
6556) -> __m256d {
6557    unsafe {
6558        static_assert_uimm_bits!(IMM8, 8);
6559        let a = a.as_f64x4();
6560        let b = b.as_f64x4();
6561        let c = c.as_i64x4();
6562        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6563        transmute(r)
6564    }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576    unsafe {
6577        static_assert_uimm_bits!(IMM8, 8);
6578        let a = a.as_f64x2();
6579        let b = b.as_f64x2();
6580        let c = c.as_i64x2();
6581        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582        transmute(r)
6583    }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595    a: __m128d,
6596    k: __mmask8,
6597    b: __m128d,
6598    c: __m128i,
6599) -> __m128d {
6600    unsafe {
6601        static_assert_uimm_bits!(IMM8, 8);
6602        let a = a.as_f64x2();
6603        let b = b.as_f64x2();
6604        let c = c.as_i64x2();
6605        let r = vfixupimmpd128(a, b, c, IMM8, k);
6606        transmute(r)
6607    }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619    k: __mmask8,
6620    a: __m128d,
6621    b: __m128d,
6622    c: __m128i,
6623) -> __m128d {
6624    unsafe {
6625        static_assert_uimm_bits!(IMM8, 8);
6626        let a = a.as_f64x2();
6627        let b = b.as_f64x2();
6628        let c = c.as_i64x2();
6629        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6630        transmute(r)
6631    }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
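///
/// A minimal illustrative sketch (not part of the upstream documentation; the `demo`
/// harness and values are arbitrary). The bit from `a` supplies index bit 2, `b` bit 1
/// and `c` bit 0, so `IMM8 = 0x96` is the truth table of a three-way XOR:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     let c = _mm512_set1_epi32(0b1001);
///     let r = _mm512_ternarylogic_epi32::<0x96>(a, b, c);
///     assert_eq!(_mm512_cvtsi512_si32(r), 0b1100 ^ 0b1010 ^ 0b1001);
/// }
/// ```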
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
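///
/// A hypothetical usage sketch, not compiled here: it assumes a nightly toolchain with
/// the unstable `stdarch_x86_avx512` feature enabled and runtime AVX-512F support.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::is_x86_feature_detected;
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(0b1100);
///         let b = _mm512_set1_epi64(0b1010);
///         let c = _mm512_set1_epi64(0b1001);
///         // imm8 = 0x96 is the truth table of a ^ b ^ c (0xE8 would be bitwise majority).
///         let r = _mm512_ternarylogic_epi64::<0x96>(a, b, c);
///         // every 64-bit lane of r now holds 0b1111
///     }
/// }
/// ```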
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844    unsafe {
6845        static_assert_uimm_bits!(IMM8, 8);
6846        let a = a.as_i64x8();
6847        let b = b.as_i64x8();
6848        let c = c.as_i64x8();
6849        let r = vpternlogq(a, b, c, IMM8);
6850        transmute(r)
6851    }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863    src: __m512i,
6864    k: __mmask8,
6865    a: __m512i,
6866    b: __m512i,
6867) -> __m512i {
6868    unsafe {
6869        static_assert_uimm_bits!(IMM8, 8);
6870        let src = src.as_i64x8();
6871        let a = a.as_i64x8();
6872        let b = b.as_i64x8();
6873        let r = vpternlogq(src, a, b, IMM8);
6874        transmute(simd_select_bitmask(k, r, src))
6875    }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
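///
/// As a purely illustrative scalar model of the zeromask (the `apply_zeromask` helper
/// below is hypothetical and not part of this crate), lane `i` keeps the computed value
/// only when bit `i` of `k` is set, and is zeroed otherwise:
///
/// ```
/// fn apply_zeromask(k: u8, computed: [i64; 8]) -> [i64; 8] {
///     let mut out = [0i64; 8];
///     for i in 0..8 {
///         if ((k >> i) & 1) == 1 {
///             out[i] = computed[i];
///         }
///     }
///     out
/// }
/// // Only bits 0 and 2 of k are set, so only those lanes survive.
/// assert_eq!(
///     apply_zeromask(0b0000_0101, [1, 2, 3, 4, 5, 6, 7, 8]),
///     [1, 0, 3, 0, 0, 0, 0, 0]
/// );
/// ```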
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887    k: __mmask8,
6888    a: __m512i,
6889    b: __m512i,
6890    c: __m512i,
6891) -> __m512i {
6892    unsafe {
6893        static_assert_uimm_bits!(IMM8, 8);
6894        let a = a.as_i64x8();
6895        let b = b.as_i64x8();
6896        let c = c.as_i64x8();
6897        let r = vpternlogq(a, b, c, IMM8);
6898        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899    }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911    unsafe {
6912        static_assert_uimm_bits!(IMM8, 8);
6913        let a = a.as_i64x4();
6914        let b = b.as_i64x4();
6915        let c = c.as_i64x4();
6916        let r = vpternlogq256(a, b, c, IMM8);
6917        transmute(r)
6918    }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930    src: __m256i,
6931    k: __mmask8,
6932    a: __m256i,
6933    b: __m256i,
6934) -> __m256i {
6935    unsafe {
6936        static_assert_uimm_bits!(IMM8, 8);
6937        let src = src.as_i64x4();
6938        let a = a.as_i64x4();
6939        let b = b.as_i64x4();
6940        let r = vpternlogq256(src, a, b, IMM8);
6941        transmute(simd_select_bitmask(k, r, src))
6942    }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954    k: __mmask8,
6955    a: __m256i,
6956    b: __m256i,
6957    c: __m256i,
6958) -> __m256i {
6959    unsafe {
6960        static_assert_uimm_bits!(IMM8, 8);
6961        let a = a.as_i64x4();
6962        let b = b.as_i64x4();
6963        let c = c.as_i64x4();
6964        let r = vpternlogq256(a, b, c, IMM8);
6965        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966    }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978    unsafe {
6979        static_assert_uimm_bits!(IMM8, 8);
6980        let a = a.as_i64x2();
6981        let b = b.as_i64x2();
6982        let c = c.as_i64x2();
6983        let r = vpternlogq128(a, b, c, IMM8);
6984        transmute(r)
6985    }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997    src: __m128i,
6998    k: __mmask8,
6999    a: __m128i,
7000    b: __m128i,
7001) -> __m128i {
7002    unsafe {
7003        static_assert_uimm_bits!(IMM8, 8);
7004        let src = src.as_i64x2();
7005        let a = a.as_i64x2();
7006        let b = b.as_i64x2();
7007        let r = vpternlogq128(src, a, b, IMM8);
7008        transmute(simd_select_bitmask(k, r, src))
7009    }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021    k: __mmask8,
7022    a: __m128i,
7023    b: __m128i,
7024    c: __m128i,
7025) -> __m128i {
7026    unsafe {
7027        static_assert_uimm_bits!(IMM8, 8);
7028        let a = a.as_i64x2();
7029        let b = b.as_i64x2();
7030        let c = c.as_i64x2();
7031        let r = vpternlogq128(a, b, c, IMM8);
7032        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033    }
7034}
7035
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
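///
/// As a purely illustrative scalar model of the default combination
/// (`_MM_MANT_NORM_1_2` with `_MM_MANT_SIGN_src`) for a normal, finite input
/// (the helper below is hypothetical and not part of this crate):
///
/// ```
/// // Replace the exponent field with the bias (127), keeping the sign and mantissa bits,
/// // which maps a normal f32 onto sign(x) * m with m in [1, 2).
/// fn getmant_1_2_keep_sign(x: f32) -> f32 {
///     f32::from_bits((x.to_bits() & 0x807F_FFFF) | (127 << 23))
/// }
/// assert_eq!(getmant_1_2_keep_sign(12.0), 1.5); // 12.0 = 1.5 * 2^3
/// assert_eq!(getmant_1_2_keep_sign(-0.375), -1.5); // -0.375 = -1.5 * 2^-2
/// ```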
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054    a: __m512,
7055) -> __m512 {
7056    unsafe {
7057        static_assert_uimm_bits!(NORM, 4);
7058        static_assert_uimm_bits!(SIGN, 2);
7059        let a = a.as_f32x16();
7060        let zero = f32x16::ZERO;
7061        let r = vgetmantps(
7062            a,
7063            SIGN << 2 | NORM,
7064            zero,
7065            0b11111111_11111111,
7066            _MM_FROUND_CUR_DIRECTION,
7067        );
7068        transmute(r)
7069    }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7075///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7076///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7077///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079///    _MM_MANT_SIGN_src     // sign = sign(src)\
7080///    _MM_MANT_SIGN_zero    // sign = 0\
7081///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090    const NORM: _MM_MANTISSA_NORM_ENUM,
7091    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093    src: __m512,
7094    k: __mmask16,
7095    a: __m512,
7096) -> __m512 {
7097    unsafe {
7098        static_assert_uimm_bits!(NORM, 4);
7099        static_assert_uimm_bits!(SIGN, 2);
7100        let a = a.as_f32x16();
7101        let src = src.as_f32x16();
7102        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7103        transmute(r)
7104    }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7110///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7111///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7112///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114///    _MM_MANT_SIGN_src     // sign = sign(src)\
7115///    _MM_MANT_SIGN_zero    // sign = 0\
7116///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125    const NORM: _MM_MANTISSA_NORM_ENUM,
7126    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128    k: __mmask16,
7129    a: __m512,
7130) -> __m512 {
7131    unsafe {
7132        static_assert_uimm_bits!(NORM, 4);
7133        static_assert_uimm_bits!(SIGN, 2);
7134        let a = a.as_f32x16();
7135        let r = vgetmantps(
7136            a,
7137            SIGN << 2 | NORM,
7138            f32x16::ZERO,
7139            k,
7140            _MM_FROUND_CUR_DIRECTION,
7141        );
7142        transmute(r)
7143    }
7144}
7145
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164    a: __m256,
7165) -> __m256 {
7166    unsafe {
7167        static_assert_uimm_bits!(NORM, 4);
7168        static_assert_uimm_bits!(SIGN, 2);
7169        let a = a.as_f32x8();
7170        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7171        transmute(r)
7172    }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182///    _MM_MANT_SIGN_src     // sign = sign(src)\
7183///    _MM_MANT_SIGN_zero    // sign = 0\
7184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193    const NORM: _MM_MANTISSA_NORM_ENUM,
7194    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196    src: __m256,
7197    k: __mmask8,
7198    a: __m256,
7199) -> __m256 {
7200    unsafe {
7201        static_assert_uimm_bits!(NORM, 4);
7202        static_assert_uimm_bits!(SIGN, 2);
7203        let a = a.as_f32x8();
7204        let src = src.as_f32x8();
7205        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7206        transmute(r)
7207    }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7213///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7214///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7215///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217///    _MM_MANT_SIGN_src     // sign = sign(src)\
7218///    _MM_MANT_SIGN_zero    // sign = 0\
7219///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228    const NORM: _MM_MANTISSA_NORM_ENUM,
7229    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231    k: __mmask8,
7232    a: __m256,
7233) -> __m256 {
7234    unsafe {
7235        static_assert_uimm_bits!(NORM, 4);
7236        static_assert_uimm_bits!(SIGN, 2);
7237        let a = a.as_f32x8();
7238        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7239        transmute(r)
7240    }
7241}
7242
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261    a: __m128,
7262) -> __m128 {
7263    unsafe {
7264        static_assert_uimm_bits!(NORM, 4);
7265        static_assert_uimm_bits!(SIGN, 2);
7266        let a = a.as_f32x4();
7267        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7268        transmute(r)
7269    }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7275///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7276///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7277///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279///    _MM_MANT_SIGN_src     // sign = sign(src)\
7280///    _MM_MANT_SIGN_zero    // sign = 0\
7281///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290    const NORM: _MM_MANTISSA_NORM_ENUM,
7291    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293    src: __m128,
7294    k: __mmask8,
7295    a: __m128,
7296) -> __m128 {
7297    unsafe {
7298        static_assert_uimm_bits!(NORM, 4);
7299        static_assert_uimm_bits!(SIGN, 2);
7300        let a = a.as_f32x4();
7301        let src = src.as_f32x4();
7302        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7303        transmute(r)
7304    }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7310///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7311///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7312///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314///    _MM_MANT_SIGN_src     // sign = sign(src)\
7315///    _MM_MANT_SIGN_zero    // sign = 0\
7316///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325    const NORM: _MM_MANTISSA_NORM_ENUM,
7326    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328    k: __mmask8,
7329    a: __m128,
7330) -> __m128 {
7331    unsafe {
7332        static_assert_uimm_bits!(NORM, 4);
7333        static_assert_uimm_bits!(SIGN, 2);
7334        let a = a.as_f32x4();
7335        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7336        transmute(r)
7337    }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7343///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7344///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7345///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347///    _MM_MANT_SIGN_src     // sign = sign(src)\
7348///    _MM_MANT_SIGN_zero    // sign = 0\
7349///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
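///
/// A hypothetical usage sketch, not compiled here: it assumes a nightly toolchain with
/// the unstable `stdarch_x86_avx512` feature enabled and runtime AVX-512F support, and
/// uses the interval and sign constants listed above.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::is_x86_feature_detected;
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_pd(-24.0); // -24.0 = -1.5 * 2^4
///         // Normalize into [1, 2) and force the sign to be positive.
///         let m = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero>(a);
///         // every lane of m is now 1.5
///     }
/// }
/// ```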
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358    a: __m512d,
7359) -> __m512d {
7360    unsafe {
7361        static_assert_uimm_bits!(NORM, 4);
7362        static_assert_uimm_bits!(SIGN, 2);
7363        let a = a.as_f64x8();
7364        let zero = f64x8::ZERO;
7365        let r = vgetmantpd(
7366            a,
7367            SIGN << 2 | NORM,
7368            zero,
7369            0b11111111,
7370            _MM_FROUND_CUR_DIRECTION,
7371        );
7372        transmute(r)
7373    }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7379///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7380///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7381///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383///    _MM_MANT_SIGN_src     // sign = sign(src)\
7384///    _MM_MANT_SIGN_zero    // sign = 0\
7385///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394    const NORM: _MM_MANTISSA_NORM_ENUM,
7395    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397    src: __m512d,
7398    k: __mmask8,
7399    a: __m512d,
7400) -> __m512d {
7401    unsafe {
7402        static_assert_uimm_bits!(NORM, 4);
7403        static_assert_uimm_bits!(SIGN, 2);
7404        let a = a.as_f64x8();
7405        let src = src.as_f64x8();
7406        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7407        transmute(r)
7408    }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7414///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7415///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7416///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418///    _MM_MANT_SIGN_src     // sign = sign(src)\
7419///    _MM_MANT_SIGN_zero    // sign = 0\
7420///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429    const NORM: _MM_MANTISSA_NORM_ENUM,
7430    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432    k: __mmask8,
7433    a: __m512d,
7434) -> __m512d {
7435    unsafe {
7436        static_assert_uimm_bits!(NORM, 4);
7437        static_assert_uimm_bits!(SIGN, 2);
7438        let a = a.as_f64x8();
7439        let r = vgetmantpd(
7440            a,
7441            SIGN << 2 | NORM,
7442            f64x8::ZERO,
7443            k,
7444            _MM_FROUND_CUR_DIRECTION,
7445        );
7446        transmute(r)
7447    }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7453///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7454///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7455///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457///    _MM_MANT_SIGN_src     // sign = sign(src)\
7458///    _MM_MANT_SIGN_zero    // sign = 0\
7459///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468    a: __m256d,
7469) -> __m256d {
7470    unsafe {
7471        static_assert_uimm_bits!(NORM, 4);
7472        static_assert_uimm_bits!(SIGN, 2);
7473        let a = a.as_f64x4();
7474        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7475        transmute(r)
7476    }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7482///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7483///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7484///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486///    _MM_MANT_SIGN_src     // sign = sign(src)\
7487///    _MM_MANT_SIGN_zero    // sign = 0\
7488///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497    const NORM: _MM_MANTISSA_NORM_ENUM,
7498    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500    src: __m256d,
7501    k: __mmask8,
7502    a: __m256d,
7503) -> __m256d {
7504    unsafe {
7505        static_assert_uimm_bits!(NORM, 4);
7506        static_assert_uimm_bits!(SIGN, 2);
7507        let a = a.as_f64x4();
7508        let src = src.as_f64x4();
7509        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7510        transmute(r)
7511    }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7517///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7518///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7519///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521///    _MM_MANT_SIGN_src     // sign = sign(src)\
7522///    _MM_MANT_SIGN_zero    // sign = 0\
7523///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532    const NORM: _MM_MANTISSA_NORM_ENUM,
7533    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535    k: __mmask8,
7536    a: __m256d,
7537) -> __m256d {
7538    unsafe {
7539        static_assert_uimm_bits!(NORM, 4);
7540        static_assert_uimm_bits!(SIGN, 2);
7541        let a = a.as_f64x4();
7542        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7543        transmute(r)
7544    }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7550///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7551///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7552///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554///    _MM_MANT_SIGN_src     // sign = sign(src)\
7555///    _MM_MANT_SIGN_zero    // sign = 0\
7556///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565    a: __m128d,
7566) -> __m128d {
7567    unsafe {
7568        static_assert_uimm_bits!(NORM, 4);
7569        static_assert_uimm_bits!(SIGN, 2);
7570        let a = a.as_f64x2();
7571        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7572        transmute(r)
7573    }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7579///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7580///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7581///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583///    _MM_MANT_SIGN_src     // sign = sign(src)\
7584///    _MM_MANT_SIGN_zero    // sign = 0\
7585///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594    const NORM: _MM_MANTISSA_NORM_ENUM,
7595    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597    src: __m128d,
7598    k: __mmask8,
7599    a: __m128d,
7600) -> __m128d {
7601    unsafe {
7602        static_assert_uimm_bits!(NORM, 4);
7603        static_assert_uimm_bits!(SIGN, 2);
7604        let a = a.as_f64x2();
7605        let src = src.as_f64x2();
7606        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7607        transmute(r)
7608    }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7614///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7615///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7616///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618///    _MM_MANT_SIGN_src     // sign = sign(src)\
7619///    _MM_MANT_SIGN_zero    // sign = 0\
7620///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629    const NORM: _MM_MANTISSA_NORM_ENUM,
7630    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632    k: __mmask8,
7633    a: __m128d,
7634) -> __m128d {
7635    unsafe {
7636        static_assert_uimm_bits!(NORM, 4);
7637        static_assert_uimm_bits!(SIGN, 2);
7638        let a = a.as_f64x2();
7639        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7640        transmute(r)
7641    }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
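///
/// A hypothetical usage sketch showing how the rounding constant is passed, not compiled
/// here: it assumes a nightly toolchain with the unstable `stdarch_x86_avx512` feature
/// enabled and runtime AVX-512F support.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::is_x86_feature_detected;
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_ps(1.0);
///         let b = _mm512_set1_ps(f32::EPSILON / 4.0); // too small to survive round-to-nearest
///         // Round toward +infinity: every lane becomes the next value above 1.0.
///         let up = _mm512_add_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///         // Round toward zero: the tiny addend is dropped and every lane stays 1.0.
///         let tz = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
///     }
/// }
/// ```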
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660    unsafe {
7661        static_assert_rounding!(ROUNDING);
7662        let a = a.as_f32x16();
7663        let b = b.as_f32x16();
7664        let r = vaddps(a, b, ROUNDING);
7665        transmute(r)
7666    }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685    src: __m512,
7686    k: __mmask16,
7687    a: __m512,
7688    b: __m512,
7689) -> __m512 {
7690    unsafe {
7691        static_assert_rounding!(ROUNDING);
7692        let a = a.as_f32x16();
7693        let b = b.as_f32x16();
7694        let r = vaddps(a, b, ROUNDING);
7695        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696    }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715    k: __mmask16,
7716    a: __m512,
7717    b: __m512,
7718) -> __m512 {
7719    unsafe {
7720        static_assert_rounding!(ROUNDING);
7721        let a = a.as_f32x16();
7722        let b = b.as_f32x16();
7723        let r = vaddps(a, b, ROUNDING);
7724        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725    }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744    unsafe {
7745        static_assert_rounding!(ROUNDING);
7746        let a = a.as_f64x8();
7747        let b = b.as_f64x8();
7748        let r = vaddpd(a, b, ROUNDING);
7749        transmute(r)
7750    }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769    src: __m512d,
7770    k: __mmask8,
7771    a: __m512d,
7772    b: __m512d,
7773) -> __m512d {
7774    unsafe {
7775        static_assert_rounding!(ROUNDING);
7776        let a = a.as_f64x8();
7777        let b = b.as_f64x8();
7778        let r = vaddpd(a, b, ROUNDING);
7779        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780    }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799    k: __mmask8,
7800    a: __m512d,
7801    b: __m512d,
7802) -> __m512d {
7803    unsafe {
7804        static_assert_rounding!(ROUNDING);
7805        let a = a.as_f64x8();
7806        let b = b.as_f64x8();
7807        let r = vaddpd(a, b, ROUNDING);
7808        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809    }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828    unsafe {
7829        static_assert_rounding!(ROUNDING);
7830        let a = a.as_f32x16();
7831        let b = b.as_f32x16();
7832        let r = vsubps(a, b, ROUNDING);
7833        transmute(r)
7834    }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853    src: __m512,
7854    k: __mmask16,
7855    a: __m512,
7856    b: __m512,
7857) -> __m512 {
7858    unsafe {
7859        static_assert_rounding!(ROUNDING);
7860        let a = a.as_f32x16();
7861        let b = b.as_f32x16();
7862        let r = vsubps(a, b, ROUNDING);
7863        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864    }
7865}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
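
// Zeromask sketch (illustrative): like the writemask form, except lanes whose
// mask bit is clear are written as +0.0 rather than copied from a `src` operand.
//
//     let k: __mmask16 = 0xaaaa;
//     let r = unsafe {
//         _mm512_maskz_sub_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(k, a, b)
//     };
//     // Even-numbered lanes of `r` are 0.0; odd-numbered lanes hold a - b
//     // rounded according to MXCSR.RC.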

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(r)
    }
}
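
// Sketch (illustrative): with directed rounding, a product that is not exactly
// representable lands on the neighbouring f32 in the chosen direction, so the
// TO_POS_INF and TO_NEG_INF results for the same inputs may differ by one ULP.
//
//     let a = _mm512_set1_ps(1.1);
//     let b = _mm512_set1_ps(3.3);
//     let up = unsafe { _mm512_mul_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b) };
//     let down = unsafe { _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b) };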

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(r)
    }
}
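
// Sketch (illustrative): 1.0 / 3.0 is inexact, so the chosen rounding mode
// decides which neighbouring f32 the quotient lands on.
//
//     let one = _mm512_set1_ps(1.0);
//     let three = _mm512_set1_ps(3.0);
//     let toward_zero = unsafe {
//         _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(one, three)
//     };
//     // Each lane of `toward_zero` is the largest f32 not exceeding 1/3.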

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(r)
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(r)
    }
}
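
// Sketch (illustrative): the square root takes a single vector operand, so the
// rounding mode is the only const generic and `a` the only run-time argument.
//
//     let a = _mm512_set1_ps(2.0);
//     let r = unsafe {
//         _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
//     };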

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(r)
    }
}
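
// Sketch (illustrative): the double-precision variant operates on eight f64
// lanes, so its masked forms take the narrower `__mmask8`.
//
//     let a = _mm512_set1_pd(2.0);
//     let r = unsafe {
//         _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
//     };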

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, c, ROUNDING)
    }
}
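
// Sketch (illustrative, given `__m512` values `a`, `b` and `c`): the fused form
// computes a * b + c with a single rounding step, so it can differ in the last
// bit from a separate multiply followed by an add, which rounds twice.
//
//     let r = unsafe {
//         _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
//     };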

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
    }
}
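
// Sketch (illustrative): the `mask3` form differs from the `mask`/`maskz` forms
// only in where unselected lanes come from; here they are copied from `c`, and
// the mask is passed as the final argument.
//
//     let r = unsafe {
//         _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, k)
//     };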

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
    }
}
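
// Sketch (illustrative): as the body above shows, fmsub is lowered as a fused
// multiply-add with `c` negated, so a * b - c is still rounded only once.
//
//     let r = unsafe {
//         _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
//     };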

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
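///
/// A minimal sketch of the alternating pattern: even-indexed lanes subtract `c`,
/// odd-indexed lanes add it (illustrative only; nightly Rust with
/// `stdarch_x86_avx512` and AVX-512F hardware, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(10.0);
///     let c = _mm512_set1_ps(2.0);
///     let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 8.0);  // lane 0 (even): 1.0 * 10.0 - 2.0
///     assert_eq!(out[1], 12.0); // lane 1 (odd):  1.0 * 10.0 + 2.0
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```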
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902    unsafe {
8903        static_assert_rounding!(ROUNDING);
8904        vfmaddsubpsround(a, b, c, ROUNDING)
8905    }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
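///
/// A minimal sketch of the writemask behaviour, where lanes with a clear mask bit keep
/// the value of `a` (illustrative only; nightly Rust with `stdarch_x86_avx512` and
/// AVX-512F hardware, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(10.0);
///     let c = _mm512_set1_ps(2.0);
///     // Only lanes 0 and 1 are computed; every other lane keeps a's 1.0.
///     let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         a, 0b0000_0000_0000_0011, b, c,
///     );
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 8.0);  // computed: 1.0 * 10.0 - 2.0
///     assert_eq!(out[1], 12.0); // computed: 1.0 * 10.0 + 2.0
///     assert_eq!(out[2], 1.0);  // mask bit clear: copied from a
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```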
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924    a: __m512,
8925    k: __mmask16,
8926    b: __m512,
8927    c: __m512,
8928) -> __m512 {
8929    unsafe {
8930        static_assert_rounding!(ROUNDING);
8931        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932    }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951    k: __mmask16,
8952    a: __m512,
8953    b: __m512,
8954    c: __m512,
8955) -> __m512 {
8956    unsafe {
8957        static_assert_rounding!(ROUNDING);
8958        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959    }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005    a: __m512d,
9006    b: __m512d,
9007    c: __m512d,
9008) -> __m512d {
9009    unsafe {
9010        static_assert_rounding!(ROUNDING);
9011        vfmaddsubpdround(a, b, c, ROUNDING)
9012    }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031    a: __m512d,
9032    k: __mmask8,
9033    b: __m512d,
9034    c: __m512d,
9035) -> __m512d {
9036    unsafe {
9037        static_assert_rounding!(ROUNDING);
9038        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039    }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058    k: __mmask8,
9059    a: __m512d,
9060    b: __m512d,
9061    c: __m512d,
9062) -> __m512d {
9063    unsafe {
9064        static_assert_rounding!(ROUNDING);
9065        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066    }
9067}
9068
9069/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085    a: __m512d,
9086    b: __m512d,
9087    c: __m512d,
9088    k: __mmask8,
9089) -> __m512d {
9090    unsafe {
9091        static_assert_rounding!(ROUNDING);
9092        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093    }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
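///
/// A minimal sketch of the subtract/add alternation: even-indexed lanes add `c`,
/// odd-indexed lanes subtract it (illustrative only; nightly Rust with
/// `stdarch_x86_avx512` and AVX-512F hardware, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(10.0);
///     let c = _mm512_set1_ps(2.0);
///     let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 12.0); // lane 0 (even): 1.0 * 10.0 + 2.0
///     assert_eq!(out[1], 8.0);  // lane 1 (odd):  1.0 * 10.0 - 2.0
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```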
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112    unsafe {
9113        static_assert_rounding!(ROUNDING);
9114        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115    }
9116}
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134    a: __m512,
9135    k: __mmask16,
9136    b: __m512,
9137    c: __m512,
9138) -> __m512 {
9139    unsafe {
9140        static_assert_rounding!(ROUNDING);
9141        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9142        simd_select_bitmask(k, r, a)
9143    }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162    k: __mmask16,
9163    a: __m512,
9164    b: __m512,
9165    c: __m512,
9166) -> __m512 {
9167    unsafe {
9168        static_assert_rounding!(ROUNDING);
9169        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9170        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171    }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190    a: __m512,
9191    b: __m512,
9192    c: __m512,
9193    k: __mmask16,
9194) -> __m512 {
9195    unsafe {
9196        static_assert_rounding!(ROUNDING);
9197        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9198        simd_select_bitmask(k, r, c)
9199    }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218    a: __m512d,
9219    b: __m512d,
9220    c: __m512d,
9221) -> __m512d {
9222    unsafe {
9223        static_assert_rounding!(ROUNDING);
9224        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225    }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244    a: __m512d,
9245    k: __mmask8,
9246    b: __m512d,
9247    c: __m512d,
9248) -> __m512d {
9249    unsafe {
9250        static_assert_rounding!(ROUNDING);
9251        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9252        simd_select_bitmask(k, r, a)
9253    }
9254}
9255
9256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
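///
/// A minimal sketch combining the subtract/add alternation with the zeromask
/// (illustrative only; nightly Rust with `stdarch_x86_avx512` and AVX-512F hardware,
/// hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Lane 0 adds c (6.0 + 1.0), lane 1 subtracts it (6.0 - 1.0);
///     // lanes 2..8 are zeroed because their mask bits are clear.
///     let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         0b0000_0011, a, b, c,
///     );
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out, [7.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```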
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272    k: __mmask8,
9273    a: __m512d,
9274    b: __m512d,
9275    c: __m512d,
9276) -> __m512d {
9277    unsafe {
9278        static_assert_rounding!(ROUNDING);
9279        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9280        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281    }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300    a: __m512d,
9301    b: __m512d,
9302    c: __m512d,
9303    k: __mmask8,
9304) -> __m512d {
9305    unsafe {
9306        static_assert_rounding!(ROUNDING);
9307        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9308        simd_select_bitmask(k, r, c)
9309    }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
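///
/// A minimal sketch of the negated multiply-add, `-(a * b) + c` per lane
/// (illustrative only; nightly Rust with `stdarch_x86_avx512` and AVX-512F hardware,
/// hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Every lane: -(2.0 * 3.0) + 10.0 = 4.0 (exact, so the rounding mode is moot).
///     let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out, [4.0; 16]);
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```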
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328    unsafe {
9329        static_assert_rounding!(ROUNDING);
9330        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331    }
9332}
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350    a: __m512,
9351    k: __mmask16,
9352    b: __m512,
9353    c: __m512,
9354) -> __m512 {
9355    unsafe {
9356        static_assert_rounding!(ROUNDING);
9357        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9358        simd_select_bitmask(k, r, a)
9359    }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378    k: __mmask16,
9379    a: __m512,
9380    b: __m512,
9381    c: __m512,
9382) -> __m512 {
9383    unsafe {
9384        static_assert_rounding!(ROUNDING);
9385        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9386        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387    }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406    a: __m512,
9407    b: __m512,
9408    c: __m512,
9409    k: __mmask16,
9410) -> __m512 {
9411    unsafe {
9412        static_assert_rounding!(ROUNDING);
9413        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9414        simd_select_bitmask(k, r, c)
9415    }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434    unsafe {
9435        static_assert_rounding!(ROUNDING);
9436        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437    }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456    a: __m512d,
9457    k: __mmask8,
9458    b: __m512d,
9459    c: __m512d,
9460) -> __m512d {
9461    unsafe {
9462        static_assert_rounding!(ROUNDING);
9463        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9464        simd_select_bitmask(k, r, a)
9465    }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484    k: __mmask8,
9485    a: __m512d,
9486    b: __m512d,
9487    c: __m512d,
9488) -> __m512d {
9489    unsafe {
9490        static_assert_rounding!(ROUNDING);
9491        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9492        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493    }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512    a: __m512d,
9513    b: __m512d,
9514    c: __m512d,
9515    k: __mmask8,
9516) -> __m512d {
9517    unsafe {
9518        static_assert_rounding!(ROUNDING);
9519        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9520        simd_select_bitmask(k, r, c)
9521    }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
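///
/// A minimal sketch of the negated multiply-subtract, `-(a * b) - c` per lane
/// (illustrative only; nightly Rust with `stdarch_x86_avx512` and AVX-512F hardware,
/// hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane: -(2.0 * 3.0) - 1.0 = -7.0.
///     let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out, [-7.0; 16]);
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```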
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540    unsafe {
9541        static_assert_rounding!(ROUNDING);
9542        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543    }
9544}
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562    a: __m512,
9563    k: __mmask16,
9564    b: __m512,
9565    c: __m512,
9566) -> __m512 {
9567    unsafe {
9568        static_assert_rounding!(ROUNDING);
9569        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9570        simd_select_bitmask(k, r, a)
9571    }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590    k: __mmask16,
9591    a: __m512,
9592    b: __m512,
9593    c: __m512,
9594) -> __m512 {
9595    unsafe {
9596        static_assert_rounding!(ROUNDING);
9597        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9598        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599    }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618    a: __m512,
9619    b: __m512,
9620    c: __m512,
9621    k: __mmask16,
9622) -> __m512 {
9623    unsafe {
9624        static_assert_rounding!(ROUNDING);
9625        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9626        simd_select_bitmask(k, r, c)
9627    }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646    unsafe {
9647        static_assert_rounding!(ROUNDING);
9648        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649    }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668    a: __m512d,
9669    k: __mmask8,
9670    b: __m512d,
9671    c: __m512d,
9672) -> __m512d {
9673    unsafe {
9674        static_assert_rounding!(ROUNDING);
9675        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9676        simd_select_bitmask(k, r, a)
9677    }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696    k: __mmask8,
9697    a: __m512d,
9698    b: __m512d,
9699    c: __m512d,
9700) -> __m512d {
9701    unsafe {
9702        static_assert_rounding!(ROUNDING);
9703        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9704        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705    }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724    a: __m512d,
9725    b: __m512d,
9726    c: __m512d,
9727    k: __mmask8,
9728) -> __m512d {
9729    unsafe {
9730        static_assert_rounding!(ROUNDING);
9731        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9732        simd_select_bitmask(k, r, c)
9733    }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
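///
/// A minimal sketch passing `_MM_FROUND_NO_EXC` as the `SAE` parameter, which
/// suppresses floating-point exceptions as described above (illustrative only;
/// nightly Rust with `stdarch_x86_avx512` and AVX-512F hardware, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(5.0);
///     // Every lane: max(2.0, 5.0) = 5.0.
///     let r = _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out, [5.0; 16]);
/// }
///
/// if is_x86_feature_detected!("avx512f") { unsafe { demo() } }
/// ```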
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746    unsafe {
9747        static_assert_sae!(SAE);
9748        let a = a.as_f32x16();
9749        let b = b.as_f32x16();
9750        let r = vmaxps(a, b, SAE);
9751        transmute(r)
9752    }
9753}
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765    src: __m512,
9766    k: __mmask16,
9767    a: __m512,
9768    b: __m512,
9769) -> __m512 {
9770    unsafe {
9771        static_assert_sae!(SAE);
9772        let a = a.as_f32x16();
9773        let b = b.as_f32x16();
9774        let r = vmaxps(a, b, SAE);
9775        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776    }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789    unsafe {
9790        static_assert_sae!(SAE);
9791        let a = a.as_f32x16();
9792        let b = b.as_f32x16();
9793        let r = vmaxps(a, b, SAE);
9794        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795    }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808    unsafe {
9809        static_assert_sae!(SAE);
9810        let a = a.as_f64x8();
9811        let b = b.as_f64x8();
9812        let r = vmaxpd(a, b, SAE);
9813        transmute(r)
9814    }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827    src: __m512d,
9828    k: __mmask8,
9829    a: __m512d,
9830    b: __m512d,
9831) -> __m512d {
9832    unsafe {
9833        static_assert_sae!(SAE);
9834        let a = a.as_f64x8();
9835        let b = b.as_f64x8();
9836        let r = vmaxpd(a, b, SAE);
9837        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838    }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851    unsafe {
9852        static_assert_sae!(SAE);
9853        let a = a.as_f64x8();
9854        let b = b.as_f64x8();
9855        let r = vmaxpd(a, b, SAE);
9856        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857    }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
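///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// let a = _mm512_set1_ps(0.5);
/// let b = _mm512_setr_ps(
///     -1.0, 0.0, 1.0, 2.0, -1.0, 0.0, 1.0, 2.0,
///     -1.0, 0.0, 1.0, 2.0, -1.0, 0.0, 1.0, 2.0,
/// );
/// // Lane-wise minimum: [-1.0, 0.0, 0.5, 0.5, ...] repeated across the vector.
/// let r = _mm512_min_round_ps::<_MM_FROUND_NO_EXC>(a, b);
/// ```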
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889    src: __m512,
9890    k: __mmask16,
9891    a: __m512,
9892    b: __m512,
9893) -> __m512 {
9894    unsafe {
9895        static_assert_sae!(SAE);
9896        let a = a.as_f32x16();
9897        let b = b.as_f32x16();
9898        let r = vminps(a, b, SAE);
9899        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900    }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913    unsafe {
9914        static_assert_sae!(SAE);
9915        let a = a.as_f32x16();
9916        let b = b.as_f32x16();
9917        let r = vminps(a, b, SAE);
9918        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919    }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932    unsafe {
9933        static_assert_sae!(SAE);
9934        let a = a.as_f64x8();
9935        let b = b.as_f64x8();
9936        let r = vminpd(a, b, SAE);
9937        transmute(r)
9938    }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951    src: __m512d,
9952    k: __mmask8,
9953    a: __m512d,
9954    b: __m512d,
9955) -> __m512d {
9956    unsafe {
9957        static_assert_sae!(SAE);
9958        let a = a.as_f64x8();
9959        let b = b.as_f64x8();
9960        let r = vminpd(a, b, SAE);
9961        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962    }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975    unsafe {
9976        static_assert_sae!(SAE);
9977        let a = a.as_f64x8();
9978        let b = b.as_f64x8();
9979        let r = vminpd(a, b, SAE);
9980        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981    }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
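///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// // getexp(x) returns floor(log2(|x|)) as a float, e.g. 8.0 -> 3.0 and 0.5 -> -1.0.
/// let a = _mm512_set1_ps(8.0);
/// let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a); // every lane is 3.0
/// ```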
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994    unsafe {
9995        static_assert_sae!(SAE);
9996        let a = a.as_f32x16();
9997        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9998        transmute(r)
9999    }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012    unsafe {
10013        static_assert_sae!(SAE);
10014        let a = a.as_f32x16();
10015        let src = src.as_f32x16();
10016        let r = vgetexpps(a, src, k, SAE);
10017        transmute(r)
10018    }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031    unsafe {
10032        static_assert_sae!(SAE);
10033        let a = a.as_f32x16();
10034        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10035        transmute(r)
10036    }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
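///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// let a = _mm512_setr_pd(1.0, 2.0, 4.0, 8.0, 0.5, 0.25, 3.0, 10.0);
/// // floor(log2(x)) per lane: [0.0, 1.0, 2.0, 3.0, -1.0, -2.0, 1.0, 3.0]
/// let r = _mm512_getexp_round_pd::<_MM_FROUND_NO_EXC>(a);
/// ```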
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049    unsafe {
10050        static_assert_sae!(SAE);
10051        let a = a.as_f64x8();
10052        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10053        transmute(r)
10054    }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067    src: __m512d,
10068    k: __mmask8,
10069    a: __m512d,
10070) -> __m512d {
10071    unsafe {
10072        static_assert_sae!(SAE);
10073        let a = a.as_f64x8();
10074        let src = src.as_f64x8();
10075        let r = vgetexppd(a, src, k, SAE);
10076        transmute(r)
10077    }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090    unsafe {
10091        static_assert_sae!(SAE);
10092        let a = a.as_f64x8();
10093        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
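///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// // With IMM8 = _MM_FROUND_TO_NEAREST_INT (and no extra fraction bits requested),
/// // each element is rounded to the nearest integral value: 1.25 -> 1.0.
/// let a = _mm512_set1_ps(1.25);
/// let r = _mm512_roundscale_round_ps::<_MM_FROUND_TO_NEAREST_INT, _MM_FROUND_NO_EXC>(a);
/// ```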
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114    unsafe {
10115        static_assert_uimm_bits!(IMM8, 8);
10116        static_assert_mantissas_sae!(SAE);
10117        let a = a.as_f32x16();
10118        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10119        transmute(r)
10120    }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139    src: __m512,
10140    k: __mmask16,
10141    a: __m512,
10142) -> __m512 {
10143    unsafe {
10144        static_assert_uimm_bits!(IMM8, 8);
10145        static_assert_mantissas_sae!(SAE);
10146        let a = a.as_f32x16();
10147        let src = src.as_f32x16();
10148        let r = vrndscaleps(a, IMM8, src, k, SAE);
10149        transmute(r)
10150    }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169    k: __mmask16,
10170    a: __m512,
10171) -> __m512 {
10172    unsafe {
10173        static_assert_uimm_bits!(IMM8, 8);
10174        static_assert_mantissas_sae!(SAE);
10175        let a = a.as_f32x16();
10176        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10177        transmute(r)
10178    }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
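///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// // With IMM8 = _MM_FROUND_TO_ZERO each element is truncated towards zero:
/// // 2.9 -> 2.0 and -2.9 -> -2.0.
/// let a = _mm512_setr_pd(2.9, -2.9, 0.5, -0.5, 1.0, -1.0, 7.7, -7.7);
/// let r = _mm512_roundscale_round_pd::<_MM_FROUND_TO_ZERO, _MM_FROUND_NO_EXC>(a);
/// ```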
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197    unsafe {
10198        static_assert_uimm_bits!(IMM8, 8);
10199        static_assert_mantissas_sae!(SAE);
10200        let a = a.as_f64x8();
10201        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10202        transmute(r)
10203    }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222    src: __m512d,
10223    k: __mmask8,
10224    a: __m512d,
10225) -> __m512d {
10226    unsafe {
10227        static_assert_uimm_bits!(IMM8, 8);
10228        static_assert_mantissas_sae!(SAE);
10229        let a = a.as_f64x8();
10230        let src = src.as_f64x8();
10231        let r = vrndscalepd(a, IMM8, src, k, SAE);
10232        transmute(r)
10233    }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252    k: __mmask8,
10253    a: __m512d,
10254) -> __m512d {
10255    unsafe {
10256        static_assert_uimm_bits!(IMM8, 8);
10257        static_assert_mantissas_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10260        transmute(r)
10261    }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
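///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// // scalef computes a * 2^floor(b) per lane, so 3.0 scaled by 2.0 gives 12.0.
/// let a = _mm512_set1_ps(3.0);
/// let b = _mm512_set1_ps(2.0);
/// let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```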
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280    unsafe {
10281        static_assert_rounding!(ROUNDING);
10282        let a = a.as_f32x16();
10283        let b = b.as_f32x16();
10284        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10285        transmute(r)
10286    }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305    src: __m512,
10306    k: __mmask16,
10307    a: __m512,
10308    b: __m512,
10309) -> __m512 {
10310    unsafe {
10311        static_assert_rounding!(ROUNDING);
10312        let a = a.as_f32x16();
10313        let b = b.as_f32x16();
10314        let src = src.as_f32x16();
10315        let r = vscalefps(a, b, src, k, ROUNDING);
10316        transmute(r)
10317    }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336    k: __mmask16,
10337    a: __m512,
10338    b: __m512,
10339) -> __m512 {
10340    unsafe {
10341        static_assert_rounding!(ROUNDING);
10342        let a = a.as_f32x16();
10343        let b = b.as_f32x16();
10344        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10345        transmute(r)
10346    }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365    unsafe {
10366        static_assert_rounding!(ROUNDING);
10367        let a = a.as_f64x8();
10368        let b = b.as_f64x8();
10369        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10370        transmute(r)
10371    }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390    src: __m512d,
10391    k: __mmask8,
10392    a: __m512d,
10393    b: __m512d,
10394) -> __m512d {
10395    unsafe {
10396        static_assert_rounding!(ROUNDING);
10397        let a = a.as_f64x8();
10398        let b = b.as_f64x8();
10399        let src = src.as_f64x8();
10400        let r = vscalefpd(a, b, src, k, ROUNDING);
10401        transmute(r)
10402    }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421    k: __mmask8,
10422    a: __m512d,
10423    b: __m512d,
10424) -> __m512d {
10425    unsafe {
10426        static_assert_rounding!(ROUNDING);
10427        let a = a.as_f64x8();
10428        let b = b.as_f64x8();
10429        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10430        transmute(r)
10431    }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to select which exception flags are reported.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444    a: __m512,
10445    b: __m512,
10446    c: __m512i,
10447) -> __m512 {
10448    unsafe {
10449        static_assert_uimm_bits!(IMM8, 8);
10450        static_assert_mantissas_sae!(SAE);
10451        let a = a.as_f32x16();
10452        let b = b.as_f32x16();
10453        let c = c.as_i32x16();
10454        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10455        transmute(r)
10456    }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469    a: __m512,
10470    k: __mmask16,
10471    b: __m512,
10472    c: __m512i,
10473) -> __m512 {
10474    unsafe {
10475        static_assert_uimm_bits!(IMM8, 8);
10476        static_assert_mantissas_sae!(SAE);
10477        let a = a.as_f32x16();
10478        let b = b.as_f32x16();
10479        let c = c.as_i32x16();
10480        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10481        transmute(r)
10482    }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495    k: __mmask16,
10496    a: __m512,
10497    b: __m512,
10498    c: __m512i,
10499) -> __m512 {
10500    unsafe {
10501        static_assert_uimm_bits!(IMM8, 8);
10502        static_assert_mantissas_sae!(SAE);
10503        let a = a.as_f32x16();
10504        let b = b.as_f32x16();
10505        let c = c.as_i32x16();
10506        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10507        transmute(r)
10508    }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to select which exception flags are reported.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521    a: __m512d,
10522    b: __m512d,
10523    c: __m512i,
10524) -> __m512d {
10525    unsafe {
10526        static_assert_uimm_bits!(IMM8, 8);
10527        static_assert_mantissas_sae!(SAE);
10528        let a = a.as_f64x8();
10529        let b = b.as_f64x8();
10530        let c = c.as_i64x8();
10531        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10532        transmute(r)
10533    }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546    a: __m512d,
10547    k: __mmask8,
10548    b: __m512d,
10549    c: __m512i,
10550) -> __m512d {
10551    unsafe {
10552        static_assert_uimm_bits!(IMM8, 8);
10553        static_assert_mantissas_sae!(SAE);
10554        let a = a.as_f64x8();
10555        let b = b.as_f64x8();
10556        let c = c.as_i64x8();
10557        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10558        transmute(r)
10559    }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572    k: __mmask8,
10573    a: __m512d,
10574    b: __m512d,
10575    c: __m512i,
10576) -> __m512d {
10577    unsafe {
10578        static_assert_uimm_bits!(IMM8, 8);
10579        static_assert_mantissas_sae!(SAE);
10580        let a = a.as_f64x8();
10581        let b = b.as_f64x8();
10582        let c = c.as_i64x8();
10583        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10584        transmute(r)
10585    }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595///    _MM_MANT_SIGN_src     // sign = sign(src)\
10596///    _MM_MANT_SIGN_zero    // sign = 0\
10597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
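///
/// As a worked example of the normalization (illustrative, not from Intel's documentation):
/// with the interval [1, 2) and the sign taken from the source, an input of 12.0 = 1.5 * 2^3
/// is reduced to 1.5, and -12.0 to -1.5.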
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607    const NORM: _MM_MANTISSA_NORM_ENUM,
10608    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609    const SAE: i32,
10610>(
10611    a: __m512,
10612) -> __m512 {
10613    unsafe {
10614        static_assert_uimm_bits!(NORM, 4);
10615        static_assert_uimm_bits!(SIGN, 2);
10616        static_assert_mantissas_sae!(SAE);
10617        let a = a.as_f32x16();
10618        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10619        transmute(r)
10620    }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10626///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10627///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10628///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630///    _MM_MANT_SIGN_src     // sign = sign(src)\
10631///    _MM_MANT_SIGN_zero    // sign = 0\
10632///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642    const NORM: _MM_MANTISSA_NORM_ENUM,
10643    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644    const SAE: i32,
10645>(
10646    src: __m512,
10647    k: __mmask16,
10648    a: __m512,
10649) -> __m512 {
10650    unsafe {
10651        static_assert_uimm_bits!(NORM, 4);
10652        static_assert_uimm_bits!(SIGN, 2);
10653        static_assert_mantissas_sae!(SAE);
10654        let a = a.as_f32x16();
10655        let src = src.as_f32x16();
10656        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10657        transmute(r)
10658    }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10664///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10665///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10666///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668///    _MM_MANT_SIGN_src     // sign = sign(src)\
10669///    _MM_MANT_SIGN_zero    // sign = 0\
10670///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680    const NORM: _MM_MANTISSA_NORM_ENUM,
10681    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682    const SAE: i32,
10683>(
10684    k: __mmask16,
10685    a: __m512,
10686) -> __m512 {
10687    unsafe {
10688        static_assert_uimm_bits!(NORM, 4);
10689        static_assert_uimm_bits!(SIGN, 2);
10690        static_assert_mantissas_sae!(SAE);
10691        let a = a.as_f32x16();
10692        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10693        transmute(r)
10694    }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10700///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10701///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10702///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704///    _MM_MANT_SIGN_src     // sign = sign(src)\
10705///    _MM_MANT_SIGN_zero    // sign = 0\
10706///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716    const NORM: _MM_MANTISSA_NORM_ENUM,
10717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718    const SAE: i32,
10719>(
10720    a: __m512d,
10721) -> __m512d {
10722    unsafe {
10723        static_assert_uimm_bits!(NORM, 4);
10724        static_assert_uimm_bits!(SIGN, 2);
10725        static_assert_mantissas_sae!(SAE);
10726        let a = a.as_f64x8();
10727        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
10728        transmute(r)
10729    }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10735///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10736///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10737///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739///    _MM_MANT_SIGN_src     // sign = sign(src)\
10740///    _MM_MANT_SIGN_zero    // sign = 0\
10741///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751    const NORM: _MM_MANTISSA_NORM_ENUM,
10752    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753    const SAE: i32,
10754>(
10755    src: __m512d,
10756    k: __mmask8,
10757    a: __m512d,
10758) -> __m512d {
10759    unsafe {
10760        static_assert_uimm_bits!(NORM, 4);
10761        static_assert_uimm_bits!(SIGN, 2);
10762        static_assert_mantissas_sae!(SAE);
10763        let a = a.as_f64x8();
10764        let src = src.as_f64x8();
10765        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10766        transmute(r)
10767    }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10773///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10774///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10775///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777///    _MM_MANT_SIGN_src     // sign = sign(src)\
10778///    _MM_MANT_SIGN_zero    // sign = 0\
10779///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789    const NORM: _MM_MANTISSA_NORM_ENUM,
10790    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791    const SAE: i32,
10792>(
10793    k: __mmask8,
10794    a: __m512d,
10795) -> __m512d {
10796    unsafe {
10797        static_assert_uimm_bits!(NORM, 4);
10798        static_assert_uimm_bits!(SIGN, 2);
10799        static_assert_mantissas_sae!(SAE);
10800        let a = a.as_f64x8();
10801        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10802        transmute(r)
10803    }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
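///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary and
/// AVX-512F support is assumed.
///
/// ```ignore
/// // Under the default MXCSR rounding mode (round to nearest even),
/// // 1.5 converts to 2 and 2.5 also converts to 2.
/// let a = _mm512_set1_ps(1.5);
/// let r = _mm512_cvtps_epi32(a); // every 32-bit lane holds 2
/// ```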
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814    unsafe {
10815        transmute(vcvtps2dq(
10816            a.as_f32x16(),
10817            i32x16::ZERO,
10818            0b11111111_11111111,
10819            _MM_FROUND_CUR_DIRECTION,
10820        ))
10821    }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832    unsafe {
10833        transmute(vcvtps2dq(
10834            a.as_f32x16(),
10835            src.as_i32x16(),
10836            k,
10837            _MM_FROUND_CUR_DIRECTION,
10838        ))
10839    }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850    unsafe {
10851        transmute(vcvtps2dq(
10852            a.as_f32x16(),
10853            i32x16::ZERO,
10854            k,
10855            _MM_FROUND_CUR_DIRECTION,
10856        ))
10857    }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868    unsafe {
10869        let convert = _mm256_cvtps_epi32(a);
10870        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871    }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882    unsafe {
10883        let convert = _mm256_cvtps_epi32(a);
10884        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885    }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
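///
/// Illustrative sketch (not part of Intel's documentation) of the 128-bit AVX-512VL form;
/// the mask and values are arbitrary and AVX-512F plus AVX-512VL support is assumed.
///
/// ```ignore
/// let src = _mm_set1_epi32(-1);
/// let a = _mm_setr_ps(1.2, 2.7, 3.7, 4.2);
/// // Only lanes 0 and 1 are converted; lanes 2 and 3 keep the value from `src`:
/// // [1, 3, -1, -1]
/// let r = _mm_mask_cvtps_epi32(src, 0b0011, a);
/// ```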
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896    unsafe {
10897        let convert = _mm_cvtps_epi32(a);
10898        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899    }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910    unsafe {
10911        let convert = _mm_cvtps_epi32(a);
10912        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913    }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
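///
/// Illustrative sketch (not part of Intel's documentation); the values are arbitrary, kept in
/// the unsigned range, and AVX-512F support is assumed.
///
/// ```ignore
/// let a = _mm512_set1_ps(3.7);
/// // Under the default rounding mode each lane converts to the unsigned value 4.
/// let r = _mm512_cvtps_epu32(a);
/// ```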
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924    unsafe {
10925        transmute(vcvtps2udq(
10926            a.as_f32x16(),
10927            u32x16::ZERO,
10928            0b11111111_11111111,
10929            _MM_FROUND_CUR_DIRECTION,
10930        ))
10931    }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942    unsafe {
10943        transmute(vcvtps2udq(
10944            a.as_f32x16(),
10945            src.as_u32x16(),
10946            k,
10947            _MM_FROUND_CUR_DIRECTION,
10948        ))
10949    }
10950}
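
// Illustrative usage sketch (not part of the original source): the unsigned
// conversion above, fed with non-negative, in-range values. Assumes a nightly
// user crate with `#![feature(stdarch_x86_avx512)]` and std for runtime feature
// detection; the function name `cvtps_epu32_demo` is invented here.
#[cfg(target_arch = "x86_64")]
fn cvtps_epu32_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm512_set1_ps(3.0);
            let full: [u32; 16] = core::mem::transmute(_mm512_cvtps_epu32(a));
            assert_eq!(full, [3u32; 16]);
            // With a writemask, unselected lanes keep the value from `src`.
            let src = _mm512_set1_epi32(7);
            let r: [u32; 16] =
                core::mem::transmute(_mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a));
            assert_eq!(&r[..8], &[3u32; 8]);
            assert_eq!(&r[8..], &[7u32; 8]);
        }
    }
}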
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960    unsafe {
10961        transmute(vcvtps2udq(
10962            a.as_f32x16(),
10963            u32x16::ZERO,
10964            k,
10965            _MM_FROUND_CUR_DIRECTION,
10966        ))
10967    }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10978    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10989    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11000    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044    unsafe {
11045        transmute(vcvtps2pd(
11046            a.as_f32x8(),
11047            f64x8::ZERO,
11048            0b11111111,
11049            _MM_FROUND_CUR_DIRECTION,
11050        ))
11051    }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062    unsafe {
11063        transmute(vcvtps2pd(
11064            a.as_f32x8(),
11065            src.as_f64x8(),
11066            k,
11067            _MM_FROUND_CUR_DIRECTION,
11068        ))
11069    }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080    unsafe {
11081        transmute(vcvtps2pd(
11082            a.as_f32x8(),
11083            f64x8::ZERO,
11084            k,
11085            _MM_FROUND_CUR_DIRECTION,
11086        ))
11087    }
11088}
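
// Illustrative usage sketch (not part of the original source): widening eight
// f32 lanes to f64 is always exact, and the zeromask variant clears unselected
// lanes. Assumes a nightly user crate with `#![feature(stdarch_x86_avx512)]`
// and std; the function name `cvtps_pd_demo` is invented here.
#[cfg(target_arch = "x86_64")]
fn cvtps_pd_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm256_setr_ps(0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5);
            let wide: [f64; 8] = core::mem::transmute(_mm512_cvtps_pd(a));
            assert_eq!(wide, [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]);
            // Zeromask: only the even-indexed lanes survive, the rest become 0.0.
            let even: [f64; 8] = core::mem::transmute(_mm512_maskz_cvtps_pd(0b01010101, a));
            assert_eq!(even, [0.5, 0.0, 2.5, 0.0, 4.5, 0.0, 6.5, 0.0]);
        }
    }
}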
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098    unsafe {
11099        transmute(vcvtps2pd(
11100            _mm512_castps512_ps256(v2).as_f32x8(),
11101            f64x8::ZERO,
11102            0b11111111,
11103            _MM_FROUND_CUR_DIRECTION,
11104        ))
11105    }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116    unsafe {
11117        transmute(vcvtps2pd(
11118            _mm512_castps512_ps256(v2).as_f32x8(),
11119            src.as_f64x8(),
11120            k,
11121            _MM_FROUND_CUR_DIRECTION,
11122        ))
11123    }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134    unsafe {
11135        transmute(vcvtpd2ps(
11136            a.as_f64x8(),
11137            f32x8::ZERO,
11138            0b11111111,
11139            _MM_FROUND_CUR_DIRECTION,
11140        ))
11141    }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152    unsafe {
11153        transmute(vcvtpd2ps(
11154            a.as_f64x8(),
11155            src.as_f32x8(),
11156            k,
11157            _MM_FROUND_CUR_DIRECTION,
11158        ))
11159    }
11160}
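
// Illustrative usage sketch (not part of the original source): narrowing eight
// f64 lanes to f32 with a writemask, where unselected lanes come from the f32
// `src` vector. Assumes a nightly user crate with `#![feature(stdarch_x86_avx512)]`
// and std; the function name `cvtpd_ps_demo` is invented here.
#[cfg(target_arch = "x86_64")]
fn cvtpd_ps_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
            let src = _mm256_set1_ps(-1.0);
            // Only the low four lanes are converted; the rest copy `src`.
            let r: [f32; 8] =
                core::mem::transmute(_mm512_mask_cvtpd_ps(src, 0b00001111, a));
            assert_eq!(r, [1.0, 2.0, 3.0, 4.0, -1.0, -1.0, -1.0, -1.0]);
        }
    }
}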
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170    unsafe {
11171        transmute(vcvtpd2ps(
11172            a.as_f64x8(),
11173            f32x8::ZERO,
11174            k,
11175            _MM_FROUND_CUR_DIRECTION,
11176        ))
11177    }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188    unsafe {
11189        let convert = _mm256_cvtpd_ps(a);
11190        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191    }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202    unsafe {
11203        let convert = _mm256_cvtpd_ps(a);
11204        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205    }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216    unsafe {
11217        let convert = _mm_cvtpd_ps(a);
11218        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219    }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230    unsafe {
11231        let convert = _mm_cvtpd_ps(a);
11232        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233    }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244    unsafe {
11245        transmute(vcvtpd2dq(
11246            a.as_f64x8(),
11247            i32x8::ZERO,
11248            0b11111111,
11249            _MM_FROUND_CUR_DIRECTION,
11250        ))
11251    }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262    unsafe {
11263        transmute(vcvtpd2dq(
11264            a.as_f64x8(),
11265            src.as_i32x8(),
11266            k,
11267            _MM_FROUND_CUR_DIRECTION,
11268        ))
11269    }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280    unsafe {
11281        transmute(vcvtpd2dq(
11282            a.as_f64x8(),
11283            i32x8::ZERO,
11284            k,
11285            _MM_FROUND_CUR_DIRECTION,
11286        ))
11287    }
11288}
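
// Illustrative usage sketch (not part of the original source): converting eight
// f64 lanes to 32-bit integers; whole-valued inputs avoid any dependence on the
// current MXCSR rounding mode. Assumes a nightly user crate with
// `#![feature(stdarch_x86_avx512)]` and std; `cvtpd_epi32_demo` is an invented name.
#[cfg(target_arch = "x86_64")]
fn cvtpd_epi32_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm512_setr_pd(-4.0, -3.0, -2.0, -1.0, 1.0, 2.0, 3.0, 4.0);
            let all: [i32; 8] = core::mem::transmute(_mm512_cvtpd_epi32(a));
            assert_eq!(all, [-4, -3, -2, -1, 1, 2, 3, 4]);
            // The zeromask variant keeps only the lanes whose mask bit is set.
            let low: [i32; 8] = core::mem::transmute(_mm512_maskz_cvtpd_epi32(0b00000011, a));
            assert_eq!(low, [-4, -3, 0, 0, 0, 0, 0, 0]);
        }
    }
}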
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298    unsafe {
11299        let convert = _mm256_cvtpd_epi32(a);
11300        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301    }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312    unsafe {
11313        let convert = _mm256_cvtpd_epi32(a);
11314        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315    }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326    unsafe {
11327        let convert = _mm_cvtpd_epi32(a);
11328        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329    }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340    unsafe {
11341        let convert = _mm_cvtpd_epi32(a);
11342        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343    }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354    unsafe {
11355        transmute(vcvtpd2udq(
11356            a.as_f64x8(),
11357            u32x8::ZERO,
11358            0b11111111,
11359            _MM_FROUND_CUR_DIRECTION,
11360        ))
11361    }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372    unsafe {
11373        transmute(vcvtpd2udq(
11374            a.as_f64x8(),
11375            src.as_u32x8(),
11376            k,
11377            _MM_FROUND_CUR_DIRECTION,
11378        ))
11379    }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390    unsafe {
11391        transmute(vcvtpd2udq(
11392            a.as_f64x8(),
11393            u32x8::ZERO,
11394            k,
11395            _MM_FROUND_CUR_DIRECTION,
11396        ))
11397    }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474    unsafe {
11475        let r: f32x8 = vcvtpd2ps(
11476            v2.as_f64x8(),
11477            f32x8::ZERO,
11478            0b11111111,
11479            _MM_FROUND_CUR_DIRECTION,
11480        );
11481        simd_shuffle!(
11482            r,
11483            f32x8::ZERO,
11484            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485        )
11486    }
11487}
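
// Illustrative usage sketch (not part of the original source): the eight
// converted f32 values land in the lower half of the 512-bit result and the
// upper eight lanes are zeroed, as described above. Assumes a nightly user
// crate with `#![feature(stdarch_x86_avx512)]` and std; `cvtpd_pslo_demo` is an
// invented name.
#[cfg(target_arch = "x86_64")]
fn cvtpd_pslo_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let v2 = _mm512_set1_pd(2.5);
            let r: [f32; 16] = core::mem::transmute(_mm512_cvtpd_pslo(v2));
            assert_eq!(&r[..8], &[2.5f32; 8]);
            assert_eq!(&r[8..], &[0.0f32; 8]);
        }
    }
}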
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497    unsafe {
11498        let r: f32x8 = vcvtpd2ps(
11499            v2.as_f64x8(),
11500            _mm512_castps512_ps256(src).as_f32x8(),
11501            k,
11502            _MM_FROUND_CUR_DIRECTION,
11503        );
11504        simd_shuffle!(
11505            r,
11506            f32x8::ZERO,
11507            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508        )
11509    }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520    unsafe {
11521        let a = a.as_i8x16();
11522        transmute::<i32x16, _>(simd_cast(a))
11523    }
11524}
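
// Illustrative usage sketch (not part of the original source): sign extension
// preserves the value of negative bytes (contrast with the zero-extending
// `_mm512_cvtepu8_epi32` further below). Assumes a nightly user crate with
// `#![feature(stdarch_x86_avx512)]` and std; `cvtepi8_epi32_demo` is an
// invented name.
#[cfg(target_arch = "x86_64")]
fn cvtepi8_epi32_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm_set1_epi8(-1);
            let r: [i32; 16] = core::mem::transmute(_mm512_cvtepi8_epi32(a));
            // Every input byte is -1 (0xFF), so every sign-extended lane is -1.
            assert_eq!(r, [-1i32; 16]);
        }
    }
}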
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534    unsafe {
11535        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11536        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537    }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548    unsafe {
11549        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11550        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551    }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562    unsafe {
11563        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11564        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565    }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576    unsafe {
11577        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11578        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579    }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590    unsafe {
11591        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11592        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593    }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604    unsafe {
11605        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11606        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607    }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618    unsafe {
11619        let a = a.as_i8x16();
11620        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621        transmute::<i64x8, _>(simd_cast(v64))
11622    }
11623}
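
// Illustrative usage sketch (not part of the original source): only the low 8
// bytes of the 128-bit source participate; the upper 8 bytes are ignored.
// Assumes a nightly user crate with `#![feature(stdarch_x86_avx512)]` and std;
// `cvtepi8_epi64_demo` is an invented name.
#[cfg(target_arch = "x86_64")]
fn cvtepi8_epi64_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 99, 99, 99, 99, 99, 99, 99, 99);
            let r: [i64; 8] = core::mem::transmute(_mm512_cvtepi8_epi64(a));
            assert_eq!(r, [0, 1, 2, 3, 4, 5, 6, 7]);
        }
    }
}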
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633    unsafe {
11634        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11635        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636    }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647    unsafe {
11648        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11649        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650    }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661    unsafe {
11662        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11663        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664    }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675    unsafe {
11676        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11677        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678    }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe {
11690        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11691        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692    }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703    unsafe {
11704        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11705        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706    }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717    unsafe {
11718        let a = a.as_u8x16();
11719        transmute::<i32x16, _>(simd_cast(a))
11720    }
11721}
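
// Illustrative usage sketch (not part of the original source): zero extension
// treats the bytes as unsigned, so 0xFF becomes 255 rather than -1 as in the
// sign-extending conversions above. Assumes a nightly user crate with
// `#![feature(stdarch_x86_avx512)]` and std; `cvtepu8_epi32_demo` is an
// invented name.
#[cfg(target_arch = "x86_64")]
fn cvtepu8_epi32_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm_set1_epi8(-1); // every byte is 0xFF
            let r: [i32; 16] = core::mem::transmute(_mm512_cvtepu8_epi32(a));
            assert_eq!(r, [255i32; 16]);
        }
    }
}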
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731    unsafe {
11732        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11733        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734    }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745    unsafe {
11746        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11747        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748    }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759    unsafe {
11760        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11761        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762    }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773    unsafe {
11774        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11775        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776    }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787    unsafe {
11788        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11789        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790    }
11791}
11792
11793/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801    unsafe {
11802        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11803        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804    }
11805}
11806
11807/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815    unsafe {
11816        let a = a.as_u8x16();
11817        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818        transmute::<i64x8, _>(simd_cast(v64))
11819    }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830    unsafe {
11831        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11832        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833    }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844    unsafe {
11845        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11846        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847    }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858    unsafe {
11859        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11860        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861    }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872    unsafe {
11873        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11874        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875    }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11888        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889    }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11902        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903    }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
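
// Illustrative usage sketch (not part of the original source): sign extending
// sixteen i16 lanes to i32 under a writemask. Assumes a nightly user crate with
// `#![feature(stdarch_x86_avx512)]` and std; `cvtepi16_epi32_demo` is an
// invented name.
#[cfg(target_arch = "x86_64")]
fn cvtepi16_epi32_demo() {
    use core::arch::x86_64::*;
    if is_x86_feature_detected!("avx512f") {
        // SAFETY: the avx512f target feature was detected at runtime.
        unsafe {
            let a = _mm256_set1_epi16(-300);
            let src = _mm512_set1_epi32(1);
            // Only the low eight lanes are converted; the rest keep 1 from `src`.
            let r: [i32; 16] =
                core::mem::transmute(_mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a));
            assert_eq!(&r[..8], &[-300i32; 8]);
            assert_eq!(&r[8..], &[1i32; 8]);
        }
    }
}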
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
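///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support): the eight 16-bit
/// lanes of the 128-bit input become eight 64-bit lanes.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi16(-2);
///     let r = _mm512_cvtepi16_epi64(a);
///     let eq = _mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-2));
///     assert_eq!(eq, 0xFF);
/// }
/// ```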
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026    unsafe {
12027        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12028        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029    }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040    unsafe {
12041        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12042        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043    }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054    unsafe {
12055        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12056        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057    }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068    unsafe {
12069        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12070        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071    }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12084        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085    }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096    unsafe {
12097        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12098        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099    }
12100}
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
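///
/// A minimal sketch contrasting zero extension with sign extension
/// (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // 0xFFFF is -1 as a signed 16-bit value but 65535 as an unsigned one.
///     let a = _mm256_set1_epi16(-1);
///     let zext = _mm512_cvtepu16_epi32(a);
///     let sext = _mm512_cvtepi16_epi32(a);
///     let z = _mm512_cmpeq_epi32_mask(zext, _mm512_set1_epi32(65535));
///     let s = _mm512_cmpeq_epi32_mask(sext, _mm512_set1_epi32(-1));
///     assert_eq!((z, s), (0xFFFF, 0xFFFF));
/// }
/// ```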
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110    unsafe {
12111        let a = a.as_u16x16();
12112        transmute::<i32x16, _>(simd_cast(a))
12113    }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124    unsafe {
12125        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12126        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127    }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138    unsafe {
12139        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12140        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141    }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152    unsafe {
12153        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12154        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155    }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166    unsafe {
12167        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12168        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169    }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180    unsafe {
12181        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12182        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183    }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194    unsafe {
12195        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12196        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197    }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208    unsafe {
12209        let a = a.as_u16x8();
12210        transmute::<i64x8, _>(simd_cast(a))
12211    }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222    unsafe {
12223        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12224        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225    }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236    unsafe {
12237        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12238        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239    }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250    unsafe {
12251        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12252        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253    }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264    unsafe {
12265        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12266        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267    }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278    unsafe {
12279        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12280        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281    }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292    unsafe {
12293        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12294        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295    }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
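///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Eight signed 32-bit lanes widen to eight 64-bit lanes.
///     let a = _mm256_set1_epi32(i32::MIN);
///     let r = _mm512_cvtepi32_epi64(a);
///     let eq = _mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(i32::MIN as i64));
///     assert_eq!(eq, 0xFF);
/// }
/// ```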
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306    unsafe {
12307        let a = a.as_i32x8();
12308        transmute::<i64x8, _>(simd_cast(a))
12309    }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320    unsafe {
12321        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12322        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323    }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334    unsafe {
12335        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12336        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337    }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348    unsafe {
12349        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12350        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351    }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362    unsafe {
12363        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12364        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365    }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376    unsafe {
12377        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12378        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379    }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390    unsafe {
12391        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12392        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393    }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404    unsafe {
12405        let a = a.as_u32x8();
12406        transmute::<i64x8, _>(simd_cast(a))
12407    }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418    unsafe {
12419        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12420        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421    }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432    unsafe {
12433        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12434        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435    }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446    unsafe {
12447        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12448        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449    }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460    unsafe {
12461        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12462        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463    }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474    unsafe {
12475        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12476        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477    }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488    unsafe {
12489        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12490        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491    }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
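///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-40);
///     // Each 32-bit integer becomes the nearest f32 (exact for this value).
///     let r = _mm512_cvtepi32_ps(a);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == -40.0));
/// }
/// ```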
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502    unsafe {
12503        let a = a.as_i32x16();
12504        transmute::<f32x16, _>(simd_cast(a))
12505    }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516    unsafe {
12517        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12518        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519    }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530    unsafe {
12531        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12532        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533    }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544    unsafe {
12545        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12546        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547    }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558    unsafe {
12559        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12560        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561    }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572    unsafe {
12573        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12574        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575    }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586    unsafe {
12587        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12588        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589    }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
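///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support). Note that the
/// 256-bit integer input widens to a full 512-bit double-precision result:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi32(7);
///     let r = _mm512_cvtepi32_pd(a);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [7.0; 8]);
/// }
/// ```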
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600    unsafe {
12601        let a = a.as_i32x8();
12602        transmute::<f64x8, _>(simd_cast(a))
12603    }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614    unsafe {
12615        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12616        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617    }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628    unsafe {
12629        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12630        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631    }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642    unsafe {
12643        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12644        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645    }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656    unsafe {
12657        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12658        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659    }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670    unsafe {
12671        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12672        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673    }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684    unsafe {
12685        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12686        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687    }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
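///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support, with the default
/// rounding mode). The lanes are read as unsigned, so a bit pattern of all
/// ones converts to about 4.29e9 rather than -1.0:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(u32::MAX as i32);
///     let r = _mm512_cvtepu32_ps(a);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == u32::MAX as f32));
/// }
/// ```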
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698    unsafe {
12699        let a = a.as_u32x16();
12700        transmute::<f32x16, _>(simd_cast(a))
12701    }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712    unsafe {
12713        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12714        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715    }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726    unsafe {
12727        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12728        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729    }
12730}
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740    unsafe {
12741        let a = a.as_u32x8();
12742        transmute::<f64x8, _>(simd_cast(a))
12743    }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754    unsafe {
12755        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12756        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757    }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768    unsafe {
12769        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12770        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771    }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782    unsafe {
12783        let a = a.as_u32x4();
12784        transmute::<f64x4, _>(simd_cast(a))
12785    }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796    unsafe {
12797        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12798        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799    }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810    unsafe {
12811        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12812        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813    }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
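///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F + AVX-512VL support). Only the
/// two lowest 32-bit lanes of `a` participate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Lane order is [100, 200, 300, 400]; only 100 and 200 are converted.
///     let a = _mm_set_epi32(400, 300, 200, 100);
///     let r = _mm_cvtepu32_pd(a);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [100.0, 200.0]);
/// }
/// ```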
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824    unsafe {
12825        let a = a.as_u32x4();
12826        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
12827        transmute::<f64x2, _>(simd_cast(u64))
12828    }
12829}
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839    unsafe {
12840        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12841        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842    }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853    unsafe {
12854        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12855        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856    }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
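///
/// A minimal sketch (illustrative only; assumes nightly Rust with
/// `feature(stdarch_x86_avx512)` and AVX-512F support). Only the lower eight
/// 32-bit lanes of the 512-bit input are converted:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v2 = _mm512_set1_epi32(-9);
///     let r = _mm512_cvtepi32lo_pd(v2);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [-9.0; 8]);
/// }
/// ```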
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867    unsafe {
12868        let v2 = v2.as_i32x16();
12869        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12870        transmute::<f64x8, _>(simd_cast(v256))
12871    }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882    unsafe {
12883        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12884        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885    }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896    unsafe {
12897        let v2 = v2.as_u32x16();
12898        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12899        transmute::<f64x8, _>(simd_cast(v256))
12900    }
12901}
12902
12903/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911    unsafe {
12912        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
12913        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914    }
12915}
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
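///
/// A minimal sketch of the truncating narrowing (illustrative only; assumes
/// nightly Rust with `feature(stdarch_x86_avx512)` and AVX-512F support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Each 32-bit lane keeps only its low 16 bits: 0x0001_0002 -> 0x0002.
///     let a = _mm512_set1_epi32(0x0001_0002);
///     let r = _mm512_cvtepi32_epi16(a);
///     let mut out = [0i16; 16];
///     _mm256_storeu_si256(out.as_mut_ptr() as *mut _, r);
///     assert_eq!(out, [2i16; 16]);
/// }
/// ```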
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925    unsafe {
12926        let a = a.as_i32x16();
12927        transmute::<i16x16, _>(simd_cast(a))
12928    }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    unsafe {
12940        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12941        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942    }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953    unsafe {
12954        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12955        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956    }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967    unsafe {
12968        let a = a.as_i32x8();
12969        transmute::<i16x8, _>(simd_cast(a))
12970    }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981    unsafe {
12982        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12983        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984    }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995    unsafe {
12996        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12997        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998    }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13009    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13020    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13031    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
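
// Unlike the wider forms, the `_mm_*cvtepi32_epi16` intrinsics above produce
// only four 16-bit lanes, so the converted values fill the low 64 bits of the
// `__m128i` result and Intel documents the upper 64 bits as zeroed; that is
// the behaviour the `vpmovdw128` compiler intrinsic provides. The scalar
// model below is an editorial sketch of the writemask variant, not part of
// the upstream API.
#[allow(dead_code)]
fn _editorial_model_mm_mask_cvtepi32_epi16(src: [i16; 8], k: u8, a: [i32; 4]) -> [i16; 8] {
    // Lanes 4..7 of the result are zero regardless of the mask.
    let mut dst = [0i16; 8];
    for i in 0..4 {
        dst[i] = if (k >> i) & 1 == 1 { a[i] as i16 } else { src[i] };
    }
    dst
}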
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042    unsafe {
13043        let a = a.as_i32x16();
13044        transmute::<i8x16, _>(simd_cast(a))
13045    }
13046}
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13084    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13095    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13106    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13117    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13128    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13139    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150    unsafe {
13151        let a = a.as_i64x8();
13152        transmute::<i32x8, _>(simd_cast(a))
13153    }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13159#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164    unsafe {
13165        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13166        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13167    }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178    unsafe {
13179        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13180        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13181    }
13182}
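
// Editorial usage sketch (not part of the original file): narrowing eight
// 64-bit lanes to eight 32-bit lanes. Truncation keeps the low 32 bits of
// every lane, so `i64::MAX` comes out as -1. The helper name is made up and
// exists only to show the call shape under the `avx512f` target feature.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _editorial_usage_cvtepi64_epi32() -> __m256i {
    let a = _mm512_setr_epi64(1, -2, 3, -4, 5, -6, 7, i64::MAX);
    // Produces the 32-bit lanes [1, -2, 3, -4, 5, -6, 7, -1].
    _mm512_cvtepi64_epi32(a)
}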
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192    unsafe {
13193        let a = a.as_i64x4();
13194        transmute::<i32x4, _>(simd_cast(a))
13195    }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206    unsafe {
13207        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13208        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13209    }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220    unsafe {
13221        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13222        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13223    }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13257}
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267    unsafe {
13268        let a = a.as_i64x8();
13269        transmute::<i16x8, _>(simd_cast(a))
13270    }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281    unsafe {
13282        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13283        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13284    }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295    unsafe {
13296        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13297        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13298    }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13398}
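
// The `_mm512_*cvtepi64_epi8` intrinsics above narrow eight 64-bit lanes to
// eight bytes, i.e. only 64 bits of payload, so the result occupies the low
// half of the `__m128i` and Intel documents the upper 64 bits as zeroed. A
// plain `simd_cast` would yield a 64-bit `i8x8` vector, which is presumably
// why these forms go through the `vpmovqb` compiler intrinsic instead.
// Editorial scalar sketch of the unmasked form, not part of the upstream API:
#[allow(dead_code)]
fn _editorial_model_mm512_cvtepi64_epi8(a: [i64; 8]) -> [i8; 16] {
    // Bytes 8..15 of the result are documented as zero.
    let mut dst = [0i8; 16];
    for i in 0..8 {
        // Truncation keeps the low 8 bits of each 64-bit lane.
        dst[i] = a[i] as i8;
    }
    dst
}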
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13475}
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13497}
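
// The `vpmovsdw` family above performs a saturating rather than a truncating
// narrow: each signed 32-bit lane is clamped to the i16 range before being
// stored. Editorial scalar sketch of that per-lane behaviour (illustration
// only, not part of the upstream API):
#[allow(dead_code)]
fn _editorial_saturate_i32_to_i16(a: [i32; 16]) -> [i16; 16] {
    let mut dst = [0i16; 16];
    for i in 0..16 {
        // Clamp into [i16::MIN, i16::MAX]; the cast afterwards is lossless.
        dst[i] = a[i].clamp(i16::MIN as i32, i16::MAX as i32) as i16;
    }
    dst
}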
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
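///
/// A minimal usage sketch (illustrative values; `avx512f` is assumed to be
/// enabled at the call site):
///
/// ```ignore
/// // 70_000 exceeds u16::MAX, so unsigned saturation clamps every lane to 65_535.
/// let a = _mm512_set1_epi32(70_000);
/// let r = _mm512_cvtusepi32_epi16(a);
/// ```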
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
13970}
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
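///
/// A minimal sketch of the writemask behaviour (hypothetical values; `avx512f`
/// is assumed to be enabled at the call site):
///
/// ```ignore
/// let src = _mm256_set1_epi16(1);
/// let a = _mm512_set1_epi32(70_000);
/// // The low eight mask bits are set, so lanes 0..8 receive the saturated
/// // conversion (65_535); lanes 8..16 keep the value 1 copied from `src`.
/// let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
/// ```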
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
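///
/// A minimal usage sketch (illustrative values; `avx512f` is assumed to be
/// enabled at the call site):
///
/// ```ignore
/// // 300 exceeds u8::MAX, so unsigned saturation clamps every lane to 255.
/// let a = _mm512_set1_epi32(300);
/// let r = _mm512_cvtusepi32_epi8(a);
/// ```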
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
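///
/// A minimal usage sketch (illustrative values; `avx512f` is assumed to be
/// enabled at the call site):
///
/// ```ignore
/// // 2^40 exceeds u32::MAX, so unsigned saturation clamps every lane to 4_294_967_295.
/// let a = _mm512_set1_epi64(1_i64 << 40);
/// let r = _mm512_cvtusepi64_epi32(a);
/// ```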
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
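///
/// A minimal usage sketch (illustrative values; `avx512f` is assumed to be
/// enabled at the call site):
///
/// ```ignore
/// // 100_000 exceeds u16::MAX, so unsigned saturation clamps every lane to 65_535.
/// let a = _mm512_set1_epi64(100_000);
/// let r = _mm512_cvtusepi64_epi16(a);
/// ```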
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
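///
/// A minimal usage sketch (illustrative values; `avx512f` is assumed to be
/// enabled at the call site):
///
/// ```ignore
/// // 300 exceeds u8::MAX, so unsigned saturation clamps every lane to 255; only
/// // the low 8 bytes of the returned __m128i carry converted elements.
/// let a = _mm512_set1_epi64(300);
/// let r = _mm512_cvtusepi64_epi8(a);
/// ```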
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
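///
/// A minimal sketch of selecting the rounding mode through the const parameter
/// (illustrative values; `avx512f` is assumed to be enabled at the call site):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // Round-to-nearest-even turns 1.5 into 2 ...
/// let nearest = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// // ... while rounding toward negative infinity turns it into 1.
/// let floor = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// ```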
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472    unsafe {
14473        static_assert_rounding!(ROUNDING);
14474        let a = a.as_f32x16();
14475        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476        transmute(r)
14477    }
14478}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496    src: __m512i,
14497    k: __mmask16,
14498    a: __m512,
14499) -> __m512i {
14500    unsafe {
14501        static_assert_rounding!(ROUNDING);
14502        let a = a.as_f32x16();
14503        let src = src.as_i32x16();
14504        let r = vcvtps2dq(a, src, k, ROUNDING);
14505        transmute(r)
14506    }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525    unsafe {
14526        static_assert_rounding!(ROUNDING);
14527        let a = a.as_f32x16();
14528        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529        transmute(r)
14530    }
14531}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549    unsafe {
14550        static_assert_rounding!(ROUNDING);
14551        let a = a.as_f32x16();
14552        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553        transmute(r)
14554    }
14555}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573    src: __m512i,
14574    k: __mmask16,
14575    a: __m512,
14576) -> __m512i {
14577    unsafe {
14578        static_assert_rounding!(ROUNDING);
14579        let a = a.as_f32x16();
14580        let src = src.as_u32x16();
14581        let r = vcvtps2udq(a, src, k, ROUNDING);
14582        transmute(r)
14583    }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602    unsafe {
14603        static_assert_rounding!(ROUNDING);
14604        let a = a.as_f32x16();
14605        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606        transmute(r)
14607    }
14608}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
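///
/// A minimal sketch (illustrative value; `avx512f` is assumed to be enabled at
/// the call site). Widening f32 to f64 is exact, so `SAE` only controls
/// exception suppression here:
///
/// ```ignore
/// let a = _mm256_set1_ps(0.1);
/// let r = _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a);
/// ```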
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620    unsafe {
14621        static_assert_sae!(SAE);
14622        let a = a.as_f32x8();
14623        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624        transmute(r)
14625    }
14626}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638    unsafe {
14639        static_assert_sae!(SAE);
14640        let a = a.as_f32x8();
14641        let src = src.as_f64x8();
14642        let r = vcvtps2pd(a, src, k, SAE);
14643        transmute(r)
14644    }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657    unsafe {
14658        static_assert_sae!(SAE);
14659        let a = a.as_f32x8();
14660        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661        transmute(r)
14662    }
14663}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681    unsafe {
14682        static_assert_rounding!(ROUNDING);
14683        let a = a.as_f64x8();
14684        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685        transmute(r)
14686    }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705    src: __m256i,
14706    k: __mmask8,
14707    a: __m512d,
14708) -> __m256i {
14709    unsafe {
14710        static_assert_rounding!(ROUNDING);
14711        let a = a.as_f64x8();
14712        let src = src.as_i32x8();
14713        let r = vcvtpd2dq(a, src, k, ROUNDING);
14714        transmute(r)
14715    }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734    unsafe {
14735        static_assert_rounding!(ROUNDING);
14736        let a = a.as_f64x8();
14737        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738        transmute(r)
14739    }
14740}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758    unsafe {
14759        static_assert_rounding!(ROUNDING);
14760        let a = a.as_f64x8();
14761        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762        transmute(r)
14763    }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782    src: __m256i,
14783    k: __mmask8,
14784    a: __m512d,
14785) -> __m256i {
14786    unsafe {
14787        static_assert_rounding!(ROUNDING);
14788        let a = a.as_f64x8();
14789        let src = src.as_u32x8();
14790        let r = vcvtpd2udq(a, src, k, ROUNDING);
14791        transmute(r)
14792    }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811    unsafe {
14812        static_assert_rounding!(ROUNDING);
14813        let a = a.as_f64x8();
14814        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815        transmute(r)
14816    }
14817}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
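///
/// A minimal sketch (illustrative value; `avx512f` is assumed to be enabled at
/// the call site). Narrowing f64 to f32 can be inexact, so the chosen rounding
/// mode decides which neighbouring f32 is produced:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0 / 3.0);
/// let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// ```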
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835    unsafe {
14836        static_assert_rounding!(ROUNDING);
14837        let a = a.as_f64x8();
14838        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839        transmute(r)
14840    }
14841}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859    src: __m256,
14860    k: __mmask8,
14861    a: __m512d,
14862) -> __m256 {
14863    unsafe {
14864        static_assert_rounding!(ROUNDING);
14865        let a = a.as_f64x8();
14866        let src = src.as_f32x8();
14867        let r = vcvtpd2ps(a, src, k, ROUNDING);
14868        transmute(r)
14869    }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888    unsafe {
14889        static_assert_rounding!(ROUNDING);
14890        let a = a.as_f64x8();
14891        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892        transmute(r)
14893    }
14894}
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
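///
/// A minimal sketch (illustrative only; nightly with `stdarch_x86_avx512` and an AVX-512F CPU
/// assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(3);
///     // Convert sixteen i32 lanes to f32 with round-to-nearest, exceptions suppressed.
///     let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(r)), 3.0);
/// }
/// ```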
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912    unsafe {
14913        static_assert_rounding!(ROUNDING);
14914        let a = a.as_i32x16();
14915        let r = vcvtdq2ps(a, ROUNDING);
14916        transmute(r)
14917    }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936    src: __m512,
14937    k: __mmask16,
14938    a: __m512i,
14939) -> __m512 {
14940    unsafe {
14941        static_assert_rounding!(ROUNDING);
14942        let a = a.as_i32x16();
14943        let r = vcvtdq2ps(a, ROUNDING);
14944        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945    }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964    unsafe {
14965        static_assert_rounding!(ROUNDING);
14966        let a = a.as_i32x16();
14967        let r = vcvtdq2ps(a, ROUNDING);
14968        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
14969    }
14970}
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
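///
/// A sketch showing why the unsigned variant matters (illustrative only; nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     // 3_000_000_000 does not fit in an i32, but as an unsigned source it converts
///     // cleanly (and happens to be exactly representable as an f32).
///     let a = _mm512_set1_epi32(3_000_000_000u32 as i32);
///     let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(r)), 3_000_000_000.0);
/// }
/// ```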
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988    unsafe {
14989        static_assert_rounding!(ROUNDING);
14990        let a = a.as_u32x16();
14991        let r = vcvtudq2ps(a, ROUNDING);
14992        transmute(r)
14993    }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012    src: __m512,
15013    k: __mmask16,
15014    a: __m512i,
15015) -> __m512 {
15016    unsafe {
15017        static_assert_rounding!(ROUNDING);
15018        let a = a.as_u32x16();
15019        let r = vcvtudq2ps(a, ROUNDING);
15020        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021    }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040    unsafe {
15041        static_assert_rounding!(ROUNDING);
15042        let a = a.as_u32x16();
15043        let r = vcvtudq2ps(a, ROUNDING);
15044        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045    }
15046}
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15050///
15051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
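///
/// A round-trip sketch through half precision (illustrative only; nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     // Narrow to sixteen f16 lanes (packed in a __m256i) with exceptions suppressed,
///     // then widen back to f32.
///     let half = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
///     let back = _mm512_cvtph_ps(half);
///     assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(back)), 1.0);
/// }
/// ```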
15052#[inline]
15053#[target_feature(enable = "avx512f")]
15054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15055#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15056#[rustc_legacy_const_generics(1)]
15057pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
15058    unsafe {
15059        static_assert_sae!(SAE);
15060        let a = a.as_f32x16();
15061        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15062        transmute(r)
15063    }
15064}
15065
15066/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15067/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15068///
15069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15070#[inline]
15071#[target_feature(enable = "avx512f")]
15072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15073#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15074#[rustc_legacy_const_generics(3)]
15075pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
15076    src: __m256i,
15077    k: __mmask16,
15078    a: __m512,
15079) -> __m256i {
15080    unsafe {
15081        static_assert_sae!(SAE);
15082        let a = a.as_f32x16();
15083        let src = src.as_i16x16();
15084        let r = vcvtps2ph(a, SAE, src, k);
15085        transmute(r)
15086    }
15087}
15088
15089/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15090/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15091///
15092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15093#[inline]
15094#[target_feature(enable = "avx512f")]
15095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15096#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15097#[rustc_legacy_const_generics(2)]
15098pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15099    unsafe {
15100        static_assert_sae!(SAE);
15101        let a = a.as_f32x16();
15102        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15103        transmute(r)
15104    }
15105}
15106
15107/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15108/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15109/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15110/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15111/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15112/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15114///
15115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15116#[inline]
15117#[target_feature(enable = "avx512f,avx512vl")]
15118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15119#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15120#[rustc_legacy_const_generics(3)]
15121pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15122    src: __m128i,
15123    k: __mmask8,
15124    a: __m256,
15125) -> __m128i {
15126    unsafe {
15127        static_assert_uimm_bits!(IMM8, 8);
15128        let a = a.as_f32x8();
15129        let src = src.as_i16x8();
15130        let r = vcvtps2ph256(a, IMM8, src, k);
15131        transmute(r)
15132    }
15133}
15134
15135/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15136/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15137/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15138/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15139/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15140/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15141/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15142///
15143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15144#[inline]
15145#[target_feature(enable = "avx512f,avx512vl")]
15146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15147#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15148#[rustc_legacy_const_generics(2)]
15149pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15150    unsafe {
15151        static_assert_uimm_bits!(IMM8, 8);
15152        let a = a.as_f32x8();
15153        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15154        transmute(r)
15155    }
15156}
15157
15158/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15159/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15160/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15161/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15162/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15163/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15165///
15166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15167#[inline]
15168#[target_feature(enable = "avx512f,avx512vl")]
15169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15170#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15171#[rustc_legacy_const_generics(3)]
15172pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15173    unsafe {
15174        static_assert_uimm_bits!(IMM8, 8);
15175        let a = a.as_f32x4();
15176        let src = src.as_i16x8();
15177        let r = vcvtps2ph128(a, IMM8, src, k);
15178        transmute(r)
15179    }
15180}
15181
15182/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15183/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15184/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15185/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15186/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15187/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15188/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15189///
15190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15191#[inline]
15192#[target_feature(enable = "avx512f,avx512vl")]
15193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15194#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15195#[rustc_legacy_const_generics(2)]
15196pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15197    unsafe {
15198        static_assert_uimm_bits!(IMM8, 8);
15199        let a = a.as_f32x4();
15200        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15201        transmute(r)
15202    }
15203}
15204
15205/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15206/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15207///
15208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15209#[inline]
15210#[target_feature(enable = "avx512f")]
15211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15212#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15213#[rustc_legacy_const_generics(1)]
15214pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
15215    unsafe {
15216        static_assert_sae!(SAE);
15217        let a = a.as_f32x16();
15218        let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15219        transmute(r)
15220    }
15221}
15222
15223/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15225///
15226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15227#[inline]
15228#[target_feature(enable = "avx512f")]
15229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15230#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15231#[rustc_legacy_const_generics(3)]
15232pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15233    unsafe {
15234        static_assert_sae!(SAE);
15235        let a = a.as_f32x16();
15236        let src = src.as_i16x16();
15237        let r = vcvtps2ph(a, SAE, src, k);
15238        transmute(r)
15239    }
15240}
15241
15242/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15249#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15250#[rustc_legacy_const_generics(2)]
15251pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15252    unsafe {
15253        static_assert_sae!(SAE);
15254        let a = a.as_f32x16();
15255        let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15256        transmute(r)
15257    }
15258}
15259
15260/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15261/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15262/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15263/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15264/// * [`_MM_FROUND_TO_POS_INF`] : round up
15265/// * [`_MM_FROUND_TO_ZERO`] : truncate
15266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15267///
15268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15269#[inline]
15270#[target_feature(enable = "avx512f,avx512vl")]
15271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15272#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15273#[rustc_legacy_const_generics(3)]
15274pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15275    unsafe {
15276        static_assert_uimm_bits!(IMM8, 8);
15277        let a = a.as_f32x8();
15278        let src = src.as_i16x8();
15279        let r = vcvtps2ph256(a, IMM8, src, k);
15280        transmute(r)
15281    }
15282}
15283
15284/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15285/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15286/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15287/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15288/// * [`_MM_FROUND_TO_POS_INF`] : round up
15289/// * [`_MM_FROUND_TO_ZERO`] : truncate
15290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15291///
15292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15293#[inline]
15294#[target_feature(enable = "avx512f,avx512vl")]
15295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15296#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15297#[rustc_legacy_const_generics(2)]
15298pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15299    unsafe {
15300        static_assert_uimm_bits!(IMM8, 8);
15301        let a = a.as_f32x8();
15302        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15303        transmute(r)
15304    }
15305}
15306
15307/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15308/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15309/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15310/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15311/// * [`_MM_FROUND_TO_POS_INF`] : round up
15312/// * [`_MM_FROUND_TO_ZERO`] : truncate
15313/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15314///
15315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15316#[inline]
15317#[target_feature(enable = "avx512f,avx512vl")]
15318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15319#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15320#[rustc_legacy_const_generics(3)]
15321pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15322    unsafe {
15323        static_assert_uimm_bits!(IMM8, 8);
15324        let a = a.as_f32x4();
15325        let src = src.as_i16x8();
15326        let r = vcvtps2ph128(a, IMM8, src, k);
15327        transmute(r)
15328    }
15329}
15330
15331/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15332/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15333/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15334/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15335/// * [`_MM_FROUND_TO_POS_INF`] : round up
15336/// * [`_MM_FROUND_TO_ZERO`] : truncate
15337/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15338///
15339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15340#[inline]
15341#[target_feature(enable = "avx512f,avx512vl")]
15342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15343#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15344#[rustc_legacy_const_generics(2)]
15345pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15346    unsafe {
15347        static_assert_uimm_bits!(IMM8, 8);
15348        let a = a.as_f32x4();
15349        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15350        transmute(r)
15351    }
15352}
15353
15354/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15355/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15356///
15357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15358#[inline]
15359#[target_feature(enable = "avx512f")]
15360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15361#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15362#[rustc_legacy_const_generics(1)]
15363pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15364    unsafe {
15365        static_assert_sae!(SAE);
15366        let a = a.as_i16x16();
15367        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
15368        transmute(r)
15369    }
15370}
15371
15372/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15373/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15376#[inline]
15377#[target_feature(enable = "avx512f")]
15378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15379#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15382    unsafe {
15383        static_assert_sae!(SAE);
15384        let a = a.as_i16x16();
15385        let src = src.as_f32x16();
15386        let r = vcvtph2ps(a, src, k, SAE);
15387        transmute(r)
15388    }
15389}
15390
15391/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15393///
15394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15395#[inline]
15396#[target_feature(enable = "avx512f")]
15397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15398#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15399#[rustc_legacy_const_generics(2)]
15400pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15401    unsafe {
15402        static_assert_sae!(SAE);
15403        let a = a.as_i16x16();
15404        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
15405        transmute(r)
15406    }
15407}
15408
15409/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15410///
15411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
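///
/// A minimal sketch (illustrative only; nightly with `stdarch_x86_avx512` and an AVX-512F CPU
/// assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     // 0x3C00 is the IEEE 754 binary16 encoding of 1.0.
///     let halves = _mm256_set1_epi16(0x3C00);
///     let r = _mm512_cvtph_ps(halves);
///     assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(r)), 1.0);
/// }
/// ```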
15412#[inline]
15413#[target_feature(enable = "avx512f")]
15414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15415#[cfg_attr(test, assert_instr(vcvtph2ps))]
15416pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15417    unsafe {
15418        transmute(vcvtph2ps(
15419            a.as_i16x16(),
15420            f32x16::ZERO,
15421            0b11111111_11111111,
15422            _MM_FROUND_NO_EXC,
15423        ))
15424    }
15425}
15426
15427/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15428///
15429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15430#[inline]
15431#[target_feature(enable = "avx512f")]
15432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15433#[cfg_attr(test, assert_instr(vcvtph2ps))]
15434pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15435    unsafe {
15436        transmute(vcvtph2ps(
15437            a.as_i16x16(),
15438            src.as_f32x16(),
15439            k,
15440            _MM_FROUND_NO_EXC,
15441        ))
15442    }
15443}
15444
15445/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15446///
15447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15448#[inline]
15449#[target_feature(enable = "avx512f")]
15450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15451#[cfg_attr(test, assert_instr(vcvtph2ps))]
15452pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15453    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15454}
15455
15456/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15457///
15458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15459#[inline]
15460#[target_feature(enable = "avx512f,avx512vl")]
15461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15462#[cfg_attr(test, assert_instr(vcvtph2ps))]
15463pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15464    unsafe {
15465        let convert = _mm256_cvtph_ps(a);
15466        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15467    }
15468}
15469
15470/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15471///
15472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15473#[inline]
15474#[target_feature(enable = "avx512f,avx512vl")]
15475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15476#[cfg_attr(test, assert_instr(vcvtph2ps))]
15477pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15478    unsafe {
15479        let convert = _mm256_cvtph_ps(a);
15480        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15481    }
15482}
15483
15484/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15485///
15486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15487#[inline]
15488#[target_feature(enable = "avx512f,avx512vl")]
15489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15490#[cfg_attr(test, assert_instr(vcvtph2ps))]
15491pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15492    unsafe {
15493        let convert = _mm_cvtph_ps(a);
15494        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15495    }
15496}
15497
15498/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15499///
15500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15501#[inline]
15502#[target_feature(enable = "avx512f,avx512vl")]
15503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15504#[cfg_attr(test, assert_instr(vcvtph2ps))]
15505pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15506    unsafe {
15507        let convert = _mm_cvtph_ps(a);
15508        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15509    }
15510}
15511
15512/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
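///
/// A minimal sketch (illustrative only; nightly with `stdarch_x86_avx512` and an AVX-512F CPU
/// assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(-1.9);
///     // Truncation always rounds toward zero, so -1.9 becomes -1.
///     let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
///     assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(r)), -1);
/// }
/// ```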
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15519#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15520#[rustc_legacy_const_generics(1)]
15521pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15522    unsafe {
15523        static_assert_sae!(SAE);
15524        let a = a.as_f32x16();
15525        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
15526        transmute(r)
15527    }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15532///
15533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15534#[inline]
15535#[target_feature(enable = "avx512f")]
15536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15537#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15538#[rustc_legacy_const_generics(3)]
15539pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15540    src: __m512i,
15541    k: __mmask16,
15542    a: __m512,
15543) -> __m512i {
15544    unsafe {
15545        static_assert_sae!(SAE);
15546        let a = a.as_f32x16();
15547        let src = src.as_i32x16();
15548        let r = vcvttps2dq(a, src, k, SAE);
15549        transmute(r)
15550    }
15551}
15552
15553/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15554/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15555///
15556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15557#[inline]
15558#[target_feature(enable = "avx512f")]
15559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15560#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15561#[rustc_legacy_const_generics(2)]
15562pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15563    unsafe {
15564        static_assert_sae!(SAE);
15565        let a = a.as_f32x16();
15566        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
15567        transmute(r)
15568    }
15569}
15570
15571/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15573///
15574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15575#[inline]
15576#[target_feature(enable = "avx512f")]
15577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15578#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15579#[rustc_legacy_const_generics(1)]
15580pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15581    unsafe {
15582        static_assert_sae!(SAE);
15583        let a = a.as_f32x16();
15584        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
15585        transmute(r)
15586    }
15587}
15588
15589/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15591///
15592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15593#[inline]
15594#[target_feature(enable = "avx512f")]
15595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15596#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15597#[rustc_legacy_const_generics(3)]
15598pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15599    src: __m512i,
15600    k: __mmask16,
15601    a: __m512,
15602) -> __m512i {
15603    unsafe {
15604        static_assert_sae!(SAE);
15605        let a = a.as_f32x16();
15606        let src = src.as_u32x16();
15607        let r = vcvttps2udq(a, src, k, SAE);
15608        transmute(r)
15609    }
15610}
15611
15612/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15613/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15614///
15615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15616#[inline]
15617#[target_feature(enable = "avx512f")]
15618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15619#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15620#[rustc_legacy_const_generics(2)]
15621pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15622    unsafe {
15623        static_assert_sae!(SAE);
15624        let a = a.as_f32x16();
15625        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
15626        transmute(r)
15627    }
15628}
15629
15630/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15631/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15632///
15633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
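///
/// A minimal sketch (illustrative only; nightly with `stdarch_x86_avx512` and an AVX-512F CPU
/// assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(-3.7);
///     // Eight f64 lanes truncate toward zero into the eight i32 lanes of a __m256i.
///     let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a);
///     assert_eq!(_mm_cvtsi128_si32(_mm256_castsi256_si128(r)), -3);
/// }
/// ```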
15634#[inline]
15635#[target_feature(enable = "avx512f")]
15636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15637#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15638#[rustc_legacy_const_generics(1)]
15639pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15640    unsafe {
15641        static_assert_sae!(SAE);
15642        let a = a.as_f64x8();
15643        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
15644        transmute(r)
15645    }
15646}
15647
15648/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15649/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15650///
15651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15652#[inline]
15653#[target_feature(enable = "avx512f")]
15654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15655#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15656#[rustc_legacy_const_generics(3)]
15657pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15658    src: __m256i,
15659    k: __mmask8,
15660    a: __m512d,
15661) -> __m256i {
15662    unsafe {
15663        static_assert_sae!(SAE);
15664        let a = a.as_f64x8();
15665        let src = src.as_i32x8();
15666        let r = vcvttpd2dq(a, src, k, SAE);
15667        transmute(r)
15668    }
15669}
15670
15671/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15673///
15674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
15675#[inline]
15676#[target_feature(enable = "avx512f")]
15677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15678#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15679#[rustc_legacy_const_generics(2)]
15680pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15681    unsafe {
15682        static_assert_sae!(SAE);
15683        let a = a.as_f64x8();
15684        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
15685        transmute(r)
15686    }
15687}
15688
15689/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15691///
15692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15693#[inline]
15694#[target_feature(enable = "avx512f")]
15695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15696#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15697#[rustc_legacy_const_generics(1)]
15698pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15699    unsafe {
15700        static_assert_sae!(SAE);
15701        let a = a.as_f64x8();
15702        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
15703        transmute(r)
15704    }
15705}
15706
15707/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15708/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15709///
15710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15711#[inline]
15712#[target_feature(enable = "avx512f")]
15713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15714#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15715#[rustc_legacy_const_generics(3)]
15716pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15717    src: __m256i,
15718    k: __mmask8,
15719    a: __m512d,
15720) -> __m256i {
15721    unsafe {
15722        static_assert_sae!(SAE);
15723        let a = a.as_f64x8();
15724        let src = src.as_i32x8();
15725        let r = vcvttpd2udq(a, src, k, SAE);
15726        transmute(r)
15727    }
15728}
15729
15730/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15731///
15732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
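///
/// A minimal sketch (illustrative only; nightly with `stdarch_x86_avx512` and an AVX-512F CPU
/// assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.9);
///     // Unlike `_mm512_cvtps_epi32`, the fractional part is always dropped: 2.9 -> 2.
///     let r = _mm512_cvttps_epi32(a);
///     assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(r)), 2);
/// }
/// ```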
15733#[inline]
15734#[target_feature(enable = "avx512f")]
15735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15736#[cfg_attr(test, assert_instr(vcvttps2dq))]
15737pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15738    unsafe {
15739        transmute(vcvttps2dq(
15740            a.as_f32x16(),
15741            i32x16::ZERO,
15742            0b11111111_11111111,
15743            _MM_FROUND_CUR_DIRECTION,
15744        ))
15745    }
15746}
15747
15748/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15749///
15750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15751#[inline]
15752#[target_feature(enable = "avx512f")]
15753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15754#[cfg_attr(test, assert_instr(vcvttps2dq))]
15755pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15756    unsafe {
15757        transmute(vcvttps2dq(
15758            a.as_f32x16(),
15759            src.as_i32x16(),
15760            k,
15761            _MM_FROUND_CUR_DIRECTION,
15762        ))
15763    }
15764}
15765
15766/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
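///
/// A sketch of the zeromask behaviour (illustrative only; nightly with `stdarch_x86_avx512`
/// and an AVX-512F CPU assumed, `demo` is a hypothetical wrapper):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `demo` is a hypothetical wrapper that enables AVX-512F for its body.
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(7.5);
///     // Only lane 0 is converted (7.5 truncates to 7); lanes 1..16 are zeroed.
///     let r = _mm512_maskz_cvttps_epi32(0b0000_0000_0000_0001, a);
///     assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(r)), 7);
/// }
/// ```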
15769#[inline]
15770#[target_feature(enable = "avx512f")]
15771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15772#[cfg_attr(test, assert_instr(vcvttps2dq))]
15773pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15774    unsafe {
15775        transmute(vcvttps2dq(
15776            a.as_f32x16(),
15777            i32x16::ZERO,
15778            k,
15779            _MM_FROUND_CUR_DIRECTION,
15780        ))
15781    }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15785///
15786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15787#[inline]
15788#[target_feature(enable = "avx512f,avx512vl")]
15789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15790#[cfg_attr(test, assert_instr(vcvttps2dq))]
15791pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15792    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15793}
15794
15795/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15796///
15797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15798#[inline]
15799#[target_feature(enable = "avx512f,avx512vl")]
15800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15801#[cfg_attr(test, assert_instr(vcvttps2dq))]
15802pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15803    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15804}
15805
15806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15807///
15808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15809#[inline]
15810#[target_feature(enable = "avx512f,avx512vl")]
15811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15812#[cfg_attr(test, assert_instr(vcvttps2dq))]
15813pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15814    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15815}
15816
15817/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15818///
15819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15820#[inline]
15821#[target_feature(enable = "avx512f,avx512vl")]
15822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15823#[cfg_attr(test, assert_instr(vcvttps2dq))]
15824pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15825    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15826}
15827
15828/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15829///
15830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15831#[inline]
15832#[target_feature(enable = "avx512f")]
15833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15834#[cfg_attr(test, assert_instr(vcvttps2udq))]
15835pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15836    unsafe {
15837        transmute(vcvttps2udq(
15838            a.as_f32x16(),
15839            u32x16::ZERO,
15840            0b11111111_11111111,
15841            _MM_FROUND_CUR_DIRECTION,
15842        ))
15843    }
15844}
15845
15846/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15847///
15848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15849#[inline]
15850#[target_feature(enable = "avx512f")]
15851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15852#[cfg_attr(test, assert_instr(vcvttps2udq))]
15853pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15854    unsafe {
15855        transmute(vcvttps2udq(
15856            a.as_f32x16(),
15857            src.as_u32x16(),
15858            k,
15859            _MM_FROUND_CUR_DIRECTION,
15860        ))
15861    }
15862}
15863
15864/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15865///
15866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15867#[inline]
15868#[target_feature(enable = "avx512f")]
15869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15870#[cfg_attr(test, assert_instr(vcvttps2udq))]
15871pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15872    unsafe {
15873        transmute(vcvttps2udq(
15874            a.as_f32x16(),
15875            u32x16::ZERO,
15876            k,
15877            _MM_FROUND_CUR_DIRECTION,
15878        ))
15879    }
15880}
15881
15882/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15883///
15884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15885#[inline]
15886#[target_feature(enable = "avx512f,avx512vl")]
15887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15888#[cfg_attr(test, assert_instr(vcvttps2udq))]
15889pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15890    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15891}
15892
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15894///
15895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15896#[inline]
15897#[target_feature(enable = "avx512f,avx512vl")]
15898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15899#[cfg_attr(test, assert_instr(vcvttps2udq))]
15900pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15901    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15902}
15903
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15907#[inline]
15908#[target_feature(enable = "avx512f,avx512vl")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vcvttps2udq))]
15911pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15912    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15913}
15914
15915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttps2udq))]
15922pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15923    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15924}
15925
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttps2udq))]
15933pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15934    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15935}
15936
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15943#[cfg_attr(test, assert_instr(vcvttps2udq))]
15944pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15945    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
15946}
15947
15948/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15950///
15951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
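///
/// # Examples
///
/// A sketch of suppressing exceptions via the `SAE` parameter (illustrative only;
/// `demo` is a hypothetical helper and assumes a nightly toolchain on an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo(a: __m512d) -> __m256i {
///     // Convert with exceptions suppressed; lanes with a clear mask bit are zeroed.
///     _mm512_maskz_cvtt_roundpd_epu32::<{ _MM_FROUND_NO_EXC }>(0b00001111, a)
/// }
/// ```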
15952#[inline]
15953#[target_feature(enable = "avx512f")]
15954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15955#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15956#[rustc_legacy_const_generics(2)]
15957pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15958    unsafe {
15959        static_assert_sae!(SAE);
15960        let a = a.as_f64x8();
15961        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
15962        transmute(r)
15963    }
15964}
15965
15966/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
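///
/// # Examples
///
/// A minimal, illustrative sketch (`demo` is a hypothetical helper; assumes a nightly
/// toolchain and an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m256i {
///     // Eight f64 lanes narrow to eight i32 lanes; -1.9 truncates toward zero to -1.
///     _mm512_cvttpd_epi32(_mm512_set1_pd(-1.9))
/// }
/// ```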
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15972#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15973pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15974    unsafe {
15975        transmute(vcvttpd2dq(
15976            a.as_f64x8(),
15977            i32x8::ZERO,
15978            0b11111111,
15979            _MM_FROUND_CUR_DIRECTION,
15980        ))
15981    }
15982}
15983
15984/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15991pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15992    unsafe {
15993        transmute(vcvttpd2dq(
15994            a.as_f64x8(),
15995            src.as_i32x8(),
15996            k,
15997            _MM_FROUND_CUR_DIRECTION,
15998        ))
15999    }
16000}
16001
16002/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16009pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16010    unsafe {
16011        transmute(vcvttpd2dq(
16012            a.as_f64x8(),
16013            i32x8::ZERO,
16014            k,
16015            _MM_FROUND_CUR_DIRECTION,
16016        ))
16017    }
16018}
16019
16020/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16021///
16022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16023#[inline]
16024#[target_feature(enable = "avx512f,avx512vl")]
16025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16026#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16027pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16028    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16029}
16030
16031/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16034#[inline]
16035#[target_feature(enable = "avx512f,avx512vl")]
16036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16037#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16038pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16039    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16040}
16041
16042/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16043///
16044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16045#[inline]
16046#[target_feature(enable = "avx512f,avx512vl")]
16047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16048#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16049pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16050    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16051}
16052
16053/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16054///
16055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16056#[inline]
16057#[target_feature(enable = "avx512f,avx512vl")]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16060pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16061    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16062}
16063
16064/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16065///
16066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16067#[inline]
16068#[target_feature(enable = "avx512f")]
16069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16070#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16071pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16072    unsafe {
16073        transmute(vcvttpd2udq(
16074            a.as_f64x8(),
16075            i32x8::ZERO,
16076            0b11111111,
16077            _MM_FROUND_CUR_DIRECTION,
16078        ))
16079    }
16080}
16081
16082/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16083///
16084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16085#[inline]
16086#[target_feature(enable = "avx512f")]
16087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16088#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16089pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16090    unsafe {
16091        transmute(vcvttpd2udq(
16092            a.as_f64x8(),
16093            src.as_i32x8(),
16094            k,
16095            _MM_FROUND_CUR_DIRECTION,
16096        ))
16097    }
16098}
16099
16100/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16101///
16102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16103#[inline]
16104#[target_feature(enable = "avx512f")]
16105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16106#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16107pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16108    unsafe {
16109        transmute(vcvttpd2udq(
16110            a.as_f64x8(),
16111            i32x8::ZERO,
16112            k,
16113            _MM_FROUND_CUR_DIRECTION,
16114        ))
16115    }
16116}
16117
16118/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16119///
16120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16121#[inline]
16122#[target_feature(enable = "avx512f,avx512vl")]
16123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16124#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16125pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16126    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16127}
16128
16129/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16130///
16131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16132#[inline]
16133#[target_feature(enable = "avx512f,avx512vl")]
16134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16135#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16136pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16137    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16138}
16139
16140/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16141///
16142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16143#[inline]
16144#[target_feature(enable = "avx512f,avx512vl")]
16145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16146#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16147pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16148    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16149}
16150
16151/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16152///
16153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16154#[inline]
16155#[target_feature(enable = "avx512f,avx512vl")]
16156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16157#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16158pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16159    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16160}
16161
16162/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16163///
16164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16165#[inline]
16166#[target_feature(enable = "avx512f,avx512vl")]
16167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16168#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16169pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16170    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16171}
16172
16173/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16174///
16175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16176#[inline]
16177#[target_feature(enable = "avx512f,avx512vl")]
16178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16179#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16180pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16181    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16182}
16183
16184/// Returns vector of type `__m512d` with all elements set to zero.
16185///
16186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16187#[inline]
16188#[target_feature(enable = "avx512f")]
16189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16190#[cfg_attr(test, assert_instr(vxorps))]
16191pub fn _mm512_setzero_pd() -> __m512d {
16192    // All-0 is a properly initialized __m512d
16193    unsafe { const { mem::zeroed() } }
16194}
16195
16196/// Returns vector of type `__m512` with all elements set to zero.
16197///
16198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16199#[inline]
16200#[target_feature(enable = "avx512f")]
16201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16202#[cfg_attr(test, assert_instr(vxorps))]
16203pub fn _mm512_setzero_ps() -> __m512 {
16204    // All-0 is a properly initialized __m512
16205    unsafe { const { mem::zeroed() } }
16206}
16207
/// Returns vector of type `__m512` with all elements set to zero.
16209///
16210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16211#[inline]
16212#[target_feature(enable = "avx512f")]
16213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16214#[cfg_attr(test, assert_instr(vxorps))]
16215pub fn _mm512_setzero() -> __m512 {
16216    // All-0 is a properly initialized __m512
16217    unsafe { const { mem::zeroed() } }
16218}
16219
16220/// Returns vector of type `__m512i` with all elements set to zero.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226#[cfg_attr(test, assert_instr(vxorps))]
16227pub fn _mm512_setzero_si512() -> __m512i {
16228    // All-0 is a properly initialized __m512i
16229    unsafe { const { mem::zeroed() } }
16230}
16231
/// Returns vector of type `__m512i` with all elements set to zero.
16233///
16234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16235#[inline]
16236#[target_feature(enable = "avx512f")]
16237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16238#[cfg_attr(test, assert_instr(vxorps))]
16239pub fn _mm512_setzero_epi32() -> __m512i {
16240    // All-0 is a properly initialized __m512i
16241    unsafe { const { mem::zeroed() } }
16242}
16243
16244/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16245/// order.
16246///
16247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
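///
/// # Examples
///
/// A minimal, illustrative sketch (`demo` is a hypothetical helper; assumes a nightly
/// toolchain and an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     // The first argument lands in the lowest lane, i.e. this is equivalent to
///     // _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0).
///     _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
/// }
/// ```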
16248#[inline]
16249#[target_feature(enable = "avx512f")]
16250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16251pub fn _mm512_setr_epi32(
16252    e15: i32,
16253    e14: i32,
16254    e13: i32,
16255    e12: i32,
16256    e11: i32,
16257    e10: i32,
16258    e9: i32,
16259    e8: i32,
16260    e7: i32,
16261    e6: i32,
16262    e5: i32,
16263    e4: i32,
16264    e3: i32,
16265    e2: i32,
16266    e1: i32,
16267    e0: i32,
16268) -> __m512i {
16269    unsafe {
16270        let r = i32x16::new(
16271            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16272        );
16273        transmute(r)
16274    }
16275}
16276
16277/// Set packed 8-bit integers in dst with the supplied values.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283pub fn _mm512_set_epi8(
16284    e63: i8,
16285    e62: i8,
16286    e61: i8,
16287    e60: i8,
16288    e59: i8,
16289    e58: i8,
16290    e57: i8,
16291    e56: i8,
16292    e55: i8,
16293    e54: i8,
16294    e53: i8,
16295    e52: i8,
16296    e51: i8,
16297    e50: i8,
16298    e49: i8,
16299    e48: i8,
16300    e47: i8,
16301    e46: i8,
16302    e45: i8,
16303    e44: i8,
16304    e43: i8,
16305    e42: i8,
16306    e41: i8,
16307    e40: i8,
16308    e39: i8,
16309    e38: i8,
16310    e37: i8,
16311    e36: i8,
16312    e35: i8,
16313    e34: i8,
16314    e33: i8,
16315    e32: i8,
16316    e31: i8,
16317    e30: i8,
16318    e29: i8,
16319    e28: i8,
16320    e27: i8,
16321    e26: i8,
16322    e25: i8,
16323    e24: i8,
16324    e23: i8,
16325    e22: i8,
16326    e21: i8,
16327    e20: i8,
16328    e19: i8,
16329    e18: i8,
16330    e17: i8,
16331    e16: i8,
16332    e15: i8,
16333    e14: i8,
16334    e13: i8,
16335    e12: i8,
16336    e11: i8,
16337    e10: i8,
16338    e9: i8,
16339    e8: i8,
16340    e7: i8,
16341    e6: i8,
16342    e5: i8,
16343    e4: i8,
16344    e3: i8,
16345    e2: i8,
16346    e1: i8,
16347    e0: i8,
16348) -> __m512i {
16349    unsafe {
16350        let r = i8x64::new(
16351            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16352            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
16353            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
16354            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
16355        );
16356        transmute(r)
16357    }
16358}
16359
16360/// Set packed 16-bit integers in dst with the supplied values.
16361///
16362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16363#[inline]
16364#[target_feature(enable = "avx512f")]
16365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16366pub fn _mm512_set_epi16(
16367    e31: i16,
16368    e30: i16,
16369    e29: i16,
16370    e28: i16,
16371    e27: i16,
16372    e26: i16,
16373    e25: i16,
16374    e24: i16,
16375    e23: i16,
16376    e22: i16,
16377    e21: i16,
16378    e20: i16,
16379    e19: i16,
16380    e18: i16,
16381    e17: i16,
16382    e16: i16,
16383    e15: i16,
16384    e14: i16,
16385    e13: i16,
16386    e12: i16,
16387    e11: i16,
16388    e10: i16,
16389    e9: i16,
16390    e8: i16,
16391    e7: i16,
16392    e6: i16,
16393    e5: i16,
16394    e4: i16,
16395    e3: i16,
16396    e2: i16,
16397    e1: i16,
16398    e0: i16,
16399) -> __m512i {
16400    unsafe {
16401        let r = i16x32::new(
16402            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16403            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
16404        );
16405        transmute(r)
16406    }
16407}
16408
16409/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16410///
16411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
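///
/// # Examples
///
/// A minimal, illustrative sketch (`demo` is a hypothetical helper; assumes a nightly
/// toolchain and an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     // Produces the same vector as
///     // _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1).
///     _mm512_set4_epi32(4, 3, 2, 1)
/// }
/// ```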
16412#[inline]
16413#[target_feature(enable = "avx512f")]
16414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16415pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16416    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16417}
16418
16419/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16426    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16427}
16428
16429/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16430///
16431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16432#[inline]
16433#[target_feature(enable = "avx512f")]
16434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16435pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16436    _mm512_set_pd(d, c, b, a, d, c, b, a)
16437}
16438
16439/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16440///
16441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16442#[inline]
16443#[target_feature(enable = "avx512f")]
16444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16445pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16446    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16447}
16448
16449/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16450///
16451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16452#[inline]
16453#[target_feature(enable = "avx512f")]
16454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16455pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16456    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16457}
16458
16459/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16460///
16461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16462#[inline]
16463#[target_feature(enable = "avx512f")]
16464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16465pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16466    _mm512_set_pd(a, b, c, d, a, b, c, d)
16467}
16468
16469/// Set packed 64-bit integers in dst with the supplied values.
16470///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
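///
/// # Examples
///
/// A minimal, illustrative sketch (`demo` is a hypothetical helper; assumes a nightly
/// toolchain and an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     // The last argument is element 0, so lane 0 holds 1 and lane 7 holds 8.
///     _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1)
/// }
/// ```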
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set_epi64(
16476    e0: i64,
16477    e1: i64,
16478    e2: i64,
16479    e3: i64,
16480    e4: i64,
16481    e5: i64,
16482    e6: i64,
16483    e7: i64,
16484) -> __m512i {
16485    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16486}
16487
16488/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16489///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16491#[inline]
16492#[target_feature(enable = "avx512f")]
16493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16494pub fn _mm512_setr_epi64(
16495    e0: i64,
16496    e1: i64,
16497    e2: i64,
16498    e3: i64,
16499    e4: i64,
16500    e5: i64,
16501    e6: i64,
16502    e7: i64,
16503) -> __m512i {
16504    unsafe {
16505        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16506        transmute(r)
16507    }
16508}
16509
16510/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16511///
16512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
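///
/// # Examples
///
/// A sketch of a gather from a lookup table (illustrative only; `demo` and `table`
/// are hypothetical and assume a nightly toolchain on an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[f64; 16]) -> __m512d {
///     // Gather table[1], table[3], ...; offsets are index * SCALE bytes, so SCALE = 8
///     // matches the size of an f64 element.
///     let idx = _mm256_setr_epi32(1, 3, 5, 7, 9, 11, 13, 15);
///     _mm512_i32gather_pd::<8>(idx, table.as_ptr() as *const u8)
/// }
/// ```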
16513#[inline]
16514#[target_feature(enable = "avx512f")]
16515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16516#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16517#[rustc_legacy_const_generics(2)]
16518pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
16519    static_assert_imm8_scale!(SCALE);
16520    let zero = f64x8::ZERO;
16521    let neg_one = -1;
16522    let slice = slice as *const i8;
16523    let offsets = offsets.as_i32x8();
16524    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
16525    transmute(r)
16526}
16527
16528/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16529///
16530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16531#[inline]
16532#[target_feature(enable = "avx512f")]
16533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16534#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16535#[rustc_legacy_const_generics(4)]
16536pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16537    src: __m512d,
16538    mask: __mmask8,
16539    offsets: __m256i,
16540    slice: *const u8,
16541) -> __m512d {
16542    static_assert_imm8_scale!(SCALE);
16543    let src = src.as_f64x8();
16544    let slice = slice as *const i8;
16545    let offsets = offsets.as_i32x8();
16546    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16547    transmute(r)
16548}
16549
16550/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16551///
16552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16553#[inline]
16554#[target_feature(enable = "avx512f")]
16555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16556#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16557#[rustc_legacy_const_generics(2)]
16558pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
16559    static_assert_imm8_scale!(SCALE);
16560    let zero = f64x8::ZERO;
16561    let neg_one = -1;
16562    let slice = slice as *const i8;
16563    let offsets = offsets.as_i64x8();
16564    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
16565    transmute(r)
16566}
16567
16568/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16569///
16570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16571#[inline]
16572#[target_feature(enable = "avx512f")]
16573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16574#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16575#[rustc_legacy_const_generics(4)]
16576pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16577    src: __m512d,
16578    mask: __mmask8,
16579    offsets: __m512i,
16580    slice: *const u8,
16581) -> __m512d {
16582    static_assert_imm8_scale!(SCALE);
16583    let src = src.as_f64x8();
16584    let slice = slice as *const i8;
16585    let offsets = offsets.as_i64x8();
16586    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16587    transmute(r)
16588}
16589
16590/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16591///
16592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16593#[inline]
16594#[target_feature(enable = "avx512f")]
16595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16596#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16597#[rustc_legacy_const_generics(2)]
16598pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
16599    static_assert_imm8_scale!(SCALE);
16600    let zero = f32x8::ZERO;
16601    let neg_one = -1;
16602    let slice = slice as *const i8;
16603    let offsets = offsets.as_i64x8();
16604    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
16605    transmute(r)
16606}
16607
16608/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16609///
16610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16611#[inline]
16612#[target_feature(enable = "avx512f")]
16613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16614#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16615#[rustc_legacy_const_generics(4)]
16616pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16617    src: __m256,
16618    mask: __mmask8,
16619    offsets: __m512i,
16620    slice: *const u8,
16621) -> __m256 {
16622    static_assert_imm8_scale!(SCALE);
16623    let src = src.as_f32x8();
16624    let slice = slice as *const i8;
16625    let offsets = offsets.as_i64x8();
16626    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16627    transmute(r)
16628}
16629
16630/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16631///
16632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16633#[inline]
16634#[target_feature(enable = "avx512f")]
16635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16636#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16637#[rustc_legacy_const_generics(2)]
16638pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
16639    static_assert_imm8_scale!(SCALE);
16640    let zero = f32x16::ZERO;
16641    let neg_one = -1;
16642    let slice = slice as *const i8;
16643    let offsets = offsets.as_i32x16();
16644    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
16645    transmute(r)
16646}
16647
16648/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16649///
16650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
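///
/// # Examples
///
/// A sketch of a masked gather (illustrative only; `demo` and `table` are hypothetical
/// and assume a nightly toolchain on an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[f32; 16]) -> __m512 {
///     let src = _mm512_set1_ps(f32::NAN);
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let p = table.as_ptr() as *const u8;
///     // Even lanes are loaded from `table` (SCALE = 4 for f32); odd lanes keep `src`.
///     _mm512_mask_i32gather_ps::<4>(src, 0b01010101_01010101, idx, p)
/// }
/// ```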
16651#[inline]
16652#[target_feature(enable = "avx512f")]
16653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16654#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16655#[rustc_legacy_const_generics(4)]
16656pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16657    src: __m512,
16658    mask: __mmask16,
16659    offsets: __m512i,
16660    slice: *const u8,
16661) -> __m512 {
16662    static_assert_imm8_scale!(SCALE);
16663    let src = src.as_f32x16();
16664    let slice = slice as *const i8;
16665    let offsets = offsets.as_i32x16();
16666    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16667    transmute(r)
16668}
16669
16670/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16671///
16672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16673#[inline]
16674#[target_feature(enable = "avx512f")]
16675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16676#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16677#[rustc_legacy_const_generics(2)]
16678pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16679    offsets: __m512i,
16680    slice: *const u8,
16681) -> __m512i {
16682    static_assert_imm8_scale!(SCALE);
16683    let zero = i32x16::ZERO;
16684    let neg_one = -1;
16685    let slice = slice as *const i8;
16686    let offsets = offsets.as_i32x16();
16687    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
16688    transmute(r)
16689}
16690
16691/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16692///
16693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16694#[inline]
16695#[target_feature(enable = "avx512f")]
16696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16697#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16698#[rustc_legacy_const_generics(4)]
16699pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16700    src: __m512i,
16701    mask: __mmask16,
16702    offsets: __m512i,
16703    slice: *const u8,
16704) -> __m512i {
16705    static_assert_imm8_scale!(SCALE);
16706    let src = src.as_i32x16();
16707    let mask = mask as i16;
16708    let slice = slice as *const i8;
16709    let offsets = offsets.as_i32x16();
16710    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
16711    transmute(r)
16712}
16713
16714/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16720#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16721#[rustc_legacy_const_generics(2)]
16722pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16723    offsets: __m256i,
16724    slice: *const u8,
16725) -> __m512i {
16726    static_assert_imm8_scale!(SCALE);
16727    let zero = i64x8::ZERO;
16728    let neg_one = -1;
16729    let slice = slice as *const i8;
16730    let offsets = offsets.as_i32x8();
16731    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
16732    transmute(r)
16733}
16734
16735/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16736///
16737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16738#[inline]
16739#[target_feature(enable = "avx512f")]
16740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16741#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16742#[rustc_legacy_const_generics(4)]
16743pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16744    src: __m512i,
16745    mask: __mmask8,
16746    offsets: __m256i,
16747    slice: *const u8,
16748) -> __m512i {
16749    static_assert_imm8_scale!(SCALE);
16750    let src = src.as_i64x8();
16751    let mask = mask as i8;
16752    let slice = slice as *const i8;
16753    let offsets = offsets.as_i32x8();
16754    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
16755    transmute(r)
16756}
16757
16758/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16759///
16760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16761#[inline]
16762#[target_feature(enable = "avx512f")]
16763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16764#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16765#[rustc_legacy_const_generics(2)]
16766pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16767    offsets: __m512i,
16768    slice: *const u8,
16769) -> __m512i {
16770    static_assert_imm8_scale!(SCALE);
16771    let zero = i64x8::ZERO;
16772    let neg_one = -1;
16773    let slice = slice as *const i8;
16774    let offsets = offsets.as_i64x8();
16775    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
16776    transmute(r)
16777}
16778
16779/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16780///
16781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16782#[inline]
16783#[target_feature(enable = "avx512f")]
16784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16785#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16786#[rustc_legacy_const_generics(4)]
16787pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16788    src: __m512i,
16789    mask: __mmask8,
16790    offsets: __m512i,
16791    slice: *const u8,
16792) -> __m512i {
16793    static_assert_imm8_scale!(SCALE);
16794    let src = src.as_i64x8();
16795    let mask = mask as i8;
16796    let slice = slice as *const i8;
16797    let offsets = offsets.as_i64x8();
16798    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
16799    transmute(r)
16800}
16801
16802/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16803///
16804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16805#[inline]
16806#[target_feature(enable = "avx512f")]
16807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16808#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16809#[rustc_legacy_const_generics(2)]
16810pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16811    offsets: __m512i,
16812    slice: *const u8,
16813) -> __m256i {
16814    static_assert_imm8_scale!(SCALE);
16815    let zeros = i32x8::ZERO;
16816    let neg_one = -1;
16817    let slice = slice as *const i8;
16818    let offsets = offsets.as_i64x8();
16819    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
16820    transmute(r)
16821}
16822
16823/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16824///
16825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16826#[inline]
16827#[target_feature(enable = "avx512f")]
16828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16829#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16830#[rustc_legacy_const_generics(4)]
16831pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16832    src: __m256i,
16833    mask: __mmask8,
16834    offsets: __m512i,
16835    slice: *const u8,
16836) -> __m256i {
16837    static_assert_imm8_scale!(SCALE);
16838    let src = src.as_i32x8();
16839    let mask = mask as i8;
16840    let slice = slice as *const i8;
16841    let offsets = offsets.as_i64x8();
16842    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
16843    transmute(r)
16844}
16845
16846/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16847///
16848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
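///
/// # Examples
///
/// A sketch of a scatter into a buffer (illustrative only; `demo` and `out` are
/// hypothetical and assume a nightly toolchain on an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [f64; 16]) {
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     // Store 1.0 into the even slots of `out`; SCALE = 8 matches the f64 element size.
///     _mm512_i32scatter_pd::<8>(out.as_mut_ptr() as *mut u8, idx, _mm512_set1_pd(1.0));
/// }
/// ```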
16849#[inline]
16850#[target_feature(enable = "avx512f")]
16851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16852#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16853#[rustc_legacy_const_generics(3)]
16854pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16855    slice: *mut u8,
16856    offsets: __m256i,
16857    src: __m512d,
16858) {
16859    static_assert_imm8_scale!(SCALE);
16860    let src = src.as_f64x8();
16861    let neg_one = -1;
16862    let slice = slice as *mut i8;
16863    let offsets = offsets.as_i32x8();
16864    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16865}
16866
16867/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16868///
16869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
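///
/// # Examples
///
/// A sketch of a masked scatter (illustrative only; `demo` and `out` are hypothetical
/// and assume a nightly toolchain on an x86_64 target):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [f64; 8]) {
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     let p = out.as_mut_ptr() as *mut u8;
///     // Only the four elements whose mask bits are set are written.
///     _mm512_mask_i32scatter_pd::<8>(p, 0b00001111, idx, _mm512_set1_pd(2.0));
/// }
/// ```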
16870#[inline]
16871#[target_feature(enable = "avx512f")]
16872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16873#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16874#[rustc_legacy_const_generics(4)]
16875pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16876    slice: *mut u8,
16877    mask: __mmask8,
16878    offsets: __m256i,
16879    src: __m512d,
16880) {
16881    static_assert_imm8_scale!(SCALE);
16882    let src = src.as_f64x8();
16883    let slice = slice as *mut i8;
16884    let offsets = offsets.as_i32x8();
16885    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16886}
16887
16888/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16889///
16890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16891#[inline]
16892#[target_feature(enable = "avx512f")]
16893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16894#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16895#[rustc_legacy_const_generics(3)]
16896pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16897    slice: *mut u8,
16898    offsets: __m512i,
16899    src: __m512d,
16900) {
16901    static_assert_imm8_scale!(SCALE);
16902    let src = src.as_f64x8();
16903    let neg_one = -1;
16904    let slice = slice as *mut i8;
16905    let offsets = offsets.as_i64x8();
16906    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16907}
16908
16909/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16910///
16911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16912#[inline]
16913#[target_feature(enable = "avx512f")]
16914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16915#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16916#[rustc_legacy_const_generics(4)]
16917pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16918    slice: *mut u8,
16919    mask: __mmask8,
16920    offsets: __m512i,
16921    src: __m512d,
16922) {
16923    static_assert_imm8_scale!(SCALE);
16924    let src = src.as_f64x8();
16925    let slice = slice as *mut i8;
16926    let offsets = offsets.as_i64x8();
16927    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16928}
16929
16930/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16931///
16932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16933#[inline]
16934#[target_feature(enable = "avx512f")]
16935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16936#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16937#[rustc_legacy_const_generics(3)]
16938pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16939    slice: *mut u8,
16940    offsets: __m512i,
16941    src: __m512,
16942) {
16943    static_assert_imm8_scale!(SCALE);
16944    let src = src.as_f32x16();
16945    let neg_one = -1;
16946    let slice = slice as *mut i8;
16947    let offsets = offsets.as_i32x16();
16948    vscatterdps(slice, neg_one, offsets, src, SCALE);
16949}
16950
16951/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16952///
16953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16954#[inline]
16955#[target_feature(enable = "avx512f")]
16956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16957#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16958#[rustc_legacy_const_generics(4)]
16959pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16960    slice: *mut u8,
16961    mask: __mmask16,
16962    offsets: __m512i,
16963    src: __m512,
16964) {
16965    static_assert_imm8_scale!(SCALE);
16966    let src = src.as_f32x16();
16967    let slice = slice as *mut i8;
16968    let offsets = offsets.as_i32x16();
16969    vscatterdps(slice, mask as i16, offsets, src, SCALE);
16970}
16971
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16973///
16974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16975#[inline]
16976#[target_feature(enable = "avx512f")]
16977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16978#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16979#[rustc_legacy_const_generics(3)]
16980pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16981    slice: *mut u8,
16982    offsets: __m512i,
16983    src: __m256,
16984) {
16985    static_assert_imm8_scale!(SCALE);
16986    let src = src.as_f32x8();
16987    let neg_one = -1;
16988    let slice = slice as *mut i8;
16989    let offsets = offsets.as_i64x8();
16990    vscatterqps(slice, neg_one, offsets, src, SCALE);
16991}
16992
16993/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16994///
16995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
16996#[inline]
16997#[target_feature(enable = "avx512f")]
16998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16999#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17000#[rustc_legacy_const_generics(4)]
17001pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17002    slice: *mut u8,
17003    mask: __mmask8,
17004    offsets: __m512i,
17005    src: __m256,
17006) {
17007    static_assert_imm8_scale!(SCALE);
17008    let src = src.as_f32x8();
17009    let slice = slice as *mut i8;
17010    let offsets = offsets.as_i64x8();
17011    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17012}
17013
17014/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17015///
17016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17017#[inline]
17018#[target_feature(enable = "avx512f")]
17019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17020#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17021#[rustc_legacy_const_generics(3)]
17022pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17023    slice: *mut u8,
17024    offsets: __m256i,
17025    src: __m512i,
17026) {
17027    static_assert_imm8_scale!(SCALE);
17028    let src = src.as_i64x8();
17029    let neg_one = -1;
17030    let slice = slice as *mut i8;
17031    let offsets = offsets.as_i32x8();
17032    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17033}
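
// --- Usage sketch (illustrative; not part of the original source) -------------
// `_mm512_i32scatter_epi64` stores all eight 64-bit lanes unconditionally; here they
// are written into a small buffer in reverse order. SCALE = 8 matches the size of an
// `i64`. The helper name and all concrete values are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_i32scatter_epi64() {
    let mut buf = [0i64; 8];
    unsafe {
        // Lane 0 goes to buf[7], lane 1 to buf[6], and so on.
        let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let vals = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
        _mm512_i32scatter_epi64::<8>(buf.as_mut_ptr() as *mut u8, idx, vals);
    }
    assert_eq!(buf, [7, 6, 5, 4, 3, 2, 1, 0]);
}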
17034
17035/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17036///
17037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17038#[inline]
17039#[target_feature(enable = "avx512f")]
17040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17041#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17042#[rustc_legacy_const_generics(4)]
17043pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17044    slice: *mut u8,
17045    mask: __mmask8,
17046    offsets: __m256i,
17047    src: __m512i,
17048) {
17049    static_assert_imm8_scale!(SCALE);
17050    let src = src.as_i64x8();
17051    let mask = mask as i8;
17052    let slice = slice as *mut i8;
17053    let offsets = offsets.as_i32x8();
17054    vpscatterdq(slice, mask, offsets, src, SCALE);
17055}
17056
17057/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17058///
17059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17060#[inline]
17061#[target_feature(enable = "avx512f")]
17062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17063#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17064#[rustc_legacy_const_generics(3)]
17065pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17066    slice: *mut u8,
17067    offsets: __m512i,
17068    src: __m512i,
17069) {
17070    static_assert_imm8_scale!(SCALE);
17071    let src = src.as_i64x8();
17072    let neg_one = -1;
17073    let slice = slice as *mut i8;
17074    let offsets = offsets.as_i64x8();
17075    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17076}
17077
17078/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17079///
17080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17081#[inline]
17082#[target_feature(enable = "avx512f")]
17083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17084#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17085#[rustc_legacy_const_generics(4)]
17086pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17087    slice: *mut u8,
17088    mask: __mmask8,
17089    offsets: __m512i,
17090    src: __m512i,
17091) {
17092    static_assert_imm8_scale!(SCALE);
17093    let src = src.as_i64x8();
17094    let mask = mask as i8;
17095    let slice = slice as *mut i8;
17096    let offsets = offsets.as_i64x8();
17097    vpscatterqq(slice, mask, offsets, src, SCALE);
17098}
17099
17100/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17101///
17102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17103#[inline]
17104#[target_feature(enable = "avx512f")]
17105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17106#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17107#[rustc_legacy_const_generics(3)]
17108pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17109    slice: *mut u8,
17110    offsets: __m512i,
17111    src: __m512i,
17112) {
17113    static_assert_imm8_scale!(SCALE);
17114    let src = src.as_i32x16();
17115    let neg_one = -1;
17116    let slice = slice as *mut i8;
17117    let offsets = offsets.as_i32x16();
17118    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17119}
17120
17121/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17122///
17123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17124#[inline]
17125#[target_feature(enable = "avx512f")]
17126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17127#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17128#[rustc_legacy_const_generics(4)]
17129pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17130    slice: *mut u8,
17131    mask: __mmask16,
17132    offsets: __m512i,
17133    src: __m512i,
17134) {
17135    static_assert_imm8_scale!(SCALE);
17136    let src = src.as_i32x16();
17137    let mask = mask as i16;
17138    let slice = slice as *mut i8;
17139    let offsets = offsets.as_i32x16();
17140    vpscatterdd(slice, mask, offsets, src, SCALE);
17141}
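
// --- Usage sketch (illustrative; not part of the original source) -------------
// The masked 32-bit scatter is handy for tails: a mask of 0x00ff stores only the low
// eight lanes, and an all-zero mask stores nothing at all. The helper name, buffer
// and index values below are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_mask_i32scatter_epi32() {
    let mut buf = [0i32; 16];
    unsafe {
        let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let vals = _mm512_set1_epi32(-1);
        // Only the eight lanes selected by the mask are written to memory.
        _mm512_mask_i32scatter_epi32::<4>(buf.as_mut_ptr() as *mut u8, 0x00ff, idx, vals);
    }
    assert_eq!(buf[7], -1);
    assert_eq!(buf[8], 0);
}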
17142
17143/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17144///
17145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17146#[inline]
17147#[target_feature(enable = "avx512f")]
17148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17149#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17150#[rustc_legacy_const_generics(3)]
17151pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17152    slice: *mut u8,
17153    offsets: __m512i,
17154    src: __m256i,
17155) {
17156    static_assert_imm8_scale!(SCALE);
17157    let src = src.as_i32x8();
17158    let neg_one = -1;
17159    let slice = slice as *mut i8;
17160    let offsets = offsets.as_i64x8();
17161    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17162}
17163
17164/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17165///
17166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17167#[inline]
17168#[target_feature(enable = "avx512f")]
17169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17170#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17171#[rustc_legacy_const_generics(4)]
17172pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17173    slice: *mut u8,
17174    mask: __mmask8,
17175    offsets: __m512i,
17176    src: __m256i,
17177) {
17178    static_assert_imm8_scale!(SCALE);
17179    let src = src.as_i32x8();
17180    let mask = mask as i8;
17181    let slice = slice as *mut i8;
17182    let offsets = offsets.as_i64x8();
17183    vpscatterqd(slice, mask, offsets, src, SCALE);
17184}
17185
17186/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17187/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17188///
17189/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17193#[rustc_legacy_const_generics(2)]
17194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17195pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17196    vindex: __m512i,
17197    base_addr: *const u8,
17198) -> __m512i {
17199    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
17200}
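
// --- Usage sketch (illustrative; not part of the original source) -------------
// `_mm512_i32logather_epi64` consumes only the low eight 32-bit indices of `vindex`;
// the upper eight lanes are ignored and may hold arbitrary values. The helper name,
// source slice and index values are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_i32logather_epi64() {
    let src = [10i64, 11, 12, 13, 14, 15, 16, 17];
    let gathered: [i64; 8] = unsafe {
        // The upper eight index lanes (99 here) do not participate in the gather.
        let idx = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 99, 99, 99, 99, 99, 99, 99, 99);
        let r = _mm512_i32logather_epi64::<8>(idx, src.as_ptr() as *const u8);
        mem::transmute(r)
    };
    assert_eq!(gathered, [17, 16, 15, 14, 13, 12, 11, 10]);
}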
17201
17202/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17203/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17204/// (elements are copied from src when the corresponding mask bit is not set).
17205///
17206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17207#[inline]
17208#[target_feature(enable = "avx512f")]
17209#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17210#[rustc_legacy_const_generics(4)]
17211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17212pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17213    src: __m512i,
17214    k: __mmask8,
17215    vindex: __m512i,
17216    base_addr: *const u8,
17217) -> __m512i {
17218    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
17219}
17220
17221/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17222/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17223///
17224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17225#[inline]
17226#[target_feature(enable = "avx512f")]
17227#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17228#[rustc_legacy_const_generics(2)]
17229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17230pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17231    vindex: __m512i,
17232    base_addr: *const u8,
17233) -> __m512d {
17234    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
17235}
17236
17237/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17238/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17239/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17240///
17241/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17242#[inline]
17243#[target_feature(enable = "avx512f")]
17244#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17245#[rustc_legacy_const_generics(4)]
17246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17247pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17248    src: __m512d,
17249    k: __mmask8,
17250    vindex: __m512i,
17251    base_addr: *const u8,
17252) -> __m512d {
17253    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
17254}
17255
17256/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17257/// indices stored in the lower half of vindex scaled by scale.
17258///
17259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17260#[inline]
17261#[target_feature(enable = "avx512f")]
17262#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17263#[rustc_legacy_const_generics(3)]
17264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17265pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17266    base_addr: *mut u8,
17267    vindex: __m512i,
17268    a: __m512i,
17269) {
17270    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
17271}
17272
17273/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17274/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17275/// mask bit is not set are not written to memory).
17276///
17277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17278#[inline]
17279#[target_feature(enable = "avx512f")]
17280#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17281#[rustc_legacy_const_generics(4)]
17282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17283pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17284    base_addr: *mut u8,
17285    k: __mmask8,
17286    vindex: __m512i,
17287    a: __m512i,
17288) {
17289    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
17290}
17291
17292/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17293/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17294///
17295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17296#[inline]
17297#[target_feature(enable = "avx512f")]
17298#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17299#[rustc_legacy_const_generics(3)]
17300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17301pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17302    base_addr: *mut u8,
17303    vindex: __m512i,
17304    a: __m512d,
17305) {
17306    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
17307}
17308
17309/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17310/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17311/// (elements whose corresponding mask bit is not set are not written to memory).
17312///
17313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17314#[inline]
17315#[target_feature(enable = "avx512f")]
17316#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17317#[rustc_legacy_const_generics(4)]
17318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17319pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17320    base_addr: *mut u8,
17321    k: __mmask8,
17322    vindex: __m512i,
17323    a: __m512d,
17324) {
17325    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
17326}
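
// --- Usage sketch (illustrative; not part of the original source) -------------
// `_mm512_mask_i32loscatter_pd` pairs the low eight 32-bit indices of `vindex` with
// the eight `f64` lanes of `a`; here only the first four lanes are written. The
// helper name and all concrete values are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_mask_i32loscatter_pd() {
    let mut buf = [0.0f64; 8];
    unsafe {
        let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        let vals = _mm512_set1_pd(3.5);
        // Mask bits 0..4 are set, so only buf[0..4] is written.
        _mm512_mask_i32loscatter_pd::<8>(buf.as_mut_ptr() as *mut u8, 0b0000_1111, idx, vals);
    }
    assert_eq!(buf[3], 3.5);
    assert_eq!(buf[4], 0.0);
}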
17327
17328/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17329/// indices stored in vindex scaled by scale.
17330///
17331/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17332#[inline]
17333#[target_feature(enable = "avx512f,avx512vl")]
17334#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17335#[rustc_legacy_const_generics(3)]
17336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17337pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17338    base_addr: *mut u8,
17339    vindex: __m256i,
17340    a: __m256i,
17341) {
17342    static_assert_imm8_scale!(SCALE);
17343    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17344}
17345
17346/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17347/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17348/// are not written to memory).
17349///
17350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17351#[inline]
17352#[target_feature(enable = "avx512f,avx512vl")]
17353#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17354#[rustc_legacy_const_generics(4)]
17355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17356pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17357    base_addr: *mut u8,
17358    k: __mmask8,
17359    vindex: __m256i,
17360    a: __m256i,
17361) {
17362    static_assert_imm8_scale!(SCALE);
17363    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17364}
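
// --- Usage sketch (illustrative; not part of the original source) -------------
// The AVX512VL form operates on 256-bit vectors: eight 32-bit lanes driven by an
// 8-bit mask. This sketch assumes both `avx512f` and `avx512vl` have been detected
// at runtime; the helper name and all concrete values are assumptions.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mask_i32scatter_epi32() {
    let mut buf = [0i32; 8];
    unsafe {
        let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let vals = _mm256_set1_epi32(9);
        // Mask 0b1100_0011 selects lanes 0, 1, 6 and 7.
        _mm256_mask_i32scatter_epi32::<4>(buf.as_mut_ptr() as *mut u8, 0b1100_0011, idx, vals);
    }
    assert_eq!(buf, [9, 9, 0, 0, 0, 0, 9, 9]);
}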
17365
17366/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17367///
17368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17369#[inline]
17370#[target_feature(enable = "avx512f,avx512vl")]
17371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17372#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17373#[rustc_legacy_const_generics(3)]
17374pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17375    slice: *mut u8,
17376    offsets: __m128i,
17377    src: __m256i,
17378) {
17379    static_assert_imm8_scale!(SCALE);
17380    let src = src.as_i64x4();
17381    let slice = slice as *mut i8;
17382    let offsets = offsets.as_i32x4();
17383    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17384}
17385
17386/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17387/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17388/// are not written to memory).
17389///
17390/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17391#[inline]
17392#[target_feature(enable = "avx512f,avx512vl")]
17393#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17394#[rustc_legacy_const_generics(4)]
17395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17396pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17397    base_addr: *mut u8,
17398    k: __mmask8,
17399    vindex: __m128i,
17400    a: __m256i,
17401) {
17402    static_assert_imm8_scale!(SCALE);
17403    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17404}
17405
17406/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17407/// at packed 32-bit integer indices stored in vindex scaled by scale.
17408///
17409/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17410#[inline]
17411#[target_feature(enable = "avx512f,avx512vl")]
17412#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17413#[rustc_legacy_const_generics(3)]
17414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17415pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17416    base_addr: *mut u8,
17417    vindex: __m128i,
17418    a: __m256d,
17419) {
17420    static_assert_imm8_scale!(SCALE);
17421    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17422}
17423
17424/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17425/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17426/// mask bit is not set are not written to memory).
17427///
17428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17429#[inline]
17430#[target_feature(enable = "avx512f,avx512vl")]
17431#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17432#[rustc_legacy_const_generics(4)]
17433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17434pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17435    base_addr: *mut u8,
17436    k: __mmask8,
17437    vindex: __m128i,
17438    a: __m256d,
17439) {
17440    static_assert_imm8_scale!(SCALE);
17441    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17442}
17443
17444/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17445/// at packed 32-bit integer indices stored in vindex scaled by scale.
17446///
17447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17448#[inline]
17449#[target_feature(enable = "avx512f,avx512vl")]
17450#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17451#[rustc_legacy_const_generics(3)]
17452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17453pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17454    base_addr: *mut u8,
17455    vindex: __m256i,
17456    a: __m256,
17457) {
17458    static_assert_imm8_scale!(SCALE);
17459    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17460}
17461
17462/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17463/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17464/// mask bit is not set are not written to memory).
17465///
17466/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17467#[inline]
17468#[target_feature(enable = "avx512f,avx512vl")]
17469#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17470#[rustc_legacy_const_generics(4)]
17471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17472pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17473    base_addr: *mut u8,
17474    k: __mmask8,
17475    vindex: __m256i,
17476    a: __m256,
17477) {
17478    static_assert_imm8_scale!(SCALE);
17479    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17480}
17481
17482/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17483/// indices stored in vindex scaled by scale.
17484///
17485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17486#[inline]
17487#[target_feature(enable = "avx512f,avx512vl")]
17488#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17489#[rustc_legacy_const_generics(3)]
17490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17491pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17492    base_addr: *mut u8,
17493    vindex: __m256i,
17494    a: __m128i,
17495) {
17496    static_assert_imm8_scale!(SCALE);
17497    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17498}
17499
17500/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17501/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17502/// are not written to memory).
17503///
17504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17505#[inline]
17506#[target_feature(enable = "avx512f,avx512vl")]
17507#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17508#[rustc_legacy_const_generics(4)]
17509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17510pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17511    base_addr: *mut u8,
17512    k: __mmask8,
17513    vindex: __m256i,
17514    a: __m128i,
17515) {
17516    static_assert_imm8_scale!(SCALE);
17517    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17518}
17519
17520/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17521/// indices stored in vindex scaled by scale.
17522///
17523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17524#[inline]
17525#[target_feature(enable = "avx512f,avx512vl")]
17526#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17527#[rustc_legacy_const_generics(3)]
17528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17529pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17530    base_addr: *mut u8,
17531    vindex: __m256i,
17532    a: __m256i,
17533) {
17534    static_assert_imm8_scale!(SCALE);
17535    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17536}
17537
17538/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17539/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17540/// are not written to memory).
17541///
17542/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17543#[inline]
17544#[target_feature(enable = "avx512f,avx512vl")]
17545#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17546#[rustc_legacy_const_generics(4)]
17547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17548pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17549    base_addr: *mut u8,
17550    k: __mmask8,
17551    vindex: __m256i,
17552    a: __m256i,
17553) {
17554    static_assert_imm8_scale!(SCALE);
17555    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17556}
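
// --- Usage sketch (illustrative; not part of the original source) -------------
// With 64-bit indices only four of them fit in a 256-bit index vector, so only the
// low four mask bits are meaningful. The helper name and all concrete values are
// assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mask_i64scatter_epi64() {
    let mut buf = [0i64; 4];
    unsafe {
        let idx = _mm256_setr_epi64x(3, 2, 1, 0);
        let vals = _mm256_setr_epi64x(100, 200, 300, 400);
        // Lane 0 (value 100, index 3) is skipped because mask bit 0 is clear.
        _mm256_mask_i64scatter_epi64::<8>(buf.as_mut_ptr() as *mut u8, 0b0000_1110, idx, vals);
    }
    assert_eq!(buf, [400, 300, 200, 0]);
}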
17557
17558/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17559/// at packed 64-bit integer indices stored in vindex scaled by scale.
17560///
17561/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
17562#[inline]
17563#[target_feature(enable = "avx512f,avx512vl")]
17564#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17565#[rustc_legacy_const_generics(3)]
17566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17567pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17568    base_addr: *mut u8,
17569    vindex: __m256i,
17570    a: __m256d,
17571) {
17572    static_assert_imm8_scale!(SCALE);
17573    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17574}
17575
17576/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17577/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17578/// mask bit is not set are not written to memory).
17579///
17580/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17581#[inline]
17582#[target_feature(enable = "avx512f,avx512vl")]
17583#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17584#[rustc_legacy_const_generics(4)]
17585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17586pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17587    base_addr: *mut u8,
17588    k: __mmask8,
17589    vindex: __m256i,
17590    a: __m256d,
17591) {
17592    static_assert_imm8_scale!(SCALE);
17593    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17594}
17595
17596/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17597/// at packed 64-bit integer indices stored in vindex scaled by scale.
17598///
17599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17600#[inline]
17601#[target_feature(enable = "avx512f,avx512vl")]
17602#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17603#[rustc_legacy_const_generics(3)]
17604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17605pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17606    base_addr: *mut u8,
17607    vindex: __m256i,
17608    a: __m128,
17609) {
17610    static_assert_imm8_scale!(SCALE);
17611    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17612}
17613
17614/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17615/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17616/// mask bit is not set are not written to memory).
17617///
17618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17619#[inline]
17620#[target_feature(enable = "avx512f,avx512vl")]
17621#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17622#[rustc_legacy_const_generics(4)]
17623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17624pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17625    base_addr: *mut u8,
17626    k: __mmask8,
17627    vindex: __m256i,
17628    a: __m128,
17629) {
17630    static_assert_imm8_scale!(SCALE);
17631    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17632}
17633
17634/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17635/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17636/// mask bit is not set).
17637///
17638/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
17639#[inline]
17640#[target_feature(enable = "avx512f,avx512vl")]
17641#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17642#[rustc_legacy_const_generics(4)]
17643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17644pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17645    src: __m256i,
17646    k: __mmask8,
17647    vindex: __m256i,
17648    base_addr: *const u8,
17649) -> __m256i {
17650    static_assert_imm8_scale!(SCALE);
17651    transmute(vpgatherdd_256(
17652        src.as_i32x8(),
17653        base_addr as _,
17654        vindex.as_i32x8(),
17655        k,
17656        SCALE,
17657    ))
17658}
17659
17660/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17661/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17662/// mask bit is not set).
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f,avx512vl")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(4)]
17669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17670pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17671    src: __m256i,
17672    k: __mmask8,
17673    vindex: __m128i,
17674    base_addr: *const u8,
17675) -> __m256i {
17676    static_assert_imm8_scale!(SCALE);
17677    transmute(vpgatherdq_256(
17678        src.as_i64x4(),
17679        base_addr as _,
17680        vindex.as_i32x4(),
17681        k,
17682        SCALE,
17683    ))
17684}
17685
17686/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17687/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17688/// from src when the corresponding mask bit is not set).
17689///
17690/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17691#[inline]
17692#[target_feature(enable = "avx512f,avx512vl")]
17693#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17694#[rustc_legacy_const_generics(4)]
17695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17696pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17697    src: __m256d,
17698    k: __mmask8,
17699    vindex: __m128i,
17700    base_addr: *const u8,
17701) -> __m256d {
17702    static_assert_imm8_scale!(SCALE);
17703    transmute(vgatherdpd_256(
17704        src.as_f64x4(),
17705        base_addr as _,
17706        vindex.as_i32x4(),
17707        k,
17708        SCALE,
17709    ))
17710}
17711
17712/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17714/// from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17717#[inline]
17718#[target_feature(enable = "avx512f,avx512vl")]
17719#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17722pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17723    src: __m256,
17724    k: __mmask8,
17725    vindex: __m256i,
17726    base_addr: *const u8,
17727) -> __m256 {
17728    static_assert_imm8_scale!(SCALE);
17729    transmute(vgatherdps_256(
17730        src.as_f32x8(),
17731        base_addr as _,
17732        vindex.as_i32x8(),
17733        k,
17734        SCALE,
17735    ))
17736}
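
// --- Usage sketch (illustrative; not part of the original source) -------------
// For gathers the `src` argument acts as a per-lane default: lanes whose mask bit is
// clear are copied from `src` instead of being loaded. The helper name, table and
// index values are assumptions for this sketch; `avx512f` and `avx512vl` support is
// assumed to have been detected.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm256_mmask_i32gather_ps() {
    let table = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let lanes: [f32; 8] = unsafe {
        let default = _mm256_set1_ps(-1.0);
        let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        // Only the low four lanes are gathered; the rest keep the default value.
        let r = _mm256_mmask_i32gather_ps::<4>(default, 0b0000_1111, idx, table.as_ptr() as *const u8);
        mem::transmute(r)
    };
    assert_eq!(lanes, [1.0, 2.0, 3.0, 4.0, -1.0, -1.0, -1.0, -1.0]);
}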
17737
17738/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17739/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17740/// mask bit is not set).
17741///
17742/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17743#[inline]
17744#[target_feature(enable = "avx512f,avx512vl")]
17745#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17746#[rustc_legacy_const_generics(4)]
17747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17748pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17749    src: __m128i,
17750    k: __mmask8,
17751    vindex: __m256i,
17752    base_addr: *const u8,
17753) -> __m128i {
17754    static_assert_imm8_scale!(SCALE);
17755    transmute(vpgatherqd_256(
17756        src.as_i32x4(),
17757        base_addr as _,
17758        vindex.as_i64x4(),
17759        k,
17760        SCALE,
17761    ))
17762}
17763
17764/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17765/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17766/// mask bit is not set).
17767///
17768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17769#[inline]
17770#[target_feature(enable = "avx512f,avx512vl")]
17771#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17772#[rustc_legacy_const_generics(4)]
17773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17774pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17775    src: __m256i,
17776    k: __mmask8,
17777    vindex: __m256i,
17778    base_addr: *const u8,
17779) -> __m256i {
17780    static_assert_imm8_scale!(SCALE);
17781    transmute(vpgatherqq_256(
17782        src.as_i64x4(),
17783        base_addr as _,
17784        vindex.as_i64x4(),
17785        k,
17786        SCALE,
17787    ))
17788}
17789
17790/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17791/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17792/// from src when the corresponding mask bit is not set).
17793///
17794/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17795#[inline]
17796#[target_feature(enable = "avx512f,avx512vl")]
17797#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17798#[rustc_legacy_const_generics(4)]
17799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17800pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17801    src: __m256d,
17802    k: __mmask8,
17803    vindex: __m256i,
17804    base_addr: *const u8,
17805) -> __m256d {
17806    static_assert_imm8_scale!(SCALE);
17807    transmute(vgatherqpd_256(
17808        src.as_f64x4(),
17809        base_addr as _,
17810        vindex.as_i64x4(),
17811        k,
17812        SCALE,
17813    ))
17814}
17815
17816/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17817/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17818/// from src when the corresponding mask bit is not set).
17819///
17820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17821#[inline]
17822#[target_feature(enable = "avx512f,avx512vl")]
17823#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17824#[rustc_legacy_const_generics(4)]
17825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17826pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17827    src: __m128,
17828    k: __mmask8,
17829    vindex: __m256i,
17830    base_addr: *const u8,
17831) -> __m128 {
17832    static_assert_imm8_scale!(SCALE);
17833    transmute(vgatherqps_256(
17834        src.as_f32x4(),
17835        base_addr as _,
17836        vindex.as_i64x4(),
17837        k,
17838        SCALE,
17839    ))
17840}
17841
17842/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17843/// indices stored in vindex scaled by scale.
17844///
17845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
17846#[inline]
17847#[target_feature(enable = "avx512f,avx512vl")]
17848#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17849#[rustc_legacy_const_generics(3)]
17850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17851pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17852    base_addr: *mut u8,
17853    vindex: __m128i,
17854    a: __m128i,
17855) {
17856    static_assert_imm8_scale!(SCALE);
17857    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17858}
17859
17860/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17861/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17862/// are not written to memory).
17863///
17864/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17865#[inline]
17866#[target_feature(enable = "avx512f,avx512vl")]
17867#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17868#[rustc_legacy_const_generics(4)]
17869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17870pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17871    base_addr: *mut u8,
17872    k: __mmask8,
17873    vindex: __m128i,
17874    a: __m128i,
17875) {
17876    static_assert_imm8_scale!(SCALE);
17877    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17878}
17879
17880/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17881/// indices stored in vindex scaled by scale.
17882///
17883/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17884#[inline]
17885#[target_feature(enable = "avx512f,avx512vl")]
17886#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17887#[rustc_legacy_const_generics(3)]
17888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17889pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17890    base_addr: *mut u8,
17891    vindex: __m128i,
17892    a: __m128i,
17893) {
17894    static_assert_imm8_scale!(SCALE);
17895    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17896}
17897
17898/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17899/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17900/// are not written to memory).
17901///
17902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17903#[inline]
17904#[target_feature(enable = "avx512f,avx512vl")]
17905#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17906#[rustc_legacy_const_generics(4)]
17907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17908pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17909    base_addr: *mut u8,
17910    k: __mmask8,
17911    vindex: __m128i,
17912    a: __m128i,
17913) {
17914    static_assert_imm8_scale!(SCALE);
17915    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17916}
17917
17918/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17919/// at packed 32-bit integer indices stored in vindex scaled by scale.
17920///
17921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17922#[inline]
17923#[target_feature(enable = "avx512f,avx512vl")]
17924#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17925#[rustc_legacy_const_generics(3)]
17926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17927pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
17928    static_assert_imm8_scale!(SCALE);
17929    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17930}
17931
17932/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17933/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17934/// mask bit is not set are not written to memory).
17935///
17936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
17937#[inline]
17938#[target_feature(enable = "avx512f,avx512vl")]
17939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17940#[rustc_legacy_const_generics(4)]
17941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17942pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
17943    base_addr: *mut u8,
17944    k: __mmask8,
17945    vindex: __m128i,
17946    a: __m128d,
17947) {
17948    static_assert_imm8_scale!(SCALE);
17949    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
17950}
17951
17952/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17953/// at packed 32-bit integer indices stored in vindex scaled by scale.
17954///
17955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
17956#[inline]
17957#[target_feature(enable = "avx512f,avx512vl")]
17958#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17959#[rustc_legacy_const_generics(3)]
17960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17961pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
17962    static_assert_imm8_scale!(SCALE);
17963    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17964}
17965
17966/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17967/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17968/// mask bit is not set are not written to memory).
17969///
17970/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
17971#[inline]
17972#[target_feature(enable = "avx512f,avx512vl")]
17973#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17974#[rustc_legacy_const_generics(4)]
17975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17976pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
17977    base_addr: *mut u8,
17978    k: __mmask8,
17979    vindex: __m128i,
17980    a: __m128,
17981) {
17982    static_assert_imm8_scale!(SCALE);
17983    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
17984}
17985
17986/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17987/// indices stored in vindex scaled by scale.
17988///
17989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
17990#[inline]
17991#[target_feature(enable = "avx512f,avx512vl")]
17992#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17993#[rustc_legacy_const_generics(3)]
17994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17995pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
17996    base_addr: *mut u8,
17997    vindex: __m128i,
17998    a: __m128i,
17999) {
18000    static_assert_imm8_scale!(SCALE);
18001    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18002}
18003
18004/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18005/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18006/// are not written to memory).
18007///
18008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18009#[inline]
18010#[target_feature(enable = "avx512f,avx512vl")]
18011#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18012#[rustc_legacy_const_generics(4)]
18013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18014pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18015    base_addr: *mut u8,
18016    k: __mmask8,
18017    vindex: __m128i,
18018    a: __m128i,
18019) {
18020    static_assert_imm8_scale!(SCALE);
18021    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18022}
18023
18024/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18025/// indices stored in vindex scaled by scale.
18026///
18027/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18028#[inline]
18029#[target_feature(enable = "avx512f,avx512vl")]
18030#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18031#[rustc_legacy_const_generics(3)]
18032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18033pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18034    base_addr: *mut u8,
18035    vindex: __m128i,
18036    a: __m128i,
18037) {
18038    static_assert_imm8_scale!(SCALE);
18039    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18040}
18041
18042/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18043/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18044/// are not written to memory).
18045///
18046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18047#[inline]
18048#[target_feature(enable = "avx512f,avx512vl")]
18049#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18050#[rustc_legacy_const_generics(4)]
18051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18052pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18053    base_addr: *mut u8,
18054    k: __mmask8,
18055    vindex: __m128i,
18056    a: __m128i,
18057) {
18058    static_assert_imm8_scale!(SCALE);
18059    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18060}
18061
18062/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18063/// at packed 64-bit integer indices stored in vindex scaled by scale.
18064///
18065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18066#[inline]
18067#[target_feature(enable = "avx512f,avx512vl")]
18068#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18069#[rustc_legacy_const_generics(3)]
18070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18071pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
18072    static_assert_imm8_scale!(SCALE);
18073    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18074}
18075
18076/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18077/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18078/// mask bit is not set are not written to memory).
18079///
18080/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18081#[inline]
18082#[target_feature(enable = "avx512f,avx512vl")]
18083#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18084#[rustc_legacy_const_generics(4)]
18085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18086pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18087    base_addr: *mut u8,
18088    k: __mmask8,
18089    vindex: __m128i,
18090    a: __m128d,
18091) {
18092    static_assert_imm8_scale!(SCALE);
18093    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18094}
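
// --- Usage sketch (illustrative; not part of the original source) -------------
// The 128-bit form handles just two lanes, so only mask bits 0 and 1 matter. The
// helper name and all concrete values are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_mask_i64scatter_pd() {
    let mut buf = [0.0f64; 2];
    unsafe {
        let idx = _mm_set_epi64x(0, 1); // lane 0 -> index 1, lane 1 -> index 0
        let vals = _mm_set_pd(20.0, 10.0); // lane 0 = 10.0, lane 1 = 20.0
        // Only lane 0 is stored, so buf[1] becomes 10.0 and buf[0] is untouched.
        _mm_mask_i64scatter_pd::<8>(buf.as_mut_ptr() as *mut u8, 0b01, idx, vals);
    }
    assert_eq!(buf, [0.0, 10.0]);
}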
18095
18096/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18097/// at packed 64-bit integer indices stored in vindex scaled by scale.
18098///
18099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18100#[inline]
18101#[target_feature(enable = "avx512f,avx512vl")]
18102#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18103#[rustc_legacy_const_generics(3)]
18104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18105pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
18106    static_assert_imm8_scale!(SCALE);
18107    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18108}
18109
18110/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18111/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set are not written to memory).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18119pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18120    base_addr: *mut u8,
18121    k: __mmask8,
18122    vindex: __m128i,
18123    a: __m128,
18124) {
18125    static_assert_imm8_scale!(SCALE);
18126    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18127}
18128
18129/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18130/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18131/// mask bit is not set).
18132///
18133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18134#[inline]
18135#[target_feature(enable = "avx512f,avx512vl")]
18136#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18137#[rustc_legacy_const_generics(4)]
18138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18139pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18140    src: __m128i,
18141    k: __mmask8,
18142    vindex: __m128i,
18143    base_addr: *const u8,
18144) -> __m128i {
18145    static_assert_imm8_scale!(SCALE);
18146    transmute(vpgatherdd_128(
18147        src.as_i32x4(),
18148        base_addr as _,
18149        vindex.as_i32x4(),
18150        k,
18151        SCALE,
18152    ))
18153}
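
// --- Usage sketch (illustrative; not part of the original source) -------------
// A small table lookup with `_mm_mmask_i32gather_epi32`: three lanes come from the
// table and one masked-off lane falls back to `src`. The helper name, table and
// index values are assumptions for this sketch.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_mmask_i32gather_epi32() {
    let table = [10i32, 20, 30, 40];
    let lanes: [i32; 4] = unsafe {
        let default = _mm_set1_epi32(-1);
        let idx = _mm_setr_epi32(3, 2, 1, 0);
        // Mask bit 3 is clear, so lane 3 is copied from `default` instead of memory.
        let r = _mm_mmask_i32gather_epi32::<4>(default, 0b0111, idx, table.as_ptr() as *const u8);
        mem::transmute(r)
    };
    assert_eq!(lanes, [40, 30, 20, -1]);
}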
18154
18155/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18156/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18157/// mask bit is not set).
18158///
18159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18160#[inline]
18161#[target_feature(enable = "avx512f,avx512vl")]
18162#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18163#[rustc_legacy_const_generics(4)]
18164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18165pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18166    src: __m128i,
18167    k: __mmask8,
18168    vindex: __m128i,
18169    base_addr: *const u8,
18170) -> __m128i {
18171    static_assert_imm8_scale!(SCALE);
18172    transmute(vpgatherdq_128(
18173        src.as_i64x2(),
18174        base_addr as _,
18175        vindex.as_i32x4(),
18176        k,
18177        SCALE,
18178    ))
18179}
18180
18181/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18182/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18183/// from src when the corresponding mask bit is not set).
18184///
18185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18186#[inline]
18187#[target_feature(enable = "avx512f,avx512vl")]
18188#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18189#[rustc_legacy_const_generics(4)]
18190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18191pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18192    src: __m128d,
18193    k: __mmask8,
18194    vindex: __m128i,
18195    base_addr: *const u8,
18196) -> __m128d {
18197    static_assert_imm8_scale!(SCALE);
18198    transmute(vgatherdpd_128(
18199        src.as_f64x2(),
18200        base_addr as _,
18201        vindex.as_i32x4(),
18202        k,
18203        SCALE,
18204    ))
18205}
18206
18207/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18208/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18209/// from src when the corresponding mask bit is not set).
18210///
18211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18212#[inline]
18213#[target_feature(enable = "avx512f,avx512vl")]
18214#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18215#[rustc_legacy_const_generics(4)]
18216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18217pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18218    src: __m128,
18219    k: __mmask8,
18220    vindex: __m128i,
18221    base_addr: *const u8,
18222) -> __m128 {
18223    static_assert_imm8_scale!(SCALE);
18224    transmute(vgatherdps_128(
18225        src.as_f32x4(),
18226        base_addr as _,
18227        vindex.as_i32x4(),
18228        k,
18229        SCALE,
18230    ))
18231}
18232
18233/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18234/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18235/// mask bit is not set).
18236///
18237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mmask_i64gather_epi32)
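///
/// # Example
///
/// A minimal usage sketch (not part of the upstream documentation), assuming
/// `avx512f`/`avx512vl` support and the unstable `stdarch_x86_avx512` feature.
/// Only two elements are gathered because `vindex` holds two 64-bit indices.
///
/// ```ignore
/// let data: [i32; 4] = [10, 20, 30, 40];
/// let vindex = _mm_set_epi64x(0, 3);   // lane 0 = index 3, lane 1 = index 0
/// let src = _mm_set1_epi32(-1);
/// let r = unsafe {
///     _mm_mmask_i64gather_epi32::<4>(src, 0b11, vindex, data.as_ptr() as *const u8)
/// };
/// // the two low 32-bit lanes hold 40 and 10
/// ```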
18238#[inline]
18239#[target_feature(enable = "avx512f,avx512vl")]
18240#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18241#[rustc_legacy_const_generics(4)]
18242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18243pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18244    src: __m128i,
18245    k: __mmask8,
18246    vindex: __m128i,
18247    base_addr: *const u8,
18248) -> __m128i {
18249    static_assert_imm8_scale!(SCALE);
18250    transmute(vpgatherqd_128(
18251        src.as_i32x4(),
18252        base_addr as _,
18253        vindex.as_i64x2(),
18254        k,
18255        SCALE,
18256    ))
18257}
18258
18259/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18260/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18261/// mask bit is not set).
18262///
18263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mmask_i64gather_epi64)
18264#[inline]
18265#[target_feature(enable = "avx512f,avx512vl")]
18266#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18267#[rustc_legacy_const_generics(4)]
18268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18269pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18270    src: __m128i,
18271    k: __mmask8,
18272    vindex: __m128i,
18273    base_addr: *const u8,
18274) -> __m128i {
18275    static_assert_imm8_scale!(SCALE);
18276    transmute(vpgatherqq_128(
18277        src.as_i64x2(),
18278        base_addr as _,
18279        vindex.as_i64x2(),
18280        k,
18281        SCALE,
18282    ))
18283}
18284
18285/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18286/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18287/// from src when the corresponding mask bit is not set).
18288///
18289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mmask_i64gather_pd)
18290#[inline]
18291#[target_feature(enable = "avx512f,avx512vl")]
18292#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18293#[rustc_legacy_const_generics(4)]
18294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18295pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18296    src: __m128d,
18297    k: __mmask8,
18298    vindex: __m128i,
18299    base_addr: *const u8,
18300) -> __m128d {
18301    static_assert_imm8_scale!(SCALE);
18302    transmute(vgatherqpd_128(
18303        src.as_f64x2(),
18304        base_addr as _,
18305        vindex.as_i64x2(),
18306        k,
18307        SCALE,
18308    ))
18309}
18310
18311/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18312/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18313/// from src when the corresponding mask bit is not set).
18314///
18315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mmask_i64gather_ps)
18316#[inline]
18317#[target_feature(enable = "avx512f,avx512vl")]
18318#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18319#[rustc_legacy_const_generics(4)]
18320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18321pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18322    src: __m128,
18323    k: __mmask8,
18324    vindex: __m128i,
18325    base_addr: *const u8,
18326) -> __m128 {
18327    static_assert_imm8_scale!(SCALE);
18328    transmute(vgatherqps_128(
18329        src.as_f32x4(),
18330        base_addr as _,
18331        vindex.as_i64x2(),
18332        k,
18333        SCALE,
18334    ))
18335}
18336
18337/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18338///
18339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
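///
/// # Example
///
/// A minimal usage sketch (not part of the upstream documentation); it assumes the
/// code runs in a `#[target_feature(enable = "avx512f")]` context with the unstable
/// `stdarch_x86_avx512` feature enabled.
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let src = _mm512_set1_epi32(-1);
/// // Left-pack the even-indexed lanes of `a`; the remaining lanes come from `src`.
/// let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
/// // r = [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1]
/// ```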
18340#[inline]
18341#[target_feature(enable = "avx512f")]
18342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18343#[cfg_attr(test, assert_instr(vpcompressd))]
18344pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18345    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18346}
18347
18348/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18349///
18350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18351#[inline]
18352#[target_feature(enable = "avx512f")]
18353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18354#[cfg_attr(test, assert_instr(vpcompressd))]
18355pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18356    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18357}
18358
18359/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18360///
18361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18362#[inline]
18363#[target_feature(enable = "avx512f,avx512vl")]
18364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18365#[cfg_attr(test, assert_instr(vpcompressd))]
18366pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18367    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18368}
18369
18370/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18371///
18372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18373#[inline]
18374#[target_feature(enable = "avx512f,avx512vl")]
18375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18376#[cfg_attr(test, assert_instr(vpcompressd))]
18377pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18378    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18379}
18380
18381/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18382///
18383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18384#[inline]
18385#[target_feature(enable = "avx512f,avx512vl")]
18386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18387#[cfg_attr(test, assert_instr(vpcompressd))]
18388pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18389    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18390}
18391
18392/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18393///
18394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18395#[inline]
18396#[target_feature(enable = "avx512f,avx512vl")]
18397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18398#[cfg_attr(test, assert_instr(vpcompressd))]
18399pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18400    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18401}
18402
18403/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18404///
18405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18406#[inline]
18407#[target_feature(enable = "avx512f")]
18408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18409#[cfg_attr(test, assert_instr(vpcompressq))]
18410pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18411    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18412}
18413
18414/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18415///
18416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18417#[inline]
18418#[target_feature(enable = "avx512f")]
18419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18420#[cfg_attr(test, assert_instr(vpcompressq))]
18421pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18422    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18423}
18424
18425/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18426///
18427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18428#[inline]
18429#[target_feature(enable = "avx512f,avx512vl")]
18430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18431#[cfg_attr(test, assert_instr(vpcompressq))]
18432pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18433    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18434}
18435
18436/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18437///
18438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18439#[inline]
18440#[target_feature(enable = "avx512f,avx512vl")]
18441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18442#[cfg_attr(test, assert_instr(vpcompressq))]
18443pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18444    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18445}
18446
18447/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18448///
18449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18453#[cfg_attr(test, assert_instr(vpcompressq))]
18454pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18455    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18456}
18457
18458/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18459///
18460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18461#[inline]
18462#[target_feature(enable = "avx512f,avx512vl")]
18463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18464#[cfg_attr(test, assert_instr(vpcompressq))]
18465pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18466    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18467}
18468
18469/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18470///
18471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18472#[inline]
18473#[target_feature(enable = "avx512f")]
18474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18475#[cfg_attr(test, assert_instr(vcompressps))]
18476pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18477    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18478}
18479
18480/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18481///
18482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18483#[inline]
18484#[target_feature(enable = "avx512f")]
18485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18486#[cfg_attr(test, assert_instr(vcompressps))]
18487pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18488    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18489}
18490
18491/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18492///
18493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18494#[inline]
18495#[target_feature(enable = "avx512f,avx512vl")]
18496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18497#[cfg_attr(test, assert_instr(vcompressps))]
18498pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18499    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18500}
18501
18502/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18503///
18504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18505#[inline]
18506#[target_feature(enable = "avx512f,avx512vl")]
18507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18508#[cfg_attr(test, assert_instr(vcompressps))]
18509pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18510    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18511}
18512
18513/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18514///
18515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18516#[inline]
18517#[target_feature(enable = "avx512f,avx512vl")]
18518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18519#[cfg_attr(test, assert_instr(vcompressps))]
18520pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18521    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18522}
18523
18524/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18525///
18526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18527#[inline]
18528#[target_feature(enable = "avx512f,avx512vl")]
18529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18530#[cfg_attr(test, assert_instr(vcompressps))]
18531pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
18532    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18533}
18534
18535/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18536///
18537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18538#[inline]
18539#[target_feature(enable = "avx512f")]
18540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18541#[cfg_attr(test, assert_instr(vcompresspd))]
18542pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18543    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18544}
18545
18546/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18547///
18548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18549#[inline]
18550#[target_feature(enable = "avx512f")]
18551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18552#[cfg_attr(test, assert_instr(vcompresspd))]
18553pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
18554    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18555}
18556
18557/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18558///
18559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18560#[inline]
18561#[target_feature(enable = "avx512f,avx512vl")]
18562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18563#[cfg_attr(test, assert_instr(vcompresspd))]
18564pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18565    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18566}
18567
18568/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18569///
18570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18571#[inline]
18572#[target_feature(enable = "avx512f,avx512vl")]
18573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18574#[cfg_attr(test, assert_instr(vcompresspd))]
18575pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
18576    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18577}
18578
18579/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18580///
18581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18582#[inline]
18583#[target_feature(enable = "avx512f,avx512vl")]
18584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18585#[cfg_attr(test, assert_instr(vcompresspd))]
18586pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18587    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18588}
18589
18590/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18591///
18592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18593#[inline]
18594#[target_feature(enable = "avx512f,avx512vl")]
18595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18596#[cfg_attr(test, assert_instr(vcompresspd))]
18597pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
18598    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18599}
18600
18601/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18602///
18603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
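///
/// # Example
///
/// A minimal sketch (not part of the upstream documentation) of the common
/// "filter and pack to memory" pattern; it assumes `avx512f` support and the
/// unstable `stdarch_x86_avx512` feature, and `out` is an illustrative buffer.
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let k: __mmask16 = 0b1000_0000_0000_0110; // keep lanes 1, 2 and 15
/// let mut out = [0i32; 16];
/// unsafe { _mm512_mask_compressstoreu_epi32(out.as_mut_ptr() as *mut u8, k, a) };
/// // exactly k.count_ones() == 3 elements were written: out[..3] == [1, 2, 15]
/// ```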
18604#[inline]
18605#[target_feature(enable = "avx512f")]
18606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18607#[cfg_attr(test, assert_instr(vpcompressd))]
18608pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
18609    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18610}
18611
18612/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18613///
18614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18615#[inline]
18616#[target_feature(enable = "avx512f,avx512vl")]
18617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18618#[cfg_attr(test, assert_instr(vpcompressd))]
18619pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
18620    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18621}
18622
18623/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18624///
18625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18626#[inline]
18627#[target_feature(enable = "avx512f,avx512vl")]
18628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18629#[cfg_attr(test, assert_instr(vpcompressd))]
18630pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
18631    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18632}
18633
18634/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18635///
18636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18637#[inline]
18638#[target_feature(enable = "avx512f")]
18639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18640#[cfg_attr(test, assert_instr(vpcompressq))]
18641pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
18642    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18643}
18644
18645/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18646///
18647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18648#[inline]
18649#[target_feature(enable = "avx512f,avx512vl")]
18650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18651#[cfg_attr(test, assert_instr(vpcompressq))]
18652pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
18653    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18654}
18655
18656/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18657///
18658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18659#[inline]
18660#[target_feature(enable = "avx512f,avx512vl")]
18661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18662#[cfg_attr(test, assert_instr(vpcompressq))]
18663pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
18664    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18665}
18666
18667/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18668///
18669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18670#[inline]
18671#[target_feature(enable = "avx512f")]
18672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18673#[cfg_attr(test, assert_instr(vcompressps))]
18674pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
18675    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18676}
18677
18678/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18679///
18680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18681#[inline]
18682#[target_feature(enable = "avx512f,avx512vl")]
18683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18684#[cfg_attr(test, assert_instr(vcompressps))]
18685pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
18686    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18687}
18688
18689/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18690///
18691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18692#[inline]
18693#[target_feature(enable = "avx512f,avx512vl")]
18694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18695#[cfg_attr(test, assert_instr(vcompressps))]
18696pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
18697    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18698}
18699
18700/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18701///
18702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18703#[inline]
18704#[target_feature(enable = "avx512f")]
18705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18706#[cfg_attr(test, assert_instr(vcompresspd))]
18707pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
18708    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18709}
18710
18711/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18712///
18713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18714#[inline]
18715#[target_feature(enable = "avx512f,avx512vl")]
18716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18717#[cfg_attr(test, assert_instr(vcompresspd))]
18718pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
18719    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18720}
18721
18722/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18723///
18724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18725#[inline]
18726#[target_feature(enable = "avx512f,avx512vl")]
18727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18728#[cfg_attr(test, assert_instr(vcompresspd))]
18729pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
18730    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18731}
18732
18733/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18734///
18735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
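///
/// # Example
///
/// A minimal usage sketch (not part of the upstream documentation); expand is the
/// inverse of compress. It assumes a `#[target_feature(enable = "avx512f")]` context
/// with the unstable `stdarch_x86_avx512` feature enabled.
///
/// ```ignore
/// let a = _mm512_setr_epi32(100, 200, 300, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let src = _mm512_set1_epi32(-1);
/// // The three low lanes of `a` are scattered to the lanes selected by the mask.
/// let r = _mm512_mask_expand_epi32(src, 0b0000_0000_0010_0101, a);
/// // lanes 0, 2 and 5 receive 100, 200 and 300; every other lane keeps -1
/// ```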
18736#[inline]
18737#[target_feature(enable = "avx512f")]
18738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18739#[cfg_attr(test, assert_instr(vpexpandd))]
18740pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18741    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18742}
18743
18744/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18745///
18746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18747#[inline]
18748#[target_feature(enable = "avx512f")]
18749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18750#[cfg_attr(test, assert_instr(vpexpandd))]
18751pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
18752    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18753}
18754
18755/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18756///
18757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18758#[inline]
18759#[target_feature(enable = "avx512f,avx512vl")]
18760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18761#[cfg_attr(test, assert_instr(vpexpandd))]
18762pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18763    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18764}
18765
18766/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18767///
18768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18769#[inline]
18770#[target_feature(enable = "avx512f,avx512vl")]
18771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18772#[cfg_attr(test, assert_instr(vpexpandd))]
18773pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
18774    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18775}
18776
18777/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18778///
18779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18780#[inline]
18781#[target_feature(enable = "avx512f,avx512vl")]
18782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18783#[cfg_attr(test, assert_instr(vpexpandd))]
18784pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18785    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18786}
18787
18788/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18789///
18790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18791#[inline]
18792#[target_feature(enable = "avx512f,avx512vl")]
18793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18794#[cfg_attr(test, assert_instr(vpexpandd))]
18795pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
18796    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18797}
18798
18799/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18800///
18801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18802#[inline]
18803#[target_feature(enable = "avx512f")]
18804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18805#[cfg_attr(test, assert_instr(vpexpandq))]
18806pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18807    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18808}
18809
18810/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18811///
18812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18813#[inline]
18814#[target_feature(enable = "avx512f")]
18815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18816#[cfg_attr(test, assert_instr(vpexpandq))]
18817pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
18818    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18819}
18820
18821/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18822///
18823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18824#[inline]
18825#[target_feature(enable = "avx512f,avx512vl")]
18826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18827#[cfg_attr(test, assert_instr(vpexpandq))]
18828pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18829    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18830}
18831
18832/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18833///
18834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18835#[inline]
18836#[target_feature(enable = "avx512f,avx512vl")]
18837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18838#[cfg_attr(test, assert_instr(vpexpandq))]
18839pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
18840    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18841}
18842
18843/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18844///
18845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18846#[inline]
18847#[target_feature(enable = "avx512f,avx512vl")]
18848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18849#[cfg_attr(test, assert_instr(vpexpandq))]
18850pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18851    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18852}
18853
18854/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18855///
18856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18857#[inline]
18858#[target_feature(enable = "avx512f,avx512vl")]
18859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18860#[cfg_attr(test, assert_instr(vpexpandq))]
18861pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
18862    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18863}
18864
18865/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18866///
18867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18868#[inline]
18869#[target_feature(enable = "avx512f")]
18870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18871#[cfg_attr(test, assert_instr(vexpandps))]
18872pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18873    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18874}
18875
18876/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18877///
18878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18879#[inline]
18880#[target_feature(enable = "avx512f")]
18881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18882#[cfg_attr(test, assert_instr(vexpandps))]
18883pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
18884    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18885}
18886
18887/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18888///
18889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18890#[inline]
18891#[target_feature(enable = "avx512f,avx512vl")]
18892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18893#[cfg_attr(test, assert_instr(vexpandps))]
18894pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18895    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18896}
18897
18898/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18899///
18900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18901#[inline]
18902#[target_feature(enable = "avx512f,avx512vl")]
18903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18904#[cfg_attr(test, assert_instr(vexpandps))]
18905pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
18906    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18907}
18908
18909/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18910///
18911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18912#[inline]
18913#[target_feature(enable = "avx512f,avx512vl")]
18914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18915#[cfg_attr(test, assert_instr(vexpandps))]
18916pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18917    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18918}
18919
18920/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18921///
18922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18923#[inline]
18924#[target_feature(enable = "avx512f,avx512vl")]
18925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18926#[cfg_attr(test, assert_instr(vexpandps))]
18927pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
18928    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
18929}
18930
18931/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18932///
18933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
18934#[inline]
18935#[target_feature(enable = "avx512f")]
18936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18937#[cfg_attr(test, assert_instr(vexpandpd))]
18938pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18939    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
18940}
18941
18942/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18943///
18944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
18945#[inline]
18946#[target_feature(enable = "avx512f")]
18947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18948#[cfg_attr(test, assert_instr(vexpandpd))]
18949pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
18950    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
18951}
18952
18953/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18954///
18955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
18956#[inline]
18957#[target_feature(enable = "avx512f,avx512vl")]
18958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18959#[cfg_attr(test, assert_instr(vexpandpd))]
18960pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18961    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
18962}
18963
18964/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18965///
18966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
18967#[inline]
18968#[target_feature(enable = "avx512f,avx512vl")]
18969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18970#[cfg_attr(test, assert_instr(vexpandpd))]
18971pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
18972    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
18973}
18974
18975/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18976///
18977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
18978#[inline]
18979#[target_feature(enable = "avx512f,avx512vl")]
18980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18981#[cfg_attr(test, assert_instr(vexpandpd))]
18982pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18983    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
18984}
18985
18986/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18987///
18988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
18989#[inline]
18990#[target_feature(enable = "avx512f,avx512vl")]
18991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18992#[cfg_attr(test, assert_instr(vexpandpd))]
18993pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
18994    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
18995}
18996
18997/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18998///
18999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
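///
/// # Example
///
/// A minimal usage sketch (not part of the upstream documentation), assuming a
/// `#[target_feature(enable = "avx512f")]` context and the unstable
/// `stdarch_x86_avx512` feature.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1234_5678);
/// // Rotate every 32-bit lane left by 8 bits.
/// let r = _mm512_rol_epi32::<8>(a);
/// // every lane is now 0x3456_7812
/// ```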
19000#[inline]
19001#[target_feature(enable = "avx512f")]
19002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19003#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19004#[rustc_legacy_const_generics(1)]
19005pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19006    unsafe {
19007        static_assert_uimm_bits!(IMM8, 8);
19008        let a = a.as_i32x16();
19009        let r = vprold(a, IMM8);
19010        transmute(r)
19011    }
19012}
19013
19014/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19015///
19016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19017#[inline]
19018#[target_feature(enable = "avx512f")]
19019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19020#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19021#[rustc_legacy_const_generics(3)]
19022pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19023    unsafe {
19024        static_assert_uimm_bits!(IMM8, 8);
19025        let a = a.as_i32x16();
19026        let r = vprold(a, IMM8);
19027        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19028    }
19029}
19030
19031/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19032///
19033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19034#[inline]
19035#[target_feature(enable = "avx512f")]
19036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19037#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19038#[rustc_legacy_const_generics(2)]
19039pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19040    unsafe {
19041        static_assert_uimm_bits!(IMM8, 8);
19042        let a = a.as_i32x16();
19043        let r = vprold(a, IMM8);
19044        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19045    }
19046}
19047
19048/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19049///
19050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19051#[inline]
19052#[target_feature(enable = "avx512f,avx512vl")]
19053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19054#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19055#[rustc_legacy_const_generics(1)]
19056pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19057    unsafe {
19058        static_assert_uimm_bits!(IMM8, 8);
19059        let a = a.as_i32x8();
19060        let r = vprold256(a, IMM8);
19061        transmute(r)
19062    }
19063}
19064
19065/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19066///
19067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19068#[inline]
19069#[target_feature(enable = "avx512f,avx512vl")]
19070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19071#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19072#[rustc_legacy_const_generics(3)]
19073pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19074    unsafe {
19075        static_assert_uimm_bits!(IMM8, 8);
19076        let a = a.as_i32x8();
19077        let r = vprold256(a, IMM8);
19078        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19079    }
19080}
19081
19082/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19083///
19084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19085#[inline]
19086#[target_feature(enable = "avx512f,avx512vl")]
19087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19088#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19089#[rustc_legacy_const_generics(2)]
19090pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19091    unsafe {
19092        static_assert_uimm_bits!(IMM8, 8);
19093        let a = a.as_i32x8();
19094        let r = vprold256(a, IMM8);
19095        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19096    }
19097}
19098
19099/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19100///
19101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19102#[inline]
19103#[target_feature(enable = "avx512f,avx512vl")]
19104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19105#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19106#[rustc_legacy_const_generics(1)]
19107pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19108    unsafe {
19109        static_assert_uimm_bits!(IMM8, 8);
19110        let a = a.as_i32x4();
19111        let r = vprold128(a, IMM8);
19112        transmute(r)
19113    }
19114}
19115
19116/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19117///
19118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19119#[inline]
19120#[target_feature(enable = "avx512f,avx512vl")]
19121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19122#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19123#[rustc_legacy_const_generics(3)]
19124pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19125    unsafe {
19126        static_assert_uimm_bits!(IMM8, 8);
19127        let a = a.as_i32x4();
19128        let r = vprold128(a, IMM8);
19129        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19130    }
19131}
19132
19133/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19134///
19135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19136#[inline]
19137#[target_feature(enable = "avx512f,avx512vl")]
19138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19139#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19140#[rustc_legacy_const_generics(2)]
19141pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19142    unsafe {
19143        static_assert_uimm_bits!(IMM8, 8);
19144        let a = a.as_i32x4();
19145        let r = vprold128(a, IMM8);
19146        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19147    }
19148}
19149
19150/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
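///
/// # Example
///
/// A minimal usage sketch (not part of the upstream documentation), assuming a
/// `#[target_feature(enable = "avx512f")]` context and the unstable
/// `stdarch_x86_avx512` feature. Rotating right by `n` is equivalent to rotating
/// left by `32 - n`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1234_5678);
/// let r = _mm512_ror_epi32::<8>(a);
/// // every lane is now 0x7812_3456
/// ```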
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19156#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19157#[rustc_legacy_const_generics(1)]
19158pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19159    unsafe {
19160        static_assert_uimm_bits!(IMM8, 8);
19161        let a = a.as_i32x16();
19162        let r = vprord(a, IMM8);
19163        transmute(r)
19164    }
19165}
19166
19167/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19168///
19169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19170#[inline]
19171#[target_feature(enable = "avx512f")]
19172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19173#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19174#[rustc_legacy_const_generics(3)]
19175pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19176    unsafe {
19177        static_assert_uimm_bits!(IMM8, 8);
19178        let a = a.as_i32x16();
19179        let r = vprord(a, IMM8);
19180        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19181    }
19182}
19183
19184/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19185///
19186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19187#[inline]
19188#[target_feature(enable = "avx512f")]
19189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19190#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19191#[rustc_legacy_const_generics(2)]
19192pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19193    unsafe {
19194        static_assert_uimm_bits!(IMM8, 8);
19195        let a = a.as_i32x16();
19196        let r = vprord(a, IMM8);
19197        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19198    }
19199}
19200
19201/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19202///
19203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19204#[inline]
19205#[target_feature(enable = "avx512f,avx512vl")]
19206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19207#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19208#[rustc_legacy_const_generics(1)]
19209pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19210    unsafe {
19211        static_assert_uimm_bits!(IMM8, 8);
19212        let a = a.as_i32x8();
19213        let r = vprord256(a, IMM8);
19214        transmute(r)
19215    }
19216}
19217
19218/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19219///
19220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19221#[inline]
19222#[target_feature(enable = "avx512f,avx512vl")]
19223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19224#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19225#[rustc_legacy_const_generics(3)]
19226pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19227    unsafe {
19228        static_assert_uimm_bits!(IMM8, 8);
19229        let a = a.as_i32x8();
19230        let r = vprord256(a, IMM8);
19231        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19232    }
19233}
19234
19235/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19236///
19237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19238#[inline]
19239#[target_feature(enable = "avx512f,avx512vl")]
19240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19241#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19242#[rustc_legacy_const_generics(2)]
19243pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19244    unsafe {
19245        static_assert_uimm_bits!(IMM8, 8);
19246        let a = a.as_i32x8();
19247        let r = vprord256(a, IMM8);
19248        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19249    }
19250}
19251
19252/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19253///
19254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19255#[inline]
19256#[target_feature(enable = "avx512f,avx512vl")]
19257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19258#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19259#[rustc_legacy_const_generics(1)]
19260pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19261    unsafe {
19262        static_assert_uimm_bits!(IMM8, 8);
19263        let a = a.as_i32x4();
19264        let r = vprord128(a, IMM8);
19265        transmute(r)
19266    }
19267}
19268
19269/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19270///
19271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19272#[inline]
19273#[target_feature(enable = "avx512f,avx512vl")]
19274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19275#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19276#[rustc_legacy_const_generics(3)]
19277pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19278    unsafe {
19279        static_assert_uimm_bits!(IMM8, 8);
19280        let a = a.as_i32x4();
19281        let r = vprord128(a, IMM8);
19282        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19283    }
19284}
19285
19286/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19287///
19288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19289#[inline]
19290#[target_feature(enable = "avx512f,avx512vl")]
19291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19292#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19293#[rustc_legacy_const_generics(2)]
19294pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19295    unsafe {
19296        static_assert_uimm_bits!(IMM8, 8);
19297        let a = a.as_i32x4();
19298        let r = vprord128(a, IMM8);
19299        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19300    }
19301}
19302
19303/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19304///
19305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
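///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi64(i64::MIN); // only bit 63 set in each lane
///     // Rotating left by 1 wraps bit 63 around to bit 0.
///     let r = _mm512_rol_epi64::<1>(a);
///     let lanes: [u64; 8] = core::mem::transmute(r);
///     assert_eq!(lanes, [1; 8]);
/// }
/// ```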
19306#[inline]
19307#[target_feature(enable = "avx512f")]
19308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19309#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19310#[rustc_legacy_const_generics(1)]
19311pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19312    unsafe {
19313        static_assert_uimm_bits!(IMM8, 8);
19314        let a = a.as_i64x8();
19315        let r = vprolq(a, IMM8);
19316        transmute(r)
19317    }
19318}
19319
19320/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19321///
19322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19323#[inline]
19324#[target_feature(enable = "avx512f")]
19325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19326#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19327#[rustc_legacy_const_generics(3)]
19328pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19329    unsafe {
19330        static_assert_uimm_bits!(IMM8, 8);
19331        let a = a.as_i64x8();
19332        let r = vprolq(a, IMM8);
19333        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19334    }
19335}
19336
19337/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19340#[inline]
19341#[target_feature(enable = "avx512f")]
19342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19343#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19344#[rustc_legacy_const_generics(2)]
19345pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19346    unsafe {
19347        static_assert_uimm_bits!(IMM8, 8);
19348        let a = a.as_i64x8();
19349        let r = vprolq(a, IMM8);
19350        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19351    }
19352}
19353
19354/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19355///
19356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19357#[inline]
19358#[target_feature(enable = "avx512f,avx512vl")]
19359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19360#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19361#[rustc_legacy_const_generics(1)]
19362pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19363    unsafe {
19364        static_assert_uimm_bits!(IMM8, 8);
19365        let a = a.as_i64x4();
19366        let r = vprolq256(a, IMM8);
19367        transmute(r)
19368    }
19369}
19370
19371/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19372///
19373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19374#[inline]
19375#[target_feature(enable = "avx512f,avx512vl")]
19376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19377#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19378#[rustc_legacy_const_generics(3)]
19379pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19380    unsafe {
19381        static_assert_uimm_bits!(IMM8, 8);
19382        let a = a.as_i64x4();
19383        let r = vprolq256(a, IMM8);
19384        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19385    }
19386}
19387
19388/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19389///
19390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19391#[inline]
19392#[target_feature(enable = "avx512f,avx512vl")]
19393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19394#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19395#[rustc_legacy_const_generics(2)]
19396pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19397    unsafe {
19398        static_assert_uimm_bits!(IMM8, 8);
19399        let a = a.as_i64x4();
19400        let r = vprolq256(a, IMM8);
19401        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19402    }
19403}
19404
19405/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19406///
19407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19408#[inline]
19409#[target_feature(enable = "avx512f,avx512vl")]
19410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19411#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19412#[rustc_legacy_const_generics(1)]
19413pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19414    unsafe {
19415        static_assert_uimm_bits!(IMM8, 8);
19416        let a = a.as_i64x2();
19417        let r = vprolq128(a, IMM8);
19418        transmute(r)
19419    }
19420}
19421
19422/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19423///
19424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19425#[inline]
19426#[target_feature(enable = "avx512f,avx512vl")]
19427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19428#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19429#[rustc_legacy_const_generics(3)]
19430pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19431    unsafe {
19432        static_assert_uimm_bits!(IMM8, 8);
19433        let a = a.as_i64x2();
19434        let r = vprolq128(a, IMM8);
19435        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19436    }
19437}
19438
19439/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19440///
19441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19442#[inline]
19443#[target_feature(enable = "avx512f,avx512vl")]
19444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19445#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19446#[rustc_legacy_const_generics(2)]
19447pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19448    unsafe {
19449        static_assert_uimm_bits!(IMM8, 8);
19450        let a = a.as_i64x2();
19451        let r = vprolq128(a, IMM8);
19452        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19453    }
19454}
19455
19456/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19457///
19458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19459#[inline]
19460#[target_feature(enable = "avx512f")]
19461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19462#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19463#[rustc_legacy_const_generics(1)]
19464pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19465    unsafe {
19466        static_assert_uimm_bits!(IMM8, 8);
19467        let a = a.as_i64x8();
19468        let r = vprorq(a, IMM8);
19469        transmute(r)
19470    }
19471}
19472
19473/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
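///
/// # Example
///
/// A minimal sketch of the writemask behavior (not from Intel's documentation),
/// assuming nightly with `feature(stdarch_x86_avx512)` and an AVX-512F CPU;
/// `demo` is a hypothetical helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi64(8);
///     let src = _mm512_set1_epi64(-1);
///     // Only the four low elements are selected; the rest are copied from `src`.
///     let r = _mm512_mask_ror_epi64::<3>(src, 0b0000_1111, a);
///     let lanes: [i64; 8] = core::mem::transmute(r);
///     assert_eq!(lanes, [1, 1, 1, 1, -1, -1, -1, -1]);
/// }
/// ```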
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19480#[rustc_legacy_const_generics(3)]
19481pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19482    unsafe {
19483        static_assert_uimm_bits!(IMM8, 8);
19484        let a = a.as_i64x8();
19485        let r = vprorq(a, IMM8);
19486        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19487    }
19488}
19489
19490/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19491///
19492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19493#[inline]
19494#[target_feature(enable = "avx512f")]
19495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19496#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19497#[rustc_legacy_const_generics(2)]
19498pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19499    unsafe {
19500        static_assert_uimm_bits!(IMM8, 8);
19501        let a = a.as_i64x8();
19502        let r = vprorq(a, IMM8);
19503        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19504    }
19505}
19506
19507/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19508///
19509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19510#[inline]
19511#[target_feature(enable = "avx512f,avx512vl")]
19512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19513#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19514#[rustc_legacy_const_generics(1)]
19515pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19516    unsafe {
19517        static_assert_uimm_bits!(IMM8, 8);
19518        let a = a.as_i64x4();
19519        let r = vprorq256(a, IMM8);
19520        transmute(r)
19521    }
19522}
19523
19524/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19525///
19526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19527#[inline]
19528#[target_feature(enable = "avx512f,avx512vl")]
19529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19530#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19531#[rustc_legacy_const_generics(3)]
19532pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19533    unsafe {
19534        static_assert_uimm_bits!(IMM8, 8);
19535        let a = a.as_i64x4();
19536        let r = vprorq256(a, IMM8);
19537        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19538    }
19539}
19540
19541/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19542///
19543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19544#[inline]
19545#[target_feature(enable = "avx512f,avx512vl")]
19546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19547#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19548#[rustc_legacy_const_generics(2)]
19549pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19550    unsafe {
19551        static_assert_uimm_bits!(IMM8, 8);
19552        let a = a.as_i64x4();
19553        let r = vprorq256(a, IMM8);
19554        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19555    }
19556}
19557
19558/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19564#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19565#[rustc_legacy_const_generics(1)]
19566pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19567    unsafe {
19568        static_assert_uimm_bits!(IMM8, 8);
19569        let a = a.as_i64x2();
19570        let r = vprorq128(a, IMM8);
19571        transmute(r)
19572    }
19573}
19574
19575/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19576///
19577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19578#[inline]
19579#[target_feature(enable = "avx512f,avx512vl")]
19580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19581#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19582#[rustc_legacy_const_generics(3)]
19583pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19584    unsafe {
19585        static_assert_uimm_bits!(IMM8, 8);
19586        let a = a.as_i64x2();
19587        let r = vprorq128(a, IMM8);
19588        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19589    }
19590}
19591
19592/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19593///
19594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19595#[inline]
19596#[target_feature(enable = "avx512f,avx512vl")]
19597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19598#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19599#[rustc_legacy_const_generics(2)]
19600pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19601    unsafe {
19602        static_assert_uimm_bits!(IMM8, 8);
19603        let a = a.as_i64x2();
19604        let r = vprorq128(a, IMM8);
19605        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19606    }
19607}
19608
19609/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19610///
19611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
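///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(3);
///     let r = _mm512_slli_epi32::<4>(a); // 3 << 4 == 48 in every lane
///     let lanes: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(lanes[0], 48);
///     // Shift counts of 32 or more clear every element.
///     let z: [i32; 16] = core::mem::transmute(_mm512_slli_epi32::<32>(a));
///     assert_eq!(z, [0; 16]);
/// }
/// ```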
19612#[inline]
19613#[target_feature(enable = "avx512f")]
19614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19615#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19616#[rustc_legacy_const_generics(1)]
19617pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19618    unsafe {
19619        static_assert_uimm_bits!(IMM8, 8);
19620        if IMM8 >= 32 {
19621            _mm512_setzero_si512()
19622        } else {
19623            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19624        }
19625    }
19626}
19627
19628/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19634#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19635#[rustc_legacy_const_generics(3)]
19636pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19637    unsafe {
19638        static_assert_uimm_bits!(IMM8, 8);
19639        let shf = if IMM8 >= 32 {
19640            u32x16::ZERO
19641        } else {
19642            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
19643        };
19644        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19645    }
19646}
19647
19648/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19649///
19650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19651#[inline]
19652#[target_feature(enable = "avx512f")]
19653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19654#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19655#[rustc_legacy_const_generics(2)]
19656pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19657    unsafe {
19658        static_assert_uimm_bits!(IMM8, 8);
19659        if IMM8 >= 32 {
19660            _mm512_setzero_si512()
19661        } else {
19662            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
19663            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19664        }
19665    }
19666}
19667
19668/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19669///
19670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19671#[inline]
19672#[target_feature(enable = "avx512f,avx512vl")]
19673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19674#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19675#[rustc_legacy_const_generics(3)]
19676pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19677    unsafe {
19678        static_assert_uimm_bits!(IMM8, 8);
19679        let r = if IMM8 >= 32 {
19680            u32x8::ZERO
19681        } else {
19682            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
19683        };
19684        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19685    }
19686}
19687
19688/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19689///
19690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19691#[inline]
19692#[target_feature(enable = "avx512f,avx512vl")]
19693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19694#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19695#[rustc_legacy_const_generics(2)]
19696pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19697    unsafe {
19698        static_assert_uimm_bits!(IMM8, 8);
19699        if IMM8 >= 32 {
19700            _mm256_setzero_si256()
19701        } else {
19702            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
19703            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19704        }
19705    }
19706}
19707
19708/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19709///
19710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19711#[inline]
19712#[target_feature(enable = "avx512f,avx512vl")]
19713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19714#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19715#[rustc_legacy_const_generics(3)]
19716pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19717    unsafe {
19718        static_assert_uimm_bits!(IMM8, 8);
19719        let r = if IMM8 >= 32 {
19720            u32x4::ZERO
19721        } else {
19722            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
19723        };
19724        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19725    }
19726}
19727
19728/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19729///
19730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19731#[inline]
19732#[target_feature(enable = "avx512f,avx512vl")]
19733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19734#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19735#[rustc_legacy_const_generics(2)]
19736pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19737    unsafe {
19738        static_assert_uimm_bits!(IMM8, 8);
19739        if IMM8 >= 32 {
19740            _mm_setzero_si128()
19741        } else {
19742            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
19743            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19744        }
19745    }
19746}
19747
19748/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19754#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19757    unsafe {
19758        static_assert_uimm_bits!(IMM8, 8);
19759        if IMM8 >= 32 {
19760            _mm512_setzero_si512()
19761        } else {
19762            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19763        }
19764    }
19765}
19766
19767/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19768///
19769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19770#[inline]
19771#[target_feature(enable = "avx512f")]
19772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19773#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19774#[rustc_legacy_const_generics(3)]
19775pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19776    unsafe {
19777        static_assert_uimm_bits!(IMM8, 8);
19778        let shf = if IMM8 >= 32 {
19779            u32x16::ZERO
19780        } else {
19781            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
19782        };
19783        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19784    }
19785}
19786
19787/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19788///
19789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
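///
/// # Example
///
/// A minimal sketch of the zeromask behavior (not from Intel's documentation),
/// assuming nightly with `feature(stdarch_x86_avx512)` and an AVX-512F CPU;
/// `demo` is a hypothetical helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(-1); // all bits set in every lane
///     // The upper eight elements are selected; the lower eight are zeroed.
///     let r = _mm512_maskz_srli_epi32::<28>(0b1111_1111_0000_0000, a);
///     let lanes: [u32; 16] = core::mem::transmute(r);
///     assert_eq!(lanes[0], 0);
///     assert_eq!(lanes[8], 0xF);
/// }
/// ```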
19790#[inline]
19791#[target_feature(enable = "avx512f")]
19792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19793#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19794#[rustc_legacy_const_generics(2)]
19795pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19796    unsafe {
19797        static_assert_uimm_bits!(IMM8, 8);
19798        if IMM8 >= 32 {
19799            _mm512_setzero_si512()
19800        } else {
19801            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
19802            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19803        }
19804    }
19805}
19806
19807/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19808///
19809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19810#[inline]
19811#[target_feature(enable = "avx512f,avx512vl")]
19812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19813#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19814#[rustc_legacy_const_generics(3)]
19815pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19816    unsafe {
19817        static_assert_uimm_bits!(IMM8, 8);
19818        let r = if IMM8 >= 32 {
19819            u32x8::ZERO
19820        } else {
19821            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
19822        };
19823        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19824    }
19825}
19826
19827/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19828///
19829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19830#[inline]
19831#[target_feature(enable = "avx512f,avx512vl")]
19832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19833#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19834#[rustc_legacy_const_generics(2)]
19835pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19836    unsafe {
19837        static_assert_uimm_bits!(IMM8, 8);
19838        if IMM8 >= 32 {
19839            _mm256_setzero_si256()
19840        } else {
19841            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
19842            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19843        }
19844    }
19845}
19846
19847/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19848///
19849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19850#[inline]
19851#[target_feature(enable = "avx512f,avx512vl")]
19852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19853#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19854#[rustc_legacy_const_generics(3)]
19855pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19856    unsafe {
19857        static_assert_uimm_bits!(IMM8, 8);
19858        let r = if IMM8 >= 32 {
19859            u32x4::ZERO
19860        } else {
19861            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
19862        };
19863        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19864    }
19865}
19866
19867/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19868///
19869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19870#[inline]
19871#[target_feature(enable = "avx512f,avx512vl")]
19872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19873#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19874#[rustc_legacy_const_generics(2)]
19875pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19876    unsafe {
19877        static_assert_uimm_bits!(IMM8, 8);
19878        if IMM8 >= 32 {
19879            _mm_setzero_si128()
19880        } else {
19881            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
19882            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19883        }
19884    }
19885}
19886
19887/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19888///
19889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
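///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi64(1);
///     // 64-bit lanes accept counts up to 63; counts of 64 or more clear every element.
///     let r = _mm512_slli_epi64::<40>(a);
///     let lanes: [u64; 8] = core::mem::transmute(r);
///     assert_eq!(lanes, [1u64 << 40; 8]);
/// }
/// ```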
19890#[inline]
19891#[target_feature(enable = "avx512f")]
19892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19893#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19894#[rustc_legacy_const_generics(1)]
19895pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19896    unsafe {
19897        static_assert_uimm_bits!(IMM8, 8);
19898        if IMM8 >= 64 {
19899            _mm512_setzero_si512()
19900        } else {
19901            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19902        }
19903    }
19904}
19905
19906/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19907///
19908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19909#[inline]
19910#[target_feature(enable = "avx512f")]
19911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19912#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19913#[rustc_legacy_const_generics(3)]
19914pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19915    unsafe {
19916        static_assert_uimm_bits!(IMM8, 8);
19917        let shf = if IMM8 >= 64 {
19918            u64x8::ZERO
19919        } else {
19920            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
19921        };
19922        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19923    }
19924}
19925
19926/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19927///
19928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
19929#[inline]
19930#[target_feature(enable = "avx512f")]
19931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19932#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19933#[rustc_legacy_const_generics(2)]
19934pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19935    unsafe {
19936        static_assert_uimm_bits!(IMM8, 8);
19937        if IMM8 >= 64 {
19938            _mm512_setzero_si512()
19939        } else {
19940            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
19941            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
19942        }
19943    }
19944}
19945
19946/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19947///
19948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
19949#[inline]
19950#[target_feature(enable = "avx512f,avx512vl")]
19951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19952#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19953#[rustc_legacy_const_generics(3)]
19954pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19955    unsafe {
19956        static_assert_uimm_bits!(IMM8, 8);
19957        let r = if IMM8 >= 64 {
19958            u64x4::ZERO
19959        } else {
19960            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
19961        };
19962        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
19963    }
19964}
19965
19966/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19967///
19968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
19969#[inline]
19970#[target_feature(enable = "avx512f,avx512vl")]
19971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19972#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19973#[rustc_legacy_const_generics(2)]
19974pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19975    unsafe {
19976        static_assert_uimm_bits!(IMM8, 8);
19977        if IMM8 >= 64 {
19978            _mm256_setzero_si256()
19979        } else {
19980            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
19981            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
19982        }
19983    }
19984}
19985
19986/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19987///
19988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
19989#[inline]
19990#[target_feature(enable = "avx512f,avx512vl")]
19991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19992#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19993#[rustc_legacy_const_generics(3)]
19994pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19995    unsafe {
19996        static_assert_uimm_bits!(IMM8, 8);
19997        let r = if IMM8 >= 64 {
19998            u64x2::ZERO
19999        } else {
20000            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20001        };
20002        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20003    }
20004}
20005
20006/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20007///
20008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20009#[inline]
20010#[target_feature(enable = "avx512f,avx512vl")]
20011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20012#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20013#[rustc_legacy_const_generics(2)]
20014pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20015    unsafe {
20016        static_assert_uimm_bits!(IMM8, 8);
20017        if IMM8 >= 64 {
20018            _mm_setzero_si128()
20019        } else {
20020            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20021            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20022        }
20023    }
20024}
20025
20026/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20027///
20028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
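///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi64(i64::MIN); // only bit 63 set in each lane
///     // Logical shift: zeros are shifted in from the left.
///     let r = _mm512_srli_epi64::<63>(a);
///     let lanes: [u64; 8] = core::mem::transmute(r);
///     assert_eq!(lanes, [1; 8]);
/// }
/// ```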
20029#[inline]
20030#[target_feature(enable = "avx512f")]
20031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20032#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20033#[rustc_legacy_const_generics(1)]
20034pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20035    unsafe {
20036        static_assert_uimm_bits!(IMM8, 8);
20037        if IMM8 >= 64 {
20038            _mm512_setzero_si512()
20039        } else {
20040            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20041        }
20042    }
20043}
20044
20045/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20046///
20047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20048#[inline]
20049#[target_feature(enable = "avx512f")]
20050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20051#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20052#[rustc_legacy_const_generics(3)]
20053pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20054    unsafe {
20055        static_assert_uimm_bits!(IMM8, 8);
20056        let shf = if IMM8 >= 64 {
20057            u64x8::ZERO
20058        } else {
20059            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20060        };
20061        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20062    }
20063}
20064
20065/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20066///
20067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20068#[inline]
20069#[target_feature(enable = "avx512f")]
20070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20071#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20072#[rustc_legacy_const_generics(2)]
20073pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20074    unsafe {
20075        static_assert_uimm_bits!(IMM8, 8);
20076        if IMM8 >= 64 {
20077            _mm512_setzero_si512()
20078        } else {
20079            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20080            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20081        }
20082    }
20083}
20084
20085/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20086///
20087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20088#[inline]
20089#[target_feature(enable = "avx512f,avx512vl")]
20090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20091#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20092#[rustc_legacy_const_generics(3)]
20093pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20094    unsafe {
20095        static_assert_uimm_bits!(IMM8, 8);
20096        let r = if IMM8 >= 64 {
20097            u64x4::ZERO
20098        } else {
20099            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20100        };
20101        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20102    }
20103}
20104
20105/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20106///
20107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20108#[inline]
20109#[target_feature(enable = "avx512f,avx512vl")]
20110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20111#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20112#[rustc_legacy_const_generics(2)]
20113pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20114    unsafe {
20115        static_assert_uimm_bits!(IMM8, 8);
20116        if IMM8 >= 64 {
20117            _mm256_setzero_si256()
20118        } else {
20119            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20120            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20121        }
20122    }
20123}
20124
20125/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20126///
20127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20128#[inline]
20129#[target_feature(enable = "avx512f,avx512vl")]
20130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20131#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20132#[rustc_legacy_const_generics(3)]
20133pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20134    unsafe {
20135        static_assert_uimm_bits!(IMM8, 8);
20136        let r = if IMM8 >= 64 {
20137            u64x2::ZERO
20138        } else {
20139            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20140        };
20141        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20142    }
20143}
20144
20145/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20146///
20147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20148#[inline]
20149#[target_feature(enable = "avx512f,avx512vl")]
20150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20151#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20152#[rustc_legacy_const_generics(2)]
20153pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20154    unsafe {
20155        static_assert_uimm_bits!(IMM8, 8);
20156        if IMM8 >= 64 {
20157            _mm_setzero_si128()
20158        } else {
20159            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20160            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20161        }
20162    }
20163}
20164
20165/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20166///
20167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
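///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(1);
///     // The shift amount is taken from the low 64 bits of `count`.
///     let count = _mm_cvtsi32_si128(3);
///     let r = _mm512_sll_epi32(a, count);
///     let lanes: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(lanes, [8; 16]);
/// }
/// ```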
20168#[inline]
20169#[target_feature(enable = "avx512f")]
20170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20171#[cfg_attr(test, assert_instr(vpslld))]
20172pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20173    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20174}
20175
20176/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20177///
20178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20179#[inline]
20180#[target_feature(enable = "avx512f")]
20181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20182#[cfg_attr(test, assert_instr(vpslld))]
20183pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20184    unsafe {
20185        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20186        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20187    }
20188}
20189
20190/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20196#[cfg_attr(test, assert_instr(vpslld))]
20197pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20198    unsafe {
20199        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20200        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20201    }
20202}
20203
20204/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20205///
20206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20207#[inline]
20208#[target_feature(enable = "avx512f,avx512vl")]
20209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20210#[cfg_attr(test, assert_instr(vpslld))]
20211pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20212    unsafe {
20213        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20214        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20215    }
20216}
20217
20218/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20219///
20220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20221#[inline]
20222#[target_feature(enable = "avx512f,avx512vl")]
20223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20224#[cfg_attr(test, assert_instr(vpslld))]
20225pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20226    unsafe {
20227        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20228        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20229    }
20230}
20231
20232/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20233///
20234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20235#[inline]
20236#[target_feature(enable = "avx512f,avx512vl")]
20237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20238#[cfg_attr(test, assert_instr(vpslld))]
20239pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20240    unsafe {
20241        let shf = _mm_sll_epi32(a, count).as_i32x4();
20242        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20243    }
20244}
20245
20246/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20249#[inline]
20250#[target_feature(enable = "avx512f,avx512vl")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vpslld))]
20253pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20254    unsafe {
20255        let shf = _mm_sll_epi32(a, count).as_i32x4();
20256        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20257    }
20258}
20259
20260/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20261///
20262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
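///
/// # Example
///
/// A minimal usage sketch (not from Intel's documentation), assuming nightly with
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU; `demo` is a hypothetical
/// helper used only for illustration.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0x80);
///     let count = _mm_cvtsi32_si128(4);
///     let r = _mm512_srl_epi32(a, count);
///     let lanes: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(lanes, [0x8; 16]);
/// }
/// ```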
20263#[inline]
20264#[target_feature(enable = "avx512f")]
20265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20266#[cfg_attr(test, assert_instr(vpsrld))]
20267pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20268    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20269}
20270
20271/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20272///
20273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20274#[inline]
20275#[target_feature(enable = "avx512f")]
20276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20277#[cfg_attr(test, assert_instr(vpsrld))]
20278pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20279    unsafe {
20280        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20281        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20282    }
20283}
20284
20285/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20286///
20287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20288#[inline]
20289#[target_feature(enable = "avx512f")]
20290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20291#[cfg_attr(test, assert_instr(vpsrld))]
20292pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20293    unsafe {
20294        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20295        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20296    }
20297}
20298
20299/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20300///
20301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20302#[inline]
20303#[target_feature(enable = "avx512f,avx512vl")]
20304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20305#[cfg_attr(test, assert_instr(vpsrld))]
20306pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20307    unsafe {
20308        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20309        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20310    }
20311}
20312
20313/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20314///
20315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20316#[inline]
20317#[target_feature(enable = "avx512f,avx512vl")]
20318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20319#[cfg_attr(test, assert_instr(vpsrld))]
20320pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20321    unsafe {
20322        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20323        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20324    }
20325}
20326
20327/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20328///
20329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20330#[inline]
20331#[target_feature(enable = "avx512f,avx512vl")]
20332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20333#[cfg_attr(test, assert_instr(vpsrld))]
20334pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20335    unsafe {
20336        let shf = _mm_srl_epi32(a, count).as_i32x4();
20337        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20338    }
20339}
20340
20341/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vpsrld))]
20348pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20349    unsafe {
20350        let shf = _mm_srl_epi32(a, count).as_i32x4();
20351        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20352    }
20353}
20354
20355/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20356///
20357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20358#[inline]
20359#[target_feature(enable = "avx512f")]
20360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20361#[cfg_attr(test, assert_instr(vpsllq))]
20362pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20363    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20364}
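
// Test-only sketch (hypothetical name, illustrative values): all eight 64-bit
// lanes are shifted by the single count held in the low 64 bits of `count`.
// Per Intel's description of the underlying instruction, a count above 63
// zeroes every lane rather than wrapping.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sll_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(3);
    let count = _mm_set_epi32(0, 0, 0, 5); // low 64 bits = 5
    _mm512_sll_epi64(a, count) // every lane becomes 3 << 5 = 96
}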
20365
20366/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20367///
20368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20369#[inline]
20370#[target_feature(enable = "avx512f")]
20371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20372#[cfg_attr(test, assert_instr(vpsllq))]
20373pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20374    unsafe {
20375        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20376        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20377    }
20378}
20379
20380/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20381///
20382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20383#[inline]
20384#[target_feature(enable = "avx512f")]
20385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20386#[cfg_attr(test, assert_instr(vpsllq))]
20387pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20388    unsafe {
20389        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20390        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20391    }
20392}
20393
20394/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20395///
20396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20397#[inline]
20398#[target_feature(enable = "avx512f,avx512vl")]
20399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20400#[cfg_attr(test, assert_instr(vpsllq))]
20401pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20402    unsafe {
20403        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20404        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20405    }
20406}
20407
20408/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20409///
20410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20411#[inline]
20412#[target_feature(enable = "avx512f,avx512vl")]
20413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20414#[cfg_attr(test, assert_instr(vpsllq))]
20415pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20416    unsafe {
20417        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20418        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20419    }
20420}
20421
20422/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20423///
20424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20425#[inline]
20426#[target_feature(enable = "avx512f,avx512vl")]
20427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20428#[cfg_attr(test, assert_instr(vpsllq))]
20429pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20430    unsafe {
20431        let shf = _mm_sll_epi64(a, count).as_i64x2();
20432        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20433    }
20434}
20435
20436/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vpsllq))]
20443pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20444    unsafe {
20445        let shf = _mm_sll_epi64(a, count).as_i64x2();
20446        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20447    }
20448}
20449
20450/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20451///
20452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20453#[inline]
20454#[target_feature(enable = "avx512f")]
20455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20456#[cfg_attr(test, assert_instr(vpsrlq))]
20457pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20458    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20459}
20460
20461/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20462///
20463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20464#[inline]
20465#[target_feature(enable = "avx512f")]
20466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20467#[cfg_attr(test, assert_instr(vpsrlq))]
20468pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20469    unsafe {
20470        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20471        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20472    }
20473}
20474
20475/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20476///
20477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20478#[inline]
20479#[target_feature(enable = "avx512f")]
20480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20481#[cfg_attr(test, assert_instr(vpsrlq))]
20482pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20483    unsafe {
20484        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20485        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20486    }
20487}
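
// Test-only sketch (hypothetical name, illustrative values): with eight 64-bit
// lanes the mask is a `__mmask8`, and the write-masked form merges unselected
// lanes from `src` instead of zeroing them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srl_epi64_mask_sketch() -> __m512i {
    let a = _mm512_set1_epi64(1i64 << 32);
    let count = _mm_set_epi32(0, 0, 0, 16); // shift every lane right by 16
    let src = _mm512_set1_epi64(-1);
    // Lanes 0..=3 become 1 << 16; lanes 4..=7 keep `src` (-1).
    _mm512_mask_srl_epi64(src, 0b0000_1111, a, count)
}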
20488
20489/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20490///
20491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20492#[inline]
20493#[target_feature(enable = "avx512f,avx512vl")]
20494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20495#[cfg_attr(test, assert_instr(vpsrlq))]
20496pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20497    unsafe {
20498        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20499        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20500    }
20501}
20502
20503/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20504///
20505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20506#[inline]
20507#[target_feature(enable = "avx512f,avx512vl")]
20508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20509#[cfg_attr(test, assert_instr(vpsrlq))]
20510pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20511    unsafe {
20512        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20513        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20514    }
20515}
20516
20517/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsrlq))]
20524pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20525    unsafe {
20526        let shf = _mm_srl_epi64(a, count).as_i64x2();
20527        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20528    }
20529}
20530
20531/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20532///
20533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20534#[inline]
20535#[target_feature(enable = "avx512f,avx512vl")]
20536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20537#[cfg_attr(test, assert_instr(vpsrlq))]
20538pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20539    unsafe {
20540        let shf = _mm_srl_epi64(a, count).as_i64x2();
20541        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20542    }
20543}
20544
20545/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20546///
20547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
20548#[inline]
20549#[target_feature(enable = "avx512f")]
20550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20551#[cfg_attr(test, assert_instr(vpsrad))]
20552pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
20553    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20554}
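
// Test-only sketch (hypothetical name, illustrative values): the arithmetic
// shift replicates the sign bit, so negative lanes stay negative. Per Intel's
// description, a count above 31 fills each lane with copies of its sign bit.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sra_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-32);
    let count = _mm_set_epi32(0, 0, 0, 4); // shift every lane right by 4
    _mm512_sra_epi32(a, count) // every lane becomes -32 >> 4 = -2
}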
20555
20556/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20559#[inline]
20560#[target_feature(enable = "avx512f")]
20561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20562#[cfg_attr(test, assert_instr(vpsrad))]
20563pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20564    unsafe {
20565        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20566        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20567    }
20568}
20569
20570/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20571///
20572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20573#[inline]
20574#[target_feature(enable = "avx512f")]
20575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20576#[cfg_attr(test, assert_instr(vpsrad))]
20577pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20578    unsafe {
20579        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20580        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20581    }
20582}
20583
20584/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20587#[inline]
20588#[target_feature(enable = "avx512f,avx512vl")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsrad))]
20591pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20592    unsafe {
20593        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20594        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20595    }
20596}
20597
20598/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20599///
20600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20601#[inline]
20602#[target_feature(enable = "avx512f,avx512vl")]
20603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20604#[cfg_attr(test, assert_instr(vpsrad))]
20605pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20606    unsafe {
20607        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20608        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20609    }
20610}
20611
20612/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20613///
20614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20615#[inline]
20616#[target_feature(enable = "avx512f,avx512vl")]
20617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20618#[cfg_attr(test, assert_instr(vpsrad))]
20619pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20620    unsafe {
20621        let shf = _mm_sra_epi32(a, count).as_i32x4();
20622        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20623    }
20624}
20625
20626/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20627///
20628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20629#[inline]
20630#[target_feature(enable = "avx512f,avx512vl")]
20631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20632#[cfg_attr(test, assert_instr(vpsrad))]
20633pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20634    unsafe {
20635        let shf = _mm_sra_epi32(a, count).as_i32x4();
20636        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20637    }
20638}
20639
20640/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20641///
20642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
20643#[inline]
20644#[target_feature(enable = "avx512f")]
20645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20646#[cfg_attr(test, assert_instr(vpsraq))]
20647pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
20648    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20649}
20650
20651/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20652///
20653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20654#[inline]
20655#[target_feature(enable = "avx512f")]
20656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20657#[cfg_attr(test, assert_instr(vpsraq))]
20658pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20659    unsafe {
20660        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20661        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20662    }
20663}
20664
20665/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20666///
20667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20668#[inline]
20669#[target_feature(enable = "avx512f")]
20670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20671#[cfg_attr(test, assert_instr(vpsraq))]
20672pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20673    unsafe {
20674        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20675        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20676    }
20677}
20678
20679/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20680///
20681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20682#[inline]
20683#[target_feature(enable = "avx512f,avx512vl")]
20684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20685#[cfg_attr(test, assert_instr(vpsraq))]
20686pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
20687    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20688}
20689
20690/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20691///
20692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20693#[inline]
20694#[target_feature(enable = "avx512f,avx512vl")]
20695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20696#[cfg_attr(test, assert_instr(vpsraq))]
20697pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20698    unsafe {
20699        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20700        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20701    }
20702}
20703
20704/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20705///
20706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20707#[inline]
20708#[target_feature(enable = "avx512f,avx512vl")]
20709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20710#[cfg_attr(test, assert_instr(vpsraq))]
20711pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20712    unsafe {
20713        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20714        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20715    }
20716}
20717
20718/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20719///
20720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20721#[inline]
20722#[target_feature(enable = "avx512f,avx512vl")]
20723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20724#[cfg_attr(test, assert_instr(vpsraq))]
20725pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
20726    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20727}
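
// Test-only sketch (hypothetical name, illustrative values): SSE2/AVX2 provide
// no 64-bit arithmetic right shift, so `_mm256_sra_epi64` and `_mm_sra_epi64`
// above are only available with AVX-512F plus AVX-512VL.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sra_epi64_vl_sketch() -> __m256i {
    let a = _mm256_set1_epi64x(-9);
    let count = _mm_set_epi32(0, 0, 0, 1); // shift every lane right by 1
    _mm256_sra_epi64(a, count) // every lane becomes -9 >> 1 = -5
}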
20728
20729/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20735#[cfg_attr(test, assert_instr(vpsraq))]
20736pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737    unsafe {
20738        let shf = _mm_sra_epi64(a, count).as_i64x2();
20739        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20740    }
20741}
20742
20743/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20746#[inline]
20747#[target_feature(enable = "avx512f,avx512vl")]
20748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20749#[cfg_attr(test, assert_instr(vpsraq))]
20750pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20751    unsafe {
20752        let shf = _mm_sra_epi64(a, count).as_i64x2();
20753        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20754    }
20755}
20756
20757/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20758///
20759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
20760#[inline]
20761#[target_feature(enable = "avx512f")]
20762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20763#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20764#[rustc_legacy_const_generics(1)]
20765pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20766    unsafe {
20767        static_assert_uimm_bits!(IMM8, 8);
20768        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20769    }
20770}
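
// Test-only sketch (hypothetical name, illustrative values): the shift amount
// is a const generic, and values above 31 are clamped by the `min(31)` above,
// so an oversized immediate behaves like a shift by 31.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srai_epi32_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-256);
    let by_four = _mm512_srai_epi32::<4>(a); // every lane becomes -16
    let clamped = _mm512_srai_epi32::<40>(a); // every lane becomes -1
    (by_four, clamped)
}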
20771
20772/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20773///
20774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20775#[inline]
20776#[target_feature(enable = "avx512f")]
20777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20778#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20779#[rustc_legacy_const_generics(3)]
20780pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20781    unsafe {
20782        static_assert_uimm_bits!(IMM8, 8);
20783        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20784        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20785    }
20786}
20787
20788/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20789///
20790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20791#[inline]
20792#[target_feature(enable = "avx512f")]
20793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20794#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20795#[rustc_legacy_const_generics(2)]
20796pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20797    unsafe {
20798        static_assert_uimm_bits!(IMM8, 8);
20799        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20800        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20801    }
20802}
20803
20804/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20805///
20806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20807#[inline]
20808#[target_feature(enable = "avx512f,avx512vl")]
20809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20810#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20811#[rustc_legacy_const_generics(3)]
20812pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20813    unsafe {
20814        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20815        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20816    }
20817}
20818
20819/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20820///
20821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20822#[inline]
20823#[target_feature(enable = "avx512f,avx512vl")]
20824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20825#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20826#[rustc_legacy_const_generics(2)]
20827pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20828    unsafe {
20829        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20830        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20831    }
20832}
20833
20834/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20835///
20836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20837#[inline]
20838#[target_feature(enable = "avx512f,avx512vl")]
20839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20840#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20841#[rustc_legacy_const_generics(3)]
20842pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20843    unsafe {
20844        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20845        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20846    }
20847}
20848
20849/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20852#[inline]
20853#[target_feature(enable = "avx512f,avx512vl")]
20854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20855#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20856#[rustc_legacy_const_generics(2)]
20857pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20858    unsafe {
20859        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20860        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20861    }
20862}
20863
20864/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20865///
20866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
20867#[inline]
20868#[target_feature(enable = "avx512f")]
20869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20870#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20871#[rustc_legacy_const_generics(1)]
20872pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20873    unsafe {
20874        static_assert_uimm_bits!(IMM8, 8);
20875        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20876    }
20877}
20878
20879/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20880///
20881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20882#[inline]
20883#[target_feature(enable = "avx512f")]
20884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20885#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20886#[rustc_legacy_const_generics(3)]
20887pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20888    unsafe {
20889        static_assert_uimm_bits!(IMM8, 8);
20890        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20891        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20892    }
20893}
20894
20895/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20896///
20897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20898#[inline]
20899#[target_feature(enable = "avx512f")]
20900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20901#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20902#[rustc_legacy_const_generics(2)]
20903pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20904    unsafe {
20905        static_assert_uimm_bits!(IMM8, 8);
20906        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20907        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20908    }
20909}
20910
20911/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20912///
20913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20914#[inline]
20915#[target_feature(enable = "avx512f,avx512vl")]
20916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20917#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20918#[rustc_legacy_const_generics(1)]
20919pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20920    unsafe {
20921        static_assert_uimm_bits!(IMM8, 8);
20922        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20923    }
20924}
20925
20926/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20927///
20928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
20929#[inline]
20930#[target_feature(enable = "avx512f,avx512vl")]
20931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20932#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20933#[rustc_legacy_const_generics(3)]
20934pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20935    unsafe {
20936        static_assert_uimm_bits!(IMM8, 8);
20937        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20938        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20939    }
20940}
20941
20942/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20943///
20944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
20945#[inline]
20946#[target_feature(enable = "avx512f,avx512vl")]
20947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20948#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20949#[rustc_legacy_const_generics(2)]
20950pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20951    unsafe {
20952        static_assert_uimm_bits!(IMM8, 8);
20953        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
20954        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20955    }
20956}
20957
20958/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
20961#[inline]
20962#[target_feature(enable = "avx512f,avx512vl")]
20963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20964#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20965#[rustc_legacy_const_generics(1)]
20966pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
20967    unsafe {
20968        static_assert_uimm_bits!(IMM8, 8);
20969        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
20970    }
20971}
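
// Test-only sketch (hypothetical name, illustrative values): the 128-bit
// immediate form; shifting by 63 leaves only copies of the sign bit.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn srai_epi64_sketch() -> __m128i {
    let a = _mm_set1_epi64x(i64::MIN);
    _mm_srai_epi64::<63>(a) // both lanes become -1
}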
20972
20973/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20974///
20975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
20976#[inline]
20977#[target_feature(enable = "avx512f,avx512vl")]
20978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20979#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20980#[rustc_legacy_const_generics(3)]
20981pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20982    unsafe {
20983        static_assert_uimm_bits!(IMM8, 8);
20984        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
20985        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20986    }
20987}
20988
20989/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20990///
20991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
20992#[inline]
20993#[target_feature(enable = "avx512f,avx512vl")]
20994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20995#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20996#[rustc_legacy_const_generics(2)]
20997pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20998    unsafe {
20999        static_assert_uimm_bits!(IMM8, 8);
21000        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21001        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21002    }
21003}
21004
21005/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21006///
21007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21008#[inline]
21009#[target_feature(enable = "avx512f")]
21010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21011#[cfg_attr(test, assert_instr(vpsravd))]
21012pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21013    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21014}
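
// Test-only sketch (hypothetical name, illustrative values): unlike the
// single-count `_mm512_sra_epi32`, the variable form reads one shift amount
// per lane from `count`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srav_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-64);
    // Lane i is shifted right (arithmetically) by i bits.
    let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Lane 0 stays -64, lane 1 becomes -32, ..., lanes 6..=15 become -1.
    _mm512_srav_epi32(a, count)
}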
21015
21016/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21019#[inline]
21020#[target_feature(enable = "avx512f")]
21021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21022#[cfg_attr(test, assert_instr(vpsravd))]
21023pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21024    unsafe {
21025        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21026        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21027    }
21028}
21029
21030/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21031///
21032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21033#[inline]
21034#[target_feature(enable = "avx512f")]
21035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21036#[cfg_attr(test, assert_instr(vpsravd))]
21037pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21038    unsafe {
21039        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21040        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21041    }
21042}
21043
21044/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21045///
21046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21047#[inline]
21048#[target_feature(enable = "avx512f,avx512vl")]
21049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21050#[cfg_attr(test, assert_instr(vpsravd))]
21051pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21052    unsafe {
21053        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21054        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21055    }
21056}
21057
21058/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21059///
21060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21061#[inline]
21062#[target_feature(enable = "avx512f,avx512vl")]
21063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21064#[cfg_attr(test, assert_instr(vpsravd))]
21065pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21066    unsafe {
21067        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21068        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21069    }
21070}
21071
21072/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21073///
21074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21075#[inline]
21076#[target_feature(enable = "avx512f,avx512vl")]
21077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21078#[cfg_attr(test, assert_instr(vpsravd))]
21079pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21080    unsafe {
21081        let shf = _mm_srav_epi32(a, count).as_i32x4();
21082        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21083    }
21084}
21085
21086/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21087///
21088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21089#[inline]
21090#[target_feature(enable = "avx512f,avx512vl")]
21091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21092#[cfg_attr(test, assert_instr(vpsravd))]
21093pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21094    unsafe {
21095        let shf = _mm_srav_epi32(a, count).as_i32x4();
21096        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21097    }
21098}
21099
21100/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21101///
21102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21103#[inline]
21104#[target_feature(enable = "avx512f")]
21105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21106#[cfg_attr(test, assert_instr(vpsravq))]
21107pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21108    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21109}
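
// Test-only sketch (hypothetical name, illustrative values): AVX2 offers a
// per-lane variable arithmetic shift only for 32-bit elements, so the 64-bit
// variable form here arrives with AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn srav_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(i64::MIN);
    let count = _mm512_setr_epi64(0, 8, 16, 24, 32, 40, 48, 63);
    // Lane 0 stays i64::MIN; each later lane moves the sign bits further
    // right, and lane 7 becomes -1.
    _mm512_srav_epi64(a, count)
}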
21110
21111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21112///
21113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21114#[inline]
21115#[target_feature(enable = "avx512f")]
21116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21117#[cfg_attr(test, assert_instr(vpsravq))]
21118pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21119    unsafe {
21120        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21121        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21122    }
21123}
21124
21125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21126///
21127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21128#[inline]
21129#[target_feature(enable = "avx512f")]
21130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21131#[cfg_attr(test, assert_instr(vpsravq))]
21132pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21133    unsafe {
21134        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21135        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21136    }
21137}
21138
21139/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21140///
21141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21142#[inline]
21143#[target_feature(enable = "avx512f,avx512vl")]
21144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21145#[cfg_attr(test, assert_instr(vpsravq))]
21146pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21147    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21148}
21149
21150/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21151///
21152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21153#[inline]
21154#[target_feature(enable = "avx512f,avx512vl")]
21155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21156#[cfg_attr(test, assert_instr(vpsravq))]
21157pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21158    unsafe {
21159        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21160        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21161    }
21162}
21163
21164/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vpsravq))]
21171pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21172    unsafe {
21173        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21174        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21175    }
21176}
21177
21178/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21179///
21180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21181#[inline]
21182#[target_feature(enable = "avx512f,avx512vl")]
21183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21184#[cfg_attr(test, assert_instr(vpsravq))]
21185pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21186    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21187}
21188
21189/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21190///
21191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21192#[inline]
21193#[target_feature(enable = "avx512f,avx512vl")]
21194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21195#[cfg_attr(test, assert_instr(vpsravq))]
21196pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21197    unsafe {
21198        let shf = _mm_srav_epi64(a, count).as_i64x2();
21199        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21200    }
21201}
21202
21203/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21204///
21205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21206#[inline]
21207#[target_feature(enable = "avx512f,avx512vl")]
21208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21209#[cfg_attr(test, assert_instr(vpsravq))]
21210pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21211    unsafe {
21212        let shf = _mm_srav_epi64(a, count).as_i64x2();
21213        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21214    }
21215}
21216
21217/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21218///
21219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21220#[inline]
21221#[target_feature(enable = "avx512f")]
21222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21223#[cfg_attr(test, assert_instr(vprolvd))]
21224pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21225    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21226}
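
// Test-only sketch (hypothetical name, illustrative values): rotation counts
// are read per lane, and per Intel's description only the low five bits of
// each count matter for 32-bit lanes (i.e. counts are taken modulo 32).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rolv_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 31, 32, 33, 40, 64);
    // Lane 11 (count 31) becomes i32::MIN; lanes 12 and 15 (counts 32 and 64)
    // wrap back to 1, and lane 13 (count 33) becomes 2.
    _mm512_rolv_epi32(a, b)
}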
21227
21228/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21229///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
    }
}

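// Illustrative sketch (not part of the original source; it assumes a caller
// compiled with the `avx512f` feature): each lane is rotated left by its own
// count, so bits shifted out on the left re-enter on the right.
//
//     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
//     let b = _mm512_set1_epi32(1);
//     let r = _mm512_rolv_epi32(a, b); // every lane == 0x0000_0003
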
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
    }
}

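// Illustrative sketch (assumed example, not from the original source): the
// variable rotate-right wraps bits shifted out on the right back in as the
// new high bits of each lane.
//
//     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
//     let b = _mm512_set1_epi32(1);
//     let r = _mm512_rorv_epi32(a, b); // every lane == 0xC000_0000
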
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
    }
}

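// Illustrative sketch (assumed example): the 64-bit variant behaves like the
// 32-bit one, except the per-lane rotate count is taken modulo 64.
//
//     let a = _mm512_set1_epi64(0x8000_0000_0000_0001u64 as i64);
//     let b = _mm512_set1_epi64(1);
//     let r = _mm512_rolv_epi64(a, b); // every lane == 0x0000_0000_0000_0003
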
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
    }
}

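// Illustrative sketch (assumed example): rotate-right on 64-bit lanes, with
// the low bit wrapping around to become the new sign bit.
//
//     let a = _mm512_set1_epi64(0x8000_0000_0000_0001u64 as i64);
//     let b = _mm512_set1_epi64(1);
//     let r = _mm512_rorv_epi64(a, b); // every lane == 0xC000_0000_0000_0000
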
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

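// Illustrative sketch (assumed example): unlike the rotates above, the variable
// shifts fill with zeros, and a per-lane count of 32 or more clears that lane.
//
//     let a = _mm512_set1_epi32(1);
//     let c = _mm512_set1_epi32(31);
//     let r = _mm512_sllv_epi32(a, c); // every lane == 0x8000_0000
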
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

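// Illustrative sketch (assumed example): the right shift is logical, so zeros
// are shifted in from the left regardless of the sign bit.
//
//     let a = _mm512_set1_epi32(i32::MIN); // 0x8000_0000
//     let c = _mm512_set1_epi32(31);
//     let r = _mm512_srlv_epi32(a, c); // every lane == 1
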
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

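// Illustrative sketch (assumed example): 64-bit variable left shift; a per-lane
// count of 64 or more clears the lane.
//
//     let a = _mm512_set1_epi64(1);
//     let c = _mm512_set1_epi64(63);
//     let r = _mm512_sllv_epi64(a, c); // every lane == 0x8000_0000_0000_0000
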
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

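// Illustrative sketch (assumed example): 64-bit logical right shift, zeros
// shifted in from the left.
//
//     let a = _mm512_set1_epi64(i64::MIN); // 0x8000_0000_0000_0000
//     let c = _mm512_set1_epi64(63);
//     let r = _mm512_srlv_epi64(a, c); // every lane == 1
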
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

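// Illustrative sketch (assumed example): the same 8-bit control is applied to
// every 128-bit lane, with two selector bits per destination element.
//
//     // MASK = 0b00_01_10_11 reverses the four f32 elements inside each 128-bit lane.
//     let r = _mm512_permute_ps::<0b00_01_10_11>(a);
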
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1),
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 4);
        let r = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 4);
        let r = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
    }
}

22317/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22318///
22319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
22320#[inline]
22321#[target_feature(enable = "avx512f")]
22322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22323#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22324#[rustc_legacy_const_generics(1)]
22325pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22326    unsafe {
22327        static_assert_uimm_bits!(MASK, 8);
22328        simd_shuffle!(
22329            a,
22330            a,
22331            [
22332                MASK as u32 & 0b11,
22333                (MASK as u32 >> 2) & 0b11,
22334                (MASK as u32 >> 4) & 0b11,
22335                (MASK as u32 >> 6) & 0b11,
22336                (MASK as u32 & 0b11) + 4,
22337                ((MASK as u32 >> 2) & 0b11) + 4,
22338                ((MASK as u32 >> 4) & 0b11) + 4,
22339                ((MASK as u32 >> 6) & 0b11) + 4,
22340            ],
22341        )
22342    }
22343}
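
// Illustrative sketch (not part of the upstream source): unlike `_mm512_permute_pd`
// above, the immediate here is read as four 2-bit indices that are applied to each
// 256-bit half independently, so the same selection repeats in both halves:
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     // MASK = 0b00_01_10_11 reverses the four elements of each 256-bit half,
//     // giving [3, 2, 1, 0, 7, 6, 5, 4].
//     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);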
22344
22345/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22346///
22347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22348#[inline]
22349#[target_feature(enable = "avx512f")]
22350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22351#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22352#[rustc_legacy_const_generics(3)]
22353pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22354    src: __m512i,
22355    k: __mmask8,
22356    a: __m512i,
22357) -> __m512i {
22358    unsafe {
22359        static_assert_uimm_bits!(MASK, 8);
22360        let r = _mm512_permutex_epi64::<MASK>(a);
22361        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22362    }
22363}
22364
22365/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22366///
22367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22368#[inline]
22369#[target_feature(enable = "avx512f")]
22370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22371#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22372#[rustc_legacy_const_generics(2)]
22373pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22374    unsafe {
22375        static_assert_uimm_bits!(MASK, 8);
22376        let r = _mm512_permutex_epi64::<MASK>(a);
22377        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22378    }
22379}
22380
22381/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22382///
22383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22384#[inline]
22385#[target_feature(enable = "avx512f,avx512vl")]
22386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22387#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22388#[rustc_legacy_const_generics(1)]
22389pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22390    unsafe {
22391        static_assert_uimm_bits!(MASK, 8);
22392        simd_shuffle!(
22393            a,
22394            a,
22395            [
22396                MASK as u32 & 0b11,
22397                (MASK as u32 >> 2) & 0b11,
22398                (MASK as u32 >> 4) & 0b11,
22399                (MASK as u32 >> 6) & 0b11,
22400            ],
22401        )
22402    }
22403}
22404
22405/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22406///
22407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22408#[inline]
22409#[target_feature(enable = "avx512f,avx512vl")]
22410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22411#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22412#[rustc_legacy_const_generics(3)]
22413pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22414    src: __m256i,
22415    k: __mmask8,
22416    a: __m256i,
22417) -> __m256i {
22418    unsafe {
22419        static_assert_uimm_bits!(MASK, 8);
22420        let r = _mm256_permutex_epi64::<MASK>(a);
22421        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22422    }
22423}
22424
22425/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22426///
22427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22428#[inline]
22429#[target_feature(enable = "avx512f,avx512vl")]
22430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22431#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22432#[rustc_legacy_const_generics(2)]
22433pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22434    unsafe {
22435        static_assert_uimm_bits!(MASK, 8);
22436        let r = _mm256_permutex_epi64::<MASK>(a);
22437        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22438    }
22439}
22440
22441/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22442///
22443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22444#[inline]
22445#[target_feature(enable = "avx512f")]
22446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22447#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22448#[rustc_legacy_const_generics(1)]
22449pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22450    unsafe {
22451        static_assert_uimm_bits!(MASK, 8);
22452        simd_shuffle!(
22453            a,
22454            a,
22455            [
22456                MASK as u32 & 0b11,
22457                (MASK as u32 >> 2) & 0b11,
22458                (MASK as u32 >> 4) & 0b11,
22459                (MASK as u32 >> 6) & 0b11,
22460                (MASK as u32 & 0b11) + 4,
22461                ((MASK as u32 >> 2) & 0b11) + 4,
22462                ((MASK as u32 >> 4) & 0b11) + 4,
22463                ((MASK as u32 >> 6) & 0b11) + 4,
22464            ],
22465        )
22466    }
22467}
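
// Illustrative sketch (not part of the upstream source): note the difference from
// `_mm512_permute_pd` above, which can only swap elements inside each 128-bit pair.
// `_mm512_permutex_pd` reorders all four elements of each 256-bit half:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     // MASK = 0b00_01_10_11 reverses each 256-bit half, giving
//     // [3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0].
//     let r = _mm512_permutex_pd::<0b00_01_10_11>(a);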
22468
22469/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22470///
22471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22472#[inline]
22473#[target_feature(enable = "avx512f")]
22474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22475#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22476#[rustc_legacy_const_generics(3)]
22477pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22478    unsafe {
22479        let r = _mm512_permutex_pd::<MASK>(a);
22480        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22481    }
22482}
22483
22484/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22485///
22486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22487#[inline]
22488#[target_feature(enable = "avx512f")]
22489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22490#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22491#[rustc_legacy_const_generics(2)]
22492pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22493    unsafe {
22494        let r = _mm512_permutex_pd::<MASK>(a);
22495        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22496    }
22497}
22498
22499/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22506#[rustc_legacy_const_generics(1)]
22507pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22508    unsafe {
22509        static_assert_uimm_bits!(MASK, 8);
22510        simd_shuffle!(
22511            a,
22512            a,
22513            [
22514                MASK as u32 & 0b11,
22515                (MASK as u32 >> 2) & 0b11,
22516                (MASK as u32 >> 4) & 0b11,
22517                (MASK as u32 >> 6) & 0b11,
22518            ],
22519        )
22520    }
22521}
22522
22523/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22524///
22525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22526#[inline]
22527#[target_feature(enable = "avx512f,avx512vl")]
22528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22529#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22530#[rustc_legacy_const_generics(3)]
22531pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22532    unsafe {
22533        static_assert_uimm_bits!(MASK, 8);
22534        let r = _mm256_permutex_pd::<MASK>(a);
22535        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22536    }
22537}
22538
22539/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22540///
22541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22542#[inline]
22543#[target_feature(enable = "avx512f,avx512vl")]
22544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22545#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22546#[rustc_legacy_const_generics(2)]
22547pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22548    unsafe {
22549        static_assert_uimm_bits!(MASK, 8);
22550        let r = _mm256_permutex_pd::<MASK>(a);
22551        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22552    }
22553}
22554
22555/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22556///
22557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22558#[inline]
22559#[target_feature(enable = "avx512f")]
22560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22561#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22562pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22563    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22564}
22565
22566/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22567///
22568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22569#[inline]
22570#[target_feature(enable = "avx512f")]
22571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22572#[cfg_attr(test, assert_instr(vpermd))]
22573pub fn _mm512_mask_permutevar_epi32(
22574    src: __m512i,
22575    k: __mmask16,
22576    idx: __m512i,
22577    a: __m512i,
22578) -> __m512i {
22579    unsafe {
22580        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
22581        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22582    }
22583}
22584
22585/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22586///
22587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22588#[inline]
22589#[target_feature(enable = "avx512f")]
22590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22591#[cfg_attr(test, assert_instr(vpermilps))]
22592pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
22593    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22594}
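
// Illustrative sketch (not part of the upstream source): here the selector is a
// vector rather than an immediate; only the two low bits of each 32-bit element of
// `b` are used, and selection never crosses a 128-bit lane:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     // Broadcasting the index 3 picks the last element of every 128-bit lane,
//     // giving [3.0, 3.0, 3.0, 3.0, 7.0, 7.0, 7.0, 7.0, 11.0, ..., 15.0].
//     let r = _mm512_permutevar_ps(a, _mm512_set1_epi32(3));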
22595
22596/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22597///
22598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22599#[inline]
22600#[target_feature(enable = "avx512f")]
22601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22602#[cfg_attr(test, assert_instr(vpermilps))]
22603pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22604    unsafe {
22605        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22606        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22607    }
22608}
22609
22610/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22611///
22612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22613#[inline]
22614#[target_feature(enable = "avx512f")]
22615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22616#[cfg_attr(test, assert_instr(vpermilps))]
22617pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22618    unsafe {
22619        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22620        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22621    }
22622}
22623
22624/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22625///
22626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22627#[inline]
22628#[target_feature(enable = "avx512f,avx512vl")]
22629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22630#[cfg_attr(test, assert_instr(vpermilps))]
22631pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22632    unsafe {
22633        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22634        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22635    }
22636}
22637
22638/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22639///
22640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22641#[inline]
22642#[target_feature(enable = "avx512f,avx512vl")]
22643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22644#[cfg_attr(test, assert_instr(vpermilps))]
22645pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22646    unsafe {
22647        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22648        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22649    }
22650}
22651
22652/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22653///
22654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22655#[inline]
22656#[target_feature(enable = "avx512f,avx512vl")]
22657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22658#[cfg_attr(test, assert_instr(vpermilps))]
22659pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22660    unsafe {
22661        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22662        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22663    }
22664}
22665
22666/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22667///
22668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22669#[inline]
22670#[target_feature(enable = "avx512f,avx512vl")]
22671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22672#[cfg_attr(test, assert_instr(vpermilps))]
22673pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22674    unsafe {
22675        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22676        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22677    }
22678}
22679
22680/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22681///
22682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22683#[inline]
22684#[target_feature(enable = "avx512f")]
22685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22686#[cfg_attr(test, assert_instr(vpermilpd))]
22687pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
22688    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22689}
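
// Illustrative sketch (not part of the upstream source): for the `pd` variant the
// selector is bit 1 of each 64-bit element of `b`, not bit 0, matching the
// behaviour of `vpermilpd` with a vector control:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     // A control of 2 (binary 10) in every element selects the upper element of
//     // each 128-bit lane, giving [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0].
//     let r = _mm512_permutevar_pd(a, _mm512_set1_epi64(2));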
22690
22691/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22692///
22693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22694#[inline]
22695#[target_feature(enable = "avx512f")]
22696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22697#[cfg_attr(test, assert_instr(vpermilpd))]
22698pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22699    unsafe {
22700        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22701        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22702    }
22703}
22704
22705/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22706///
22707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22708#[inline]
22709#[target_feature(enable = "avx512f")]
22710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22711#[cfg_attr(test, assert_instr(vpermilpd))]
22712pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22713    unsafe {
22714        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22715        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22716    }
22717}
22718
22719/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22720///
22721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22722#[inline]
22723#[target_feature(enable = "avx512f,avx512vl")]
22724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22725#[cfg_attr(test, assert_instr(vpermilpd))]
22726pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22727    unsafe {
22728        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22729        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22730    }
22731}
22732
22733/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22734///
22735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22736#[inline]
22737#[target_feature(enable = "avx512f,avx512vl")]
22738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22739#[cfg_attr(test, assert_instr(vpermilpd))]
22740pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22741    unsafe {
22742        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22743        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22744    }
22745}
22746
22747/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22748///
22749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22750#[inline]
22751#[target_feature(enable = "avx512f,avx512vl")]
22752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22753#[cfg_attr(test, assert_instr(vpermilpd))]
22754pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22755    unsafe {
22756        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22757        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22758    }
22759}
22760
22761/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22762///
22763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22764#[inline]
22765#[target_feature(enable = "avx512f,avx512vl")]
22766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22767#[cfg_attr(test, assert_instr(vpermilpd))]
22768pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22769    unsafe {
22770        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22771        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22772    }
22773}
22774
22775/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22778#[inline]
22779#[target_feature(enable = "avx512f")]
22780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22781#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22782pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22783    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22784}
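
// Illustrative sketch (not part of the upstream source): the `permutexvar` family
// performs a full cross-lane permutation, so any of the sixteen source elements can
// land in any destination position. Reversing the vector, for example:
//
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     // The result is [15, 14, ..., 1, 0].
//     let r = _mm512_permutexvar_epi32(idx, a);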
22785
22786/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22787///
22788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22789#[inline]
22790#[target_feature(enable = "avx512f")]
22791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22792#[cfg_attr(test, assert_instr(vpermd))]
22793pub fn _mm512_mask_permutexvar_epi32(
22794    src: __m512i,
22795    k: __mmask16,
22796    idx: __m512i,
22797    a: __m512i,
22798) -> __m512i {
22799    unsafe {
22800        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22801        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22802    }
22803}
22804
22805/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22806///
22807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22808#[inline]
22809#[target_feature(enable = "avx512f")]
22810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22811#[cfg_attr(test, assert_instr(vpermd))]
22812pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22813    unsafe {
22814        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22815        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22816    }
22817}
22818
22819/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22820///
22821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22822#[inline]
22823#[target_feature(enable = "avx512f,avx512vl")]
22824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22825#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22826pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
22827    _mm256_permutevar8x32_epi32(a, idx) // LLVM uses llvm.x86.avx2.permd
22828}
22829
22830/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22831///
22832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22833#[inline]
22834#[target_feature(enable = "avx512f,avx512vl")]
22835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22836#[cfg_attr(test, assert_instr(vpermd))]
22837pub fn _mm256_mask_permutexvar_epi32(
22838    src: __m256i,
22839    k: __mmask8,
22840    idx: __m256i,
22841    a: __m256i,
22842) -> __m256i {
22843    unsafe {
22844        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22845        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22846    }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22852#[inline]
22853#[target_feature(enable = "avx512f,avx512vl")]
22854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22855#[cfg_attr(test, assert_instr(vpermd))]
22856pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22857    unsafe {
22858        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22859        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22860    }
22861}
22862
22863/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22864///
22865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22866#[inline]
22867#[target_feature(enable = "avx512f")]
22868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22869#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22870pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
22871    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22872}
22873
22874/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22875///
22876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22877#[inline]
22878#[target_feature(enable = "avx512f")]
22879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22880#[cfg_attr(test, assert_instr(vpermq))]
22881pub fn _mm512_mask_permutexvar_epi64(
22882    src: __m512i,
22883    k: __mmask8,
22884    idx: __m512i,
22885    a: __m512i,
22886) -> __m512i {
22887    unsafe {
22888        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22889        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22890    }
22891}
22892
22893/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22896#[inline]
22897#[target_feature(enable = "avx512f")]
22898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22899#[cfg_attr(test, assert_instr(vpermq))]
22900pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22901    unsafe {
22902        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22903        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22904    }
22905}
22906
22907/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22908///
22909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22910#[inline]
22911#[target_feature(enable = "avx512f,avx512vl")]
22912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22913#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22914pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
22915    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22916}
22917
22918/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22919///
22920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22921#[inline]
22922#[target_feature(enable = "avx512f,avx512vl")]
22923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22924#[cfg_attr(test, assert_instr(vpermq))]
22925pub fn _mm256_mask_permutexvar_epi64(
22926    src: __m256i,
22927    k: __mmask8,
22928    idx: __m256i,
22929    a: __m256i,
22930) -> __m256i {
22931    unsafe {
22932        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22933        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
22934    }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
22940#[inline]
22941#[target_feature(enable = "avx512f,avx512vl")]
22942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22943#[cfg_attr(test, assert_instr(vpermq))]
22944pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22945    unsafe {
22946        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
22947        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22948    }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4328)
22954#[inline]
22955#[target_feature(enable = "avx512f")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vpermps))]
22958pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
22959    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
22960}
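
// Illustrative sketch (not part of the upstream source): the floating-point form
// uses the same full cross-lane indexing as `_mm512_permutexvar_epi32` above, e.g.
// broadcasting a single element to the whole vector:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     // Every destination element reads source element 5, so the result is all 5.0.
//     let r = _mm512_permutexvar_ps(_mm512_set1_epi32(5), a);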
22961
22962/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22963///
22964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
22965#[inline]
22966#[target_feature(enable = "avx512f")]
22967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22968#[cfg_attr(test, assert_instr(vpermps))]
22969pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22970    unsafe {
22971        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
22972        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22973    }
22974}
22975
22976/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22977///
22978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
22979#[inline]
22980#[target_feature(enable = "avx512f")]
22981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22982#[cfg_attr(test, assert_instr(vpermps))]
22983pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22984    unsafe {
22985        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
22986        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22987    }
22988}
22989
22990/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
22991///
22992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
22993#[inline]
22994#[target_feature(enable = "avx512f,avx512vl")]
22995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22996#[cfg_attr(test, assert_instr(vpermps))]
22997pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
22998    _mm256_permutevar8x32_ps(a, idx) // LLVM uses llvm.x86.avx2.permps
22999}
23000
23001/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23002///
23003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23004#[inline]
23005#[target_feature(enable = "avx512f,avx512vl")]
23006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23007#[cfg_attr(test, assert_instr(vpermps))]
23008pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23009    unsafe {
23010        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23011        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23012    }
23013}
23014
23015/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23016///
23017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23018#[inline]
23019#[target_feature(enable = "avx512f,avx512vl")]
23020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23021#[cfg_attr(test, assert_instr(vpermps))]
23022pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23023    unsafe {
23024        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23025        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23026    }
23027}
23028
23029/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23030///
23031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23032#[inline]
23033#[target_feature(enable = "avx512f")]
23034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23035#[cfg_attr(test, assert_instr(vpermpd))]
23036pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23037    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23038}
23039
23040/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23041///
23042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23043#[inline]
23044#[target_feature(enable = "avx512f")]
23045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23046#[cfg_attr(test, assert_instr(vpermpd))]
23047pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23048    unsafe {
23049        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23050        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23051    }
23052}
23053
23054/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23055///
23056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23057#[inline]
23058#[target_feature(enable = "avx512f")]
23059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23060#[cfg_attr(test, assert_instr(vpermpd))]
23061pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23062    unsafe {
23063        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23064        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23065    }
23066}
23067
23068/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23069///
23070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23071#[inline]
23072#[target_feature(enable = "avx512f,avx512vl")]
23073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23074#[cfg_attr(test, assert_instr(vpermpd))]
23075pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23076    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23077}
23078
23079/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23080///
23081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23082#[inline]
23083#[target_feature(enable = "avx512f,avx512vl")]
23084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23085#[cfg_attr(test, assert_instr(vpermpd))]
23086pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23087    unsafe {
23088        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23089        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23090    }
23091}
23092
23093/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23096#[inline]
23097#[target_feature(enable = "avx512f,avx512vl")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vpermpd))]
23100pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23101    unsafe {
23102        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23103        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23104    }
23105}
23106
23107/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23108///
23109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23110#[inline]
23111#[target_feature(enable = "avx512f")]
23112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23113#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23114pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23115    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23116}
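
// Illustrative sketch (not part of the upstream source): `permutex2var` selects
// from a 32-element table formed by concatenating `a` and `b`; the low four bits of
// each index pick the element and bit 4 picks the source vector (0 = `a`, 1 = `b`).
// For example, interleaving elements from two vectors:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     // Indices below 16 read from `a`, indices 16..32 read from `b`, so the
//     // result alternates 1, 2, 1, 2, ...
//     let r = _mm512_permutex2var_epi32(a, idx, b);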
23117
23118/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23119///
23120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23121#[inline]
23122#[target_feature(enable = "avx512f")]
23123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23124#[cfg_attr(test, assert_instr(vpermt2d))]
23125pub fn _mm512_mask_permutex2var_epi32(
23126    a: __m512i,
23127    k: __mmask16,
23128    idx: __m512i,
23129    b: __m512i,
23130) -> __m512i {
23131    unsafe {
23132        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23133        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23134    }
23135}
23136
23137/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23138///
23139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23140#[inline]
23141#[target_feature(enable = "avx512f")]
23142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23143#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23144pub fn _mm512_maskz_permutex2var_epi32(
23145    k: __mmask16,
23146    a: __m512i,
23147    idx: __m512i,
23148    b: __m512i,
23149) -> __m512i {
23150    unsafe {
23151        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23152        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23153    }
23154}
23155
23156/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23157///
23158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23159#[inline]
23160#[target_feature(enable = "avx512f")]
23161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23162#[cfg_attr(test, assert_instr(vpermi2d))]
23163pub fn _mm512_mask2_permutex2var_epi32(
23164    a: __m512i,
23165    idx: __m512i,
23166    k: __mmask16,
23167    b: __m512i,
23168) -> __m512i {
23169    unsafe {
23170        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23171        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23172    }
23173}
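
// Illustrative note (not part of the upstream source): the three masked variants
// above differ only in where unselected elements come from. With the same `a`,
// `idx`, `b` and mask `k`:
//
//     // _mm512_mask_permutex2var_epi32(a, k, idx, b)  keeps elements of `a`,
//     // _mm512_maskz_permutex2var_epi32(k, a, idx, b) zeroes them, and
//     // _mm512_mask2_permutex2var_epi32(a, idx, k, b) keeps elements of `idx`
//     // wherever the corresponding bit of `k` is clear.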
23174
23175/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23176///
23177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23178#[inline]
23179#[target_feature(enable = "avx512f,avx512vl")]
23180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23181#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23182pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23183    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23184}
23185
23186/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23187///
23188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23189#[inline]
23190#[target_feature(enable = "avx512f,avx512vl")]
23191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23192#[cfg_attr(test, assert_instr(vpermt2d))]
23193pub fn _mm256_mask_permutex2var_epi32(
23194    a: __m256i,
23195    k: __mmask8,
23196    idx: __m256i,
23197    b: __m256i,
23198) -> __m256i {
23199    unsafe {
23200        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23201        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23202    }
23203}
23204
23205/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23206///
23207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23208#[inline]
23209#[target_feature(enable = "avx512f,avx512vl")]
23210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23211#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23212pub fn _mm256_maskz_permutex2var_epi32(
23213    k: __mmask8,
23214    a: __m256i,
23215    idx: __m256i,
23216    b: __m256i,
23217) -> __m256i {
23218    unsafe {
23219        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23220        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23221    }
23222}
23223
23224/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23225///
23226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23227#[inline]
23228#[target_feature(enable = "avx512f,avx512vl")]
23229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23230#[cfg_attr(test, assert_instr(vpermi2d))]
23231pub fn _mm256_mask2_permutex2var_epi32(
23232    a: __m256i,
23233    idx: __m256i,
23234    k: __mmask8,
23235    b: __m256i,
23236) -> __m256i {
23237    unsafe {
23238        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23239        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23240    }
23241}
23242
23243/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23244///
23245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23246#[inline]
23247#[target_feature(enable = "avx512f,avx512vl")]
23248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23249#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23250pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23251    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23252}
23253
23254/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23255///
23256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23257#[inline]
23258#[target_feature(enable = "avx512f,avx512vl")]
23259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23260#[cfg_attr(test, assert_instr(vpermt2d))]
23261pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23262    unsafe {
23263        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23264        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23265    }
23266}
23267
23268/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23269///
23270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23271#[inline]
23272#[target_feature(enable = "avx512f,avx512vl")]
23273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23274#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23275pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23276    unsafe {
23277        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23278        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23279    }
23280}
23281
23282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23283///
23284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23285#[inline]
23286#[target_feature(enable = "avx512f,avx512vl")]
23287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23288#[cfg_attr(test, assert_instr(vpermi2d))]
23289pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23290    unsafe {
23291        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23292        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23293    }
23294}
23295
23296/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23299#[inline]
23300#[target_feature(enable = "avx512f")]
23301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23302#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23303pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23304    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23305}
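
// Illustrative usage sketch (not part of the original source): each lane of `idx`
// addresses the combined 16-element pool formed by `a` and `b`, where index values
// 0..=7 pick from `a` and 8..=15 pick from `b`. The helper below is hypothetical and
// compiled only for tests; it interleaves the low halves of the two inputs.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _permutex2var_epi64_sketch(a: __m512i, b: __m512i) -> __m512i {
    // `_mm512_set_epi64` lists elements from lane 7 down to lane 0, so this yields
    // [a0, b0, a1, b1, a2, b2, a3, b3].
    let idx = _mm512_set_epi64(11, 3, 10, 2, 9, 1, 8, 0);
    _mm512_permutex2var_epi64(a, idx, b)
}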
23306
23307/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23308///
23309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23310#[inline]
23311#[target_feature(enable = "avx512f")]
23312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23313#[cfg_attr(test, assert_instr(vpermt2q))]
23314pub fn _mm512_mask_permutex2var_epi64(
23315    a: __m512i,
23316    k: __mmask8,
23317    idx: __m512i,
23318    b: __m512i,
23319) -> __m512i {
23320    unsafe {
23321        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23322        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23323    }
23324}
23325
23326/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23327///
23328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23329#[inline]
23330#[target_feature(enable = "avx512f")]
23331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23332#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23333pub fn _mm512_maskz_permutex2var_epi64(
23334    k: __mmask8,
23335    a: __m512i,
23336    idx: __m512i,
23337    b: __m512i,
23338) -> __m512i {
23339    unsafe {
23340        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23341        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23342    }
23343}
23344
23345/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23346///
23347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23348#[inline]
23349#[target_feature(enable = "avx512f")]
23350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23351#[cfg_attr(test, assert_instr(vpermi2q))]
23352pub fn _mm512_mask2_permutex2var_epi64(
23353    a: __m512i,
23354    idx: __m512i,
23355    k: __mmask8,
23356    b: __m512i,
23357) -> __m512i {
23358    unsafe {
23359        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23360        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23361    }
23362}
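
// Illustrative sketch (not part of the original source) contrasting the three masked
// forms above for lanes whose mask bit is clear: `mask_` keeps the lane from `a`,
// `maskz_` zeroes it, and `mask2_` keeps the lane from `idx`. The helper name and its
// tuple return are hypothetical; it is compiled only for tests.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _permutex2var_epi64_mask_forms(
    a: __m512i,
    idx: __m512i,
    k: __mmask8,
    b: __m512i,
) -> (__m512i, __m512i, __m512i) {
    (
        _mm512_mask_permutex2var_epi64(a, k, idx, b), // unselected lanes come from `a`
        _mm512_maskz_permutex2var_epi64(k, a, idx, b), // unselected lanes become zero
        _mm512_mask2_permutex2var_epi64(a, idx, k, b), // unselected lanes come from `idx`
    )
}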
23363
23364/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23365///
23366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23367#[inline]
23368#[target_feature(enable = "avx512f,avx512vl")]
23369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23370#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23371pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23372    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23373}
23374
23375/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23376///
23377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23378#[inline]
23379#[target_feature(enable = "avx512f,avx512vl")]
23380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23381#[cfg_attr(test, assert_instr(vpermt2q))]
23382pub fn _mm256_mask_permutex2var_epi64(
23383    a: __m256i,
23384    k: __mmask8,
23385    idx: __m256i,
23386    b: __m256i,
23387) -> __m256i {
23388    unsafe {
23389        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23390        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23391    }
23392}
23393
23394/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23395///
23396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23397#[inline]
23398#[target_feature(enable = "avx512f,avx512vl")]
23399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23400#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23401pub fn _mm256_maskz_permutex2var_epi64(
23402    k: __mmask8,
23403    a: __m256i,
23404    idx: __m256i,
23405    b: __m256i,
23406) -> __m256i {
23407    unsafe {
23408        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23409        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23410    }
23411}
23412
23413/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23414///
23415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23416#[inline]
23417#[target_feature(enable = "avx512f,avx512vl")]
23418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23419#[cfg_attr(test, assert_instr(vpermi2q))]
23420pub fn _mm256_mask2_permutex2var_epi64(
23421    a: __m256i,
23422    idx: __m256i,
23423    k: __mmask8,
23424    b: __m256i,
23425) -> __m256i {
23426    unsafe {
23427        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23428        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23429    }
23430}
23431
23432/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23433///
23434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23435#[inline]
23436#[target_feature(enable = "avx512f,avx512vl")]
23437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23438#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23439pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23440    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23441}
23442
23443/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23444///
23445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23446#[inline]
23447#[target_feature(enable = "avx512f,avx512vl")]
23448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23449#[cfg_attr(test, assert_instr(vpermt2q))]
23450pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23451    unsafe {
23452        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23453        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23454    }
23455}
23456
23457/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23464pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23465    unsafe {
23466        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23467        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23468    }
23469}
23470
23471/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23472///
23473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23474#[inline]
23475#[target_feature(enable = "avx512f,avx512vl")]
23476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23477#[cfg_attr(test, assert_instr(vpermi2q))]
23478pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23479    unsafe {
23480        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23481        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23482    }
23483}
23484
23485/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23486///
23487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
23488#[inline]
23489#[target_feature(enable = "avx512f")]
23490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23491#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23492pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
23493    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23494}
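
// Illustrative sketch (not part of the original source): the floating-point variants
// take the same integer index vector as the epi32 form; index values 0..=15 address
// `a` and 16..=31 address `b`. Hypothetical, test-only helper that keeps the low half
// of each input.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _permutex2var_ps_low_halves(a: __m512, b: __m512) -> __m512 {
    let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
    _mm512_permutex2var_ps(a, idx, b)
}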
23495
23496/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23497///
23498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23499#[inline]
23500#[target_feature(enable = "avx512f")]
23501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23502#[cfg_attr(test, assert_instr(vpermt2ps))]
23503pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23504    unsafe {
23505        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23506        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23507    }
23508}
23509
23510/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23511///
23512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23513#[inline]
23514#[target_feature(enable = "avx512f")]
23515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23516#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23517pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23518    unsafe {
23519        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23520        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23521    }
23522}
23523
23524/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23531pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23532    unsafe {
23533        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23534        let idx = _mm512_castsi512_ps(idx).as_f32x16();
23535        transmute(simd_select_bitmask(k, permute, idx))
23536    }
23537}
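
// Illustrative sketch (not part of the original source): in the `mask2_` float form,
// lanes whose mask bit is clear receive the raw bits of `idx` reinterpreted as f32
// (the `_mm512_castsi512_ps` step above), not a numeric int-to-float conversion.
// Hypothetical, test-only helper; with an all-zero mask every lane falls back to the
// bit pattern of `idx`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _mask2_permutex2var_ps_sketch(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    _mm512_mask2_permutex2var_ps(a, idx, 0, b)
}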
23538
23539/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23540///
23541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23542#[inline]
23543#[target_feature(enable = "avx512f,avx512vl")]
23544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23545#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23546pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
23547    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23548}
23549
23550/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23551///
23552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23553#[inline]
23554#[target_feature(enable = "avx512f,avx512vl")]
23555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23556#[cfg_attr(test, assert_instr(vpermt2ps))]
23557pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23558    unsafe {
23559        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23560        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23561    }
23562}
23563
23564/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23565///
23566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23567#[inline]
23568#[target_feature(enable = "avx512f,avx512vl")]
23569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23570#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23571pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23572    unsafe {
23573        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23574        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23575    }
23576}
23577
23578/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23579///
23580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23581#[inline]
23582#[target_feature(enable = "avx512f,avx512vl")]
23583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23584#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23585pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23586    unsafe {
23587        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23588        let idx = _mm256_castsi256_ps(idx).as_f32x8();
23589        transmute(simd_select_bitmask(k, permute, idx))
23590    }
23591}
23592
23593/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23594///
23595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23596#[inline]
23597#[target_feature(enable = "avx512f,avx512vl")]
23598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23599#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23600pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
23601    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23602}
23603
23604/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23607#[inline]
23608#[target_feature(enable = "avx512f,avx512vl")]
23609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23610#[cfg_attr(test, assert_instr(vpermt2ps))]
23611pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23612    unsafe {
23613        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23614        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23615    }
23616}
23617
23618/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23619///
23620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23621#[inline]
23622#[target_feature(enable = "avx512f,avx512vl")]
23623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23624#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23625pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23626    unsafe {
23627        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23628        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23629    }
23630}
23631
23632/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23635#[inline]
23636#[target_feature(enable = "avx512f,avx512vl")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23639pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23640    unsafe {
23641        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23642        let idx = _mm_castsi128_ps(idx).as_f32x4();
23643        transmute(simd_select_bitmask(k, permute, idx))
23644    }
23645}
23646
23647/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23648///
23649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23650#[inline]
23651#[target_feature(enable = "avx512f")]
23652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23653#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23654pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23655    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23656}
23657
23658/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23659///
23660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23661#[inline]
23662#[target_feature(enable = "avx512f")]
23663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23664#[cfg_attr(test, assert_instr(vpermt2pd))]
23665pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23666    unsafe {
23667        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23668        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23669    }
23670}
23671
23672/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23673///
23674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23675#[inline]
23676#[target_feature(enable = "avx512f")]
23677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23678#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23679pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23680    unsafe {
23681        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23682        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23683    }
23684}
23685
23686/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23687///
23688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23689#[inline]
23690#[target_feature(enable = "avx512f")]
23691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23692#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23693pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23694    unsafe {
23695        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23696        let idx = _mm512_castsi512_pd(idx).as_f64x8();
23697        transmute(simd_select_bitmask(k, permute, idx))
23698    }
23699}
23700
23701/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23702///
23703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23704#[inline]
23705#[target_feature(enable = "avx512f,avx512vl")]
23706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23707#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23708pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23709    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23710}
23711
23712/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23713///
23714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23715#[inline]
23716#[target_feature(enable = "avx512f,avx512vl")]
23717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23718#[cfg_attr(test, assert_instr(vpermt2pd))]
23719pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23720    unsafe {
23721        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23722        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23723    }
23724}
23725
23726/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23727///
23728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23729#[inline]
23730#[target_feature(enable = "avx512f,avx512vl")]
23731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23732#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23733pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23734    unsafe {
23735        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23736        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23737    }
23738}
23739
23740/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23741///
23742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23743#[inline]
23744#[target_feature(enable = "avx512f,avx512vl")]
23745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23746#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23747pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23748    unsafe {
23749        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23750        let idx = _mm256_castsi256_pd(idx).as_f64x4();
23751        transmute(simd_select_bitmask(k, permute, idx))
23752    }
23753}
23754
23755/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23756///
23757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23758#[inline]
23759#[target_feature(enable = "avx512f,avx512vl")]
23760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23761#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23762pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23763    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23764}
23765
23766/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23772#[cfg_attr(test, assert_instr(vpermt2pd))]
23773pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23774    unsafe {
23775        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23776        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23777    }
23778}
23779
23780/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23783#[inline]
23784#[target_feature(enable = "avx512f,avx512vl")]
23785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23786#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23787pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23788    unsafe {
23789        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23790        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23791    }
23792}
23793
23794/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23795///
23796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23797#[inline]
23798#[target_feature(enable = "avx512f,avx512vl")]
23799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23800#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23801pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23802    unsafe {
23803        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23804        let idx = _mm_castsi128_pd(idx).as_f64x2();
23805        transmute(simd_select_bitmask(k, permute, idx))
23806    }
23807}
23808
23809/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23810///
23811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
23812#[inline]
23813#[target_feature(enable = "avx512f")]
23814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23815#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23816#[rustc_legacy_const_generics(1)]
23817pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23818    unsafe {
23819        static_assert_uimm_bits!(MASK, 8);
23820        let r: i32x16 = simd_shuffle!(
23821            a.as_i32x16(),
23822            a.as_i32x16(),
23823            [
23824                MASK as u32 & 0b11,
23825                (MASK as u32 >> 2) & 0b11,
23826                (MASK as u32 >> 4) & 0b11,
23827                (MASK as u32 >> 6) & 0b11,
23828                (MASK as u32 & 0b11) + 4,
23829                ((MASK as u32 >> 2) & 0b11) + 4,
23830                ((MASK as u32 >> 4) & 0b11) + 4,
23831                ((MASK as u32 >> 6) & 0b11) + 4,
23832                (MASK as u32 & 0b11) + 8,
23833                ((MASK as u32 >> 2) & 0b11) + 8,
23834                ((MASK as u32 >> 4) & 0b11) + 8,
23835                ((MASK as u32 >> 6) & 0b11) + 8,
23836                (MASK as u32 & 0b11) + 12,
23837                ((MASK as u32 >> 2) & 0b11) + 12,
23838                ((MASK as u32 >> 4) & 0b11) + 12,
23839                ((MASK as u32 >> 6) & 0b11) + 12,
23840            ],
23841        );
23842        transmute(r)
23843    }
23844}
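
// Illustrative sketch (not part of the original source): MASK holds four 2-bit source
// positions applied independently inside each 128-bit lane, exactly as the shuffle
// indices above spell out. The test attribute passes MASK as a plain integer, which
// this hypothetical, test-only helper does as well; 0b00_01_10_11 reverses the four
// elements of every lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _shuffle_epi32_reverse_lanes(a: __m512i) -> __m512i {
    // Destination element 0 of each lane takes source element 3, element 1 takes
    // element 2, and so on.
    _mm512_shuffle_epi32::<0b00_01_10_11>(a)
}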
23845
23846/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23847///
23848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23849#[inline]
23850#[target_feature(enable = "avx512f")]
23851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23852#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23853#[rustc_legacy_const_generics(3)]
23854pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23855    src: __m512i,
23856    k: __mmask16,
23857    a: __m512i,
23858) -> __m512i {
23859    unsafe {
23860        static_assert_uimm_bits!(MASK, 8);
23861        let r = _mm512_shuffle_epi32::<MASK>(a);
23862        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23863    }
23864}
23865
23866/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23867///
23868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23869#[inline]
23870#[target_feature(enable = "avx512f")]
23871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23872#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23873#[rustc_legacy_const_generics(2)]
23874pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23875    unsafe {
23876        static_assert_uimm_bits!(MASK, 8);
23877        let r = _mm512_shuffle_epi32::<MASK>(a);
23878        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23879    }
23880}
23881
23882/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23883///
23884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23885#[inline]
23886#[target_feature(enable = "avx512f,avx512vl")]
23887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23888#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23889#[rustc_legacy_const_generics(3)]
23890pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23891    src: __m256i,
23892    k: __mmask8,
23893    a: __m256i,
23894) -> __m256i {
23895    unsafe {
23896        static_assert_uimm_bits!(MASK, 8);
23897        let r = _mm256_shuffle_epi32::<MASK>(a);
23898        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23899    }
23900}
23901
23902/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23903///
23904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23905#[inline]
23906#[target_feature(enable = "avx512f,avx512vl")]
23907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23908#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23909#[rustc_legacy_const_generics(2)]
23910pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23911    unsafe {
23912        static_assert_uimm_bits!(MASK, 8);
23913        let r = _mm256_shuffle_epi32::<MASK>(a);
23914        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23915    }
23916}
23917
23918/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23919///
23920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23921#[inline]
23922#[target_feature(enable = "avx512f,avx512vl")]
23923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23924#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23925#[rustc_legacy_const_generics(3)]
23926pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23927    src: __m128i,
23928    k: __mmask8,
23929    a: __m128i,
23930) -> __m128i {
23931    unsafe {
23932        static_assert_uimm_bits!(MASK, 8);
23933        let r = _mm_shuffle_epi32::<MASK>(a);
23934        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23935    }
23936}
23937
23938/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23939///
23940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
23941#[inline]
23942#[target_feature(enable = "avx512f,avx512vl")]
23943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23944#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23945#[rustc_legacy_const_generics(2)]
23946pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
23947    unsafe {
23948        static_assert_uimm_bits!(MASK, 8);
23949        let r = _mm_shuffle_epi32::<MASK>(a);
23950        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23951    }
23952}
23953
23954/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
23955///
23956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
23957#[inline]
23958#[target_feature(enable = "avx512f")]
23959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23960#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23961#[rustc_legacy_const_generics(2)]
23962pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23963    unsafe {
23964        static_assert_uimm_bits!(MASK, 8);
23965        simd_shuffle!(
23966            a,
23967            b,
23968            [
23969                MASK as u32 & 0b11,
23970                (MASK as u32 >> 2) & 0b11,
23971                ((MASK as u32 >> 4) & 0b11) + 16,
23972                ((MASK as u32 >> 6) & 0b11) + 16,
23973                (MASK as u32 & 0b11) + 4,
23974                ((MASK as u32 >> 2) & 0b11) + 4,
23975                ((MASK as u32 >> 4) & 0b11) + 20,
23976                ((MASK as u32 >> 6) & 0b11) + 20,
23977                (MASK as u32 & 0b11) + 8,
23978                ((MASK as u32 >> 2) & 0b11) + 8,
23979                ((MASK as u32 >> 4) & 0b11) + 24,
23980                ((MASK as u32 >> 6) & 0b11) + 24,
23981                (MASK as u32 & 0b11) + 12,
23982                ((MASK as u32 >> 2) & 0b11) + 12,
23983                ((MASK as u32 >> 4) & 0b11) + 28,
23984                ((MASK as u32 >> 6) & 0b11) + 28,
23985            ],
23986        )
23987    }
23988}
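
// Illustrative sketch (not part of the original source): within every 128-bit lane the
// two low output elements are taken from `a` and the two high output elements from
// `b`, each selected by a 2-bit MASK field. Hypothetical, test-only helper; per lane
// the result is [a0, a2, b0, b2], the usual "keep the even elements" pattern.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _shuffle_ps_even_elements(a: __m512, b: __m512) -> __m512 {
    _mm512_shuffle_ps::<0b10_00_10_00>(a, b)
}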
23989
23990/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23991///
23992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
23993#[inline]
23994#[target_feature(enable = "avx512f")]
23995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23996#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23997#[rustc_legacy_const_generics(4)]
23998pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
23999    src: __m512,
24000    k: __mmask16,
24001    a: __m512,
24002    b: __m512,
24003) -> __m512 {
24004    unsafe {
24005        static_assert_uimm_bits!(MASK, 8);
24006        let r = _mm512_shuffle_ps::<MASK>(a, b);
24007        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24008    }
24009}
24010
24011/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24012///
24013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24014#[inline]
24015#[target_feature(enable = "avx512f")]
24016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24017#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24018#[rustc_legacy_const_generics(3)]
24019pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24020    unsafe {
24021        static_assert_uimm_bits!(MASK, 8);
24022        let r = _mm512_shuffle_ps::<MASK>(a, b);
24023        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24024    }
24025}
24026
24027/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24028///
24029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24030#[inline]
24031#[target_feature(enable = "avx512f,avx512vl")]
24032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24033#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24034#[rustc_legacy_const_generics(4)]
24035pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24036    src: __m256,
24037    k: __mmask8,
24038    a: __m256,
24039    b: __m256,
24040) -> __m256 {
24041    unsafe {
24042        static_assert_uimm_bits!(MASK, 8);
24043        let r = _mm256_shuffle_ps::<MASK>(a, b);
24044        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24045    }
24046}
24047
24048/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24049///
24050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24051#[inline]
24052#[target_feature(enable = "avx512f,avx512vl")]
24053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24054#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24055#[rustc_legacy_const_generics(3)]
24056pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24057    unsafe {
24058        static_assert_uimm_bits!(MASK, 8);
24059        let r = _mm256_shuffle_ps::<MASK>(a, b);
24060        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24061    }
24062}
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24067#[inline]
24068#[target_feature(enable = "avx512f,avx512vl")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24073    src: __m128,
24074    k: __mmask8,
24075    a: __m128,
24076    b: __m128,
24077) -> __m128 {
24078    unsafe {
24079        static_assert_uimm_bits!(MASK, 8);
24080        let r = _mm_shuffle_ps::<MASK>(a, b);
24081        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24082    }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24088#[inline]
24089#[target_feature(enable = "avx512f,avx512vl")]
24090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24094    unsafe {
24095        static_assert_uimm_bits!(MASK, 8);
24096        let r = _mm_shuffle_ps::<MASK>(a, b);
24097        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24098    }
24099}
24100
24101/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24104#[inline]
24105#[target_feature(enable = "avx512f")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24108#[rustc_legacy_const_generics(2)]
24109pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24110    unsafe {
24111        static_assert_uimm_bits!(MASK, 8);
24112        simd_shuffle!(
24113            a,
24114            b,
24115            [
24116                MASK as u32 & 0b1,
24117                ((MASK as u32 >> 1) & 0b1) + 8,
24118                ((MASK as u32 >> 2) & 0b1) + 2,
24119                ((MASK as u32 >> 3) & 0b1) + 10,
24120                ((MASK as u32 >> 4) & 0b1) + 4,
24121                ((MASK as u32 >> 5) & 0b1) + 12,
24122                ((MASK as u32 >> 6) & 0b1) + 6,
24123                ((MASK as u32 >> 7) & 0b1) + 14,
24124            ],
24125        )
24126    }
24127}
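
// Illustrative sketch (not part of the original source): within each 128-bit lane the
// even output element comes from `a` and the odd output element from `b`, each chosen
// by a single MASK bit. Hypothetical, test-only helper; with MASK = 0 the result is
// [a0, b0, a2, b2, a4, b4, a6, b6], an unpack-low within every lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _shuffle_pd_unpacklo(a: __m512d, b: __m512d) -> __m512d {
    _mm512_shuffle_pd::<0>(a, b)
}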
24128
24129/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24130///
24131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24132#[inline]
24133#[target_feature(enable = "avx512f")]
24134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24135#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24136#[rustc_legacy_const_generics(4)]
24137pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24138    src: __m512d,
24139    k: __mmask8,
24140    a: __m512d,
24141    b: __m512d,
24142) -> __m512d {
24143    unsafe {
24144        static_assert_uimm_bits!(MASK, 8);
24145        let r = _mm512_shuffle_pd::<MASK>(a, b);
24146        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24147    }
24148}
24149
24150/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24151///
24152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24153#[inline]
24154#[target_feature(enable = "avx512f")]
24155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24156#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24157#[rustc_legacy_const_generics(3)]
24158pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24159    unsafe {
24160        static_assert_uimm_bits!(MASK, 8);
24161        let r = _mm512_shuffle_pd::<MASK>(a, b);
24162        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24163    }
24164}
24165
24166/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24167///
24168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24169#[inline]
24170#[target_feature(enable = "avx512f,avx512vl")]
24171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24172#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24173#[rustc_legacy_const_generics(4)]
24174pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24175    src: __m256d,
24176    k: __mmask8,
24177    a: __m256d,
24178    b: __m256d,
24179) -> __m256d {
24180    unsafe {
24181        static_assert_uimm_bits!(MASK, 8);
24182        let r = _mm256_shuffle_pd::<MASK>(a, b);
24183        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24184    }
24185}
24186
24187/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24188///
24189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24190#[inline]
24191#[target_feature(enable = "avx512f,avx512vl")]
24192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24193#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24194#[rustc_legacy_const_generics(3)]
24195pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24196    unsafe {
24197        static_assert_uimm_bits!(MASK, 8);
24198        let r = _mm256_shuffle_pd::<MASK>(a, b);
24199        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24200    }
24201}
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24206#[inline]
24207#[target_feature(enable = "avx512f,avx512vl")]
24208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24212    src: __m128d,
24213    k: __mmask8,
24214    a: __m128d,
24215    b: __m128d,
24216) -> __m128d {
24217    unsafe {
24218        static_assert_uimm_bits!(MASK, 8);
24219        let r = _mm_shuffle_pd::<MASK>(a, b);
24220        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24221    }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24227#[inline]
24228#[target_feature(enable = "avx512f,avx512vl")]
24229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24233    unsafe {
24234        static_assert_uimm_bits!(MASK, 8);
24235        let r = _mm_shuffle_pd::<MASK>(a, b);
24236        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24237    }
24238}
24239
24240/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
24243#[inline]
24244#[target_feature(enable = "avx512f")]
24245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24246#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24247#[rustc_legacy_const_generics(2)]
24248pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24249    unsafe {
24250        static_assert_uimm_bits!(MASK, 8);
24251        let a = a.as_i32x16();
24252        let b = b.as_i32x16();
24253        let r: i32x16 = simd_shuffle!(
24254            a,
24255            b,
24256            [
24257                (MASK as u32 & 0b11) * 4 + 0,
24258                (MASK as u32 & 0b11) * 4 + 1,
24259                (MASK as u32 & 0b11) * 4 + 2,
24260                (MASK as u32 & 0b11) * 4 + 3,
24261                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24262                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24263                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24264                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24265                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24266                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24267                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24268                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24269                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24270                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24271                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24272                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24273            ],
24274        );
24275        transmute(r)
24276    }
24277}
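
// Illustrative sketch (not part of the original source): the four 2-bit MASK fields
// select whole 128-bit chunks, the low two output chunks from `a` and the high two
// from `b`. Hypothetical, test-only helper; 0b11_10_01_00 yields the low 256 bits of
// `a` followed by the high 256 bits of `b`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _shuffle_i32x4_low_a_high_b(a: __m512i, b: __m512i) -> __m512i {
    _mm512_shuffle_i32x4::<0b11_10_01_00>(a, b)
}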
24278
24279/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24280///
24281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24282#[inline]
24283#[target_feature(enable = "avx512f")]
24284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24285#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24286#[rustc_legacy_const_generics(4)]
24287pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24288    src: __m512i,
24289    k: __mmask16,
24290    a: __m512i,
24291    b: __m512i,
24292) -> __m512i {
24293    unsafe {
24294        static_assert_uimm_bits!(MASK, 8);
24295        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24296        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24297    }
24298}
24299
24300/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24303#[inline]
24304#[target_feature(enable = "avx512f")]
24305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24306#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24307#[rustc_legacy_const_generics(3)]
24308pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24309    k: __mmask16,
24310    a: __m512i,
24311    b: __m512i,
24312) -> __m512i {
24313    unsafe {
24314        static_assert_uimm_bits!(MASK, 8);
24315        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24316        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24317    }
24318}
24319
24320/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24321///
24322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24323#[inline]
24324#[target_feature(enable = "avx512f,avx512vl")]
24325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24326#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24327#[rustc_legacy_const_generics(2)]
24328pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24329    unsafe {
24330        static_assert_uimm_bits!(MASK, 8);
24331        let a = a.as_i32x8();
24332        let b = b.as_i32x8();
24333        let r: i32x8 = simd_shuffle!(
24334            a,
24335            b,
24336            [
24337                (MASK as u32 & 0b1) * 4 + 0,
24338                (MASK as u32 & 0b1) * 4 + 1,
24339                (MASK as u32 & 0b1) * 4 + 2,
24340                (MASK as u32 & 0b1) * 4 + 3,
24341                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24342                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24343                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24344                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24345            ],
24346        );
24347        transmute(r)
24348    }
24349}
24350
24351/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24352///
24353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24354#[inline]
24355#[target_feature(enable = "avx512f,avx512vl")]
24356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24357#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24358#[rustc_legacy_const_generics(4)]
24359pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24360    src: __m256i,
24361    k: __mmask8,
24362    a: __m256i,
24363    b: __m256i,
24364) -> __m256i {
24365    unsafe {
24366        static_assert_uimm_bits!(MASK, 8);
24367        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24368        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24369    }
24370}
24371
24372/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24373///
24374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24375#[inline]
24376#[target_feature(enable = "avx512f,avx512vl")]
24377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24378#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24379#[rustc_legacy_const_generics(3)]
24380pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24381    unsafe {
24382        static_assert_uimm_bits!(MASK, 8);
24383        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24384        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24385    }
24386}
24387
24388/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24389///
24390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
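///
/// # Example
///
/// Illustrative only, marked `ignore` (assumes nightly with
/// `stdarch_x86_avx512` on an AVX-512F CPU): the lane-selection scheme matches
/// `_mm512_shuffle_i32x4`, but each 128-bit lane holds two 64-bit integers.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
///     // Selects a[lane 3], a[lane 2], b[lane 1], b[lane 0].
///     let r = _mm512_shuffle_i64x2::<0b00_01_10_11>(a, b);
///     let e = _mm512_setr_epi64(6, 7, 4, 5, 10, 11, 8, 9);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
/// }
/// ```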
24391#[inline]
24392#[target_feature(enable = "avx512f")]
24393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24394#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24395#[rustc_legacy_const_generics(2)]
24396pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24397    unsafe {
24398        static_assert_uimm_bits!(MASK, 8);
24399        let a = a.as_i64x8();
24400        let b = b.as_i64x8();
24401        let r: i64x8 = simd_shuffle!(
24402            a,
24403            b,
24404            [
24405                (MASK as u32 & 0b11) * 2 + 0,
24406                (MASK as u32 & 0b11) * 2 + 1,
24407                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24408                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24409                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24410                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24411                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24412                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24413            ],
24414        );
24415        transmute(r)
24416    }
24417}
24418
24419/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24420///
24421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24422#[inline]
24423#[target_feature(enable = "avx512f")]
24424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24425#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24426#[rustc_legacy_const_generics(4)]
24427pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24428    src: __m512i,
24429    k: __mmask8,
24430    a: __m512i,
24431    b: __m512i,
24432) -> __m512i {
24433    unsafe {
24434        static_assert_uimm_bits!(MASK, 8);
24435        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24436        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24437    }
24438}
24439
24440/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24441///
24442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24443#[inline]
24444#[target_feature(enable = "avx512f")]
24445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24446#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24447#[rustc_legacy_const_generics(3)]
24448pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24449    unsafe {
24450        static_assert_uimm_bits!(MASK, 8);
24451        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24452        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24453    }
24454}
24455
24456/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24457///
24458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24459#[inline]
24460#[target_feature(enable = "avx512f,avx512vl")]
24461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24462#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24463#[rustc_legacy_const_generics(2)]
24464pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24465    unsafe {
24466        static_assert_uimm_bits!(MASK, 8);
24467        let a = a.as_i64x4();
24468        let b = b.as_i64x4();
24469        let r: i64x4 = simd_shuffle!(
24470            a,
24471            b,
24472            [
24473                (MASK as u32 & 0b1) * 2 + 0,
24474                (MASK as u32 & 0b1) * 2 + 1,
24475                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24476                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24477            ],
24478        );
24479        transmute(r)
24480    }
24481}
24482
24483/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24484///
24485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24486#[inline]
24487#[target_feature(enable = "avx512f,avx512vl")]
24488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24489#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24490#[rustc_legacy_const_generics(4)]
24491pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24492    src: __m256i,
24493    k: __mmask8,
24494    a: __m256i,
24495    b: __m256i,
24496) -> __m256i {
24497    unsafe {
24498        static_assert_uimm_bits!(MASK, 8);
24499        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24500        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24501    }
24502}
24503
24504/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24505///
24506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24507#[inline]
24508#[target_feature(enable = "avx512f,avx512vl")]
24509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24510#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24511#[rustc_legacy_const_generics(3)]
24512pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24513    unsafe {
24514        static_assert_uimm_bits!(MASK, 8);
24515        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24516        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24517    }
24518}
24519
24520/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24521///
24522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24523#[inline]
24524#[target_feature(enable = "avx512f")]
24525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24526#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generates vshuff64x2
24527#[rustc_legacy_const_generics(2)]
24528pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24529    unsafe {
24530        static_assert_uimm_bits!(MASK, 8);
24531        let a = a.as_f32x16();
24532        let b = b.as_f32x16();
24533        let r: f32x16 = simd_shuffle!(
24534            a,
24535            b,
24536            [
24537                (MASK as u32 & 0b11) * 4 + 0,
24538                (MASK as u32 & 0b11) * 4 + 1,
24539                (MASK as u32 & 0b11) * 4 + 2,
24540                (MASK as u32 & 0b11) * 4 + 3,
24541                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24542                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24543                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24544                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24545                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24546                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24547                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24548                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24549                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24550                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24551                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24552                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24553            ],
24554        );
24555        transmute(r)
24556    }
24557}
24558
24559/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24560///
24561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24562#[inline]
24563#[target_feature(enable = "avx512f")]
24564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24565#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24566#[rustc_legacy_const_generics(4)]
24567pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24568    src: __m512,
24569    k: __mmask16,
24570    a: __m512,
24571    b: __m512,
24572) -> __m512 {
24573    unsafe {
24574        static_assert_uimm_bits!(MASK, 8);
24575        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24576        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24577    }
24578}
24579
24580/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24581///
24582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24583#[inline]
24584#[target_feature(enable = "avx512f")]
24585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24586#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24587#[rustc_legacy_const_generics(3)]
24588pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24589    unsafe {
24590        static_assert_uimm_bits!(MASK, 8);
24591        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24592        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24593    }
24594}
24595
24596/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24597///
24598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24599#[inline]
24600#[target_feature(enable = "avx512f,avx512vl")]
24601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24602#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24603#[rustc_legacy_const_generics(2)]
24604pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24605    unsafe {
24606        static_assert_uimm_bits!(MASK, 8);
24607        let a = a.as_f32x8();
24608        let b = b.as_f32x8();
24609        let r: f32x8 = simd_shuffle!(
24610            a,
24611            b,
24612            [
24613                (MASK as u32 & 0b1) * 4 + 0,
24614                (MASK as u32 & 0b1) * 4 + 1,
24615                (MASK as u32 & 0b1) * 4 + 2,
24616                (MASK as u32 & 0b1) * 4 + 3,
24617                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24618                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24619                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24620                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24621            ],
24622        );
24623        transmute(r)
24624    }
24625}
24626
24627/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24628///
24629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24630#[inline]
24631#[target_feature(enable = "avx512f,avx512vl")]
24632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24633#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24634#[rustc_legacy_const_generics(4)]
24635pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24636    src: __m256,
24637    k: __mmask8,
24638    a: __m256,
24639    b: __m256,
24640) -> __m256 {
24641    unsafe {
24642        static_assert_uimm_bits!(MASK, 8);
24643        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24644        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24645    }
24646}
24647
24648/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24649///
24650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24651#[inline]
24652#[target_feature(enable = "avx512f,avx512vl")]
24653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24654#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24655#[rustc_legacy_const_generics(3)]
24656pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24657    unsafe {
24658        static_assert_uimm_bits!(MASK, 8);
24659        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24660        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24661    }
24662}
24663
24664/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24665///
24666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
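///
/// # Example
///
/// A short sketch, marked `ignore` (nightly + `stdarch_x86_avx512` + AVX-512F
/// assumed), that keeps the two low 128-bit lanes of `a` and the two high
/// 128-bit lanes of `b`.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     let b = _mm512_setr_pd(8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0);
///     let r = _mm512_shuffle_f64x2::<0b11_10_01_00>(a, b);
///     // r = [0.0, 1.0, 2.0, 3.0, 12.0, 13.0, 14.0, 15.0]
/// }
/// ```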
24667#[inline]
24668#[target_feature(enable = "avx512f")]
24669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24670#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24671#[rustc_legacy_const_generics(2)]
24672pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24673    unsafe {
24674        static_assert_uimm_bits!(MASK, 8);
24675        let a = a.as_f64x8();
24676        let b = b.as_f64x8();
24677        let r: f64x8 = simd_shuffle!(
24678            a,
24679            b,
24680            [
24681                (MASK as u32 & 0b11) * 2 + 0,
24682                (MASK as u32 & 0b11) * 2 + 1,
24683                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24684                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24685                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24686                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24687                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24688                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24689            ],
24690        );
24691        transmute(r)
24692    }
24693}
24694
24695/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24696///
24697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24698#[inline]
24699#[target_feature(enable = "avx512f")]
24700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24701#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24702#[rustc_legacy_const_generics(4)]
24703pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24704    src: __m512d,
24705    k: __mmask8,
24706    a: __m512d,
24707    b: __m512d,
24708) -> __m512d {
24709    unsafe {
24710        static_assert_uimm_bits!(MASK, 8);
24711        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24712        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24713    }
24714}
24715
24716/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24717///
24718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24719#[inline]
24720#[target_feature(enable = "avx512f")]
24721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24722#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24723#[rustc_legacy_const_generics(3)]
24724pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24725    unsafe {
24726        static_assert_uimm_bits!(MASK, 8);
24727        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24728        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24729    }
24730}
24731
24732/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24733///
24734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24735#[inline]
24736#[target_feature(enable = "avx512f,avx512vl")]
24737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24738#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24739#[rustc_legacy_const_generics(2)]
24740pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24741    unsafe {
24742        static_assert_uimm_bits!(MASK, 8);
24743        let a = a.as_f64x4();
24744        let b = b.as_f64x4();
24745        let r: f64x4 = simd_shuffle!(
24746            a,
24747            b,
24748            [
24749                (MASK as u32 & 0b1) * 2 + 0,
24750                (MASK as u32 & 0b1) * 2 + 1,
24751                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24752                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24753            ],
24754        );
24755        transmute(r)
24756    }
24757}
24758
24759/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24760///
24761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24762#[inline]
24763#[target_feature(enable = "avx512f,avx512vl")]
24764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24765#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24766#[rustc_legacy_const_generics(4)]
24767pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24768    src: __m256d,
24769    k: __mmask8,
24770    a: __m256d,
24771    b: __m256d,
24772) -> __m256d {
24773    unsafe {
24774        static_assert_uimm_bits!(MASK, 8);
24775        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24776        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24777    }
24778}
24779
24780/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24781///
24782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24783#[inline]
24784#[target_feature(enable = "avx512f,avx512vl")]
24785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24786#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24787#[rustc_legacy_const_generics(3)]
24788pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24789    unsafe {
24790        static_assert_uimm_bits!(MASK, 8);
24791        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24792        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24793    }
24794}
24795
24796/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24797///
24798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
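///
/// # Example
///
/// Illustrative sketch, marked `ignore` (assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU): `IMM8 = 3` selects the highest
/// of the four 128-bit lanes.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_ps(
///         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///     );
///     let hi = _mm512_extractf32x4_ps::<3>(a);
///     // hi = [12.0, 13.0, 14.0, 15.0]
///     assert_eq!(_mm_cvtss_f32(hi), 12.0);
/// }
/// ```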
24799#[inline]
24800#[target_feature(enable = "avx512f")]
24801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24802#[cfg_attr(
24803    all(test, not(target_env = "msvc")),
24804    assert_instr(vextractf32x4, IMM8 = 3)
24805)]
24806#[rustc_legacy_const_generics(1)]
24807pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24808    unsafe {
24809        static_assert_uimm_bits!(IMM8, 2);
24810        match IMM8 & 0x3 {
24811            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24812            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24813            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24814            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24815        }
24816    }
24817}
24818
24819/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24820///
24821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24822#[inline]
24823#[target_feature(enable = "avx512f")]
24824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24825#[cfg_attr(
24826    all(test, not(target_env = "msvc")),
24827    assert_instr(vextractf32x4, IMM8 = 3)
24828)]
24829#[rustc_legacy_const_generics(3)]
24830pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24831    unsafe {
24832        static_assert_uimm_bits!(IMM8, 2);
24833        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24834        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24835    }
24836}
24837
24838/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24839///
24840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
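///
/// # Example
///
/// A sketch of the zeromask behaviour, marked `ignore` (nightly with
/// `stdarch_x86_avx512` and AVX-512F assumed): elements whose mask bit is
/// clear are zeroed.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(7.0);
///     // Extract lane 1, keeping only element 0 of the extracted vector.
///     let r = _mm512_maskz_extractf32x4_ps::<1>(0b0001, a);
///     // r = [7.0, 0.0, 0.0, 0.0]
/// }
/// ```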
24841#[inline]
24842#[target_feature(enable = "avx512f")]
24843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24844#[cfg_attr(
24845    all(test, not(target_env = "msvc")),
24846    assert_instr(vextractf32x4, IMM8 = 3)
24847)]
24848#[rustc_legacy_const_generics(2)]
24849pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24850    unsafe {
24851        static_assert_uimm_bits!(IMM8, 2);
24852        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24853        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24854    }
24855}
24856
24857/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24858///
24859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24860#[inline]
24861#[target_feature(enable = "avx512f,avx512vl")]
24862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24863#[cfg_attr(
24864    all(test, not(target_env = "msvc")),
24865    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24866)]
24867#[rustc_legacy_const_generics(1)]
24868pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24869    unsafe {
24870        static_assert_uimm_bits!(IMM8, 1);
24871        match IMM8 & 0x1 {
24872            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24873            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24874        }
24875    }
24876}
24877
24878/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24879///
24880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24881#[inline]
24882#[target_feature(enable = "avx512f,avx512vl")]
24883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24884#[cfg_attr(
24885    all(test, not(target_env = "msvc")),
24886    assert_instr(vextractf32x4, IMM8 = 1)
24887)]
24888#[rustc_legacy_const_generics(3)]
24889pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24890    unsafe {
24891        static_assert_uimm_bits!(IMM8, 1);
24892        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24893        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24894    }
24895}
24896
24897/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24898///
24899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24900#[inline]
24901#[target_feature(enable = "avx512f,avx512vl")]
24902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24903#[cfg_attr(
24904    all(test, not(target_env = "msvc")),
24905    assert_instr(vextractf32x4, IMM8 = 1)
24906)]
24907#[rustc_legacy_const_generics(2)]
24908pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24909    unsafe {
24910        static_assert_uimm_bits!(IMM8, 1);
24911        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24912        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24913    }
24914}
24915
24916/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24917///
24918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
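///
/// # Example
///
/// Illustrative sketch, marked `ignore` (assumes nightly with
/// `stdarch_x86_avx512` on an AVX-512F CPU): `IMM1` picks the low (0) or high
/// (1) 256-bit half.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let hi = _mm512_extracti64x4_epi64::<1>(a);
///     // hi = [4, 5, 6, 7]
/// }
/// ```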
24919#[inline]
24920#[target_feature(enable = "avx512f")]
24921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24922#[cfg_attr(
24923    all(test, not(target_env = "msvc")),
24924    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24925)]
24926#[rustc_legacy_const_generics(1)]
24927pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24928    unsafe {
24929        static_assert_uimm_bits!(IMM1, 1);
24930        match IMM1 {
24931            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24932            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24933        }
24934    }
24935}
24936
24937/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24938///
24939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24940#[inline]
24941#[target_feature(enable = "avx512f")]
24942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24943#[cfg_attr(
24944    all(test, not(target_env = "msvc")),
24945    assert_instr(vextracti64x4, IMM1 = 1)
24946)]
24947#[rustc_legacy_const_generics(3)]
24948pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
24949    src: __m256i,
24950    k: __mmask8,
24951    a: __m512i,
24952) -> __m256i {
24953    unsafe {
24954        static_assert_uimm_bits!(IMM1, 1);
24955        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24956        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24957    }
24958}
24959
24960/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24961///
24962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
24963#[inline]
24964#[target_feature(enable = "avx512f")]
24965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24966#[cfg_attr(
24967    all(test, not(target_env = "msvc")),
24968    assert_instr(vextracti64x4, IMM1 = 1)
24969)]
24970#[rustc_legacy_const_generics(2)]
24971pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
24972    unsafe {
24973        static_assert_uimm_bits!(IMM1, 1);
24974        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
24975        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24976    }
24977}
24978
24979/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24980///
24981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
24982#[inline]
24983#[target_feature(enable = "avx512f")]
24984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24985#[cfg_attr(
24986    all(test, not(target_env = "msvc")),
24987    assert_instr(vextractf64x4, IMM8 = 1)
24988)]
24989#[rustc_legacy_const_generics(1)]
24990pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
24991    unsafe {
24992        static_assert_uimm_bits!(IMM8, 1);
24993        match IMM8 & 0x1 {
24994            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
24995            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
24996        }
24997    }
24998}
24999
25000/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25001///
25002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25003#[inline]
25004#[target_feature(enable = "avx512f")]
25005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25006#[cfg_attr(
25007    all(test, not(target_env = "msvc")),
25008    assert_instr(vextractf64x4, IMM8 = 1)
25009)]
25010#[rustc_legacy_const_generics(3)]
25011pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25012    src: __m256d,
25013    k: __mmask8,
25014    a: __m512d,
25015) -> __m256d {
25016    unsafe {
25017        static_assert_uimm_bits!(IMM8, 1);
25018        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25019        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25020    }
25021}
25022
25023/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25024///
25025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25026#[inline]
25027#[target_feature(enable = "avx512f")]
25028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25029#[cfg_attr(
25030    all(test, not(target_env = "msvc")),
25031    assert_instr(vextractf64x4, IMM8 = 1)
25032)]
25033#[rustc_legacy_const_generics(2)]
25034pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25035    unsafe {
25036        static_assert_uimm_bits!(IMM8, 1);
25037        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25038        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25039    }
25040}
25041
25042/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25043///
25044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
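///
/// # Example
///
/// A minimal sketch, marked `ignore` (nightly with `stdarch_x86_avx512` and an
/// AVX-512F CPU assumed): `IMM2` selects one of the four 128-bit lanes.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let lane2 = _mm512_extracti32x4_epi32::<2>(a);
///     // lane2 = [8, 9, 10, 11]
///     assert_eq!(_mm_cvtsi128_si32(lane2), 8);
/// }
/// ```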
25045#[inline]
25046#[target_feature(enable = "avx512f")]
25047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25048#[cfg_attr(
25049    all(test, not(target_env = "msvc")),
25050    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25051)]
25052#[rustc_legacy_const_generics(1)]
25053pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25054    unsafe {
25055        static_assert_uimm_bits!(IMM2, 2);
25056        let a = a.as_i32x16();
25057        let zero = i32x16::ZERO;
25058        let extract: i32x4 = match IMM2 {
25059            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25060            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25061            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25062            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25063        };
25064        transmute(extract)
25065    }
25066}
25067
25068/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25069///
25070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25071#[inline]
25072#[target_feature(enable = "avx512f")]
25073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25074#[cfg_attr(
25075    all(test, not(target_env = "msvc")),
25076    assert_instr(vextracti32x4, IMM2 = 3)
25077)]
25078#[rustc_legacy_const_generics(3)]
25079pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25080    src: __m128i,
25081    k: __mmask8,
25082    a: __m512i,
25083) -> __m128i {
25084    unsafe {
25085        static_assert_uimm_bits!(IMM2, 2);
25086        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25087        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25088    }
25089}
25090
25091/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25092///
25093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25094#[inline]
25095#[target_feature(enable = "avx512f")]
25096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25097#[cfg_attr(
25098    all(test, not(target_env = "msvc")),
25099    assert_instr(vextracti32x4, IMM2 = 3)
25100)]
25101#[rustc_legacy_const_generics(2)]
25102pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25103    unsafe {
25104        static_assert_uimm_bits!(IMM2, 2);
25105        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25106        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25107    }
25108}
25109
25110/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25111///
25112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25113#[inline]
25114#[target_feature(enable = "avx512f,avx512vl")]
25115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25116#[cfg_attr(
25117    all(test, not(target_env = "msvc")),
25118    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25119)]
25120#[rustc_legacy_const_generics(1)]
25121pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25122    unsafe {
25123        static_assert_uimm_bits!(IMM1, 1);
25124        let a = a.as_i32x8();
25125        let zero = i32x8::ZERO;
25126        let extract: i32x4 = match IMM1 {
25127            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25128            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25129        };
25130        transmute(extract)
25131    }
25132}
25133
25134/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25135///
25136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25137#[inline]
25138#[target_feature(enable = "avx512f,avx512vl")]
25139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25140#[cfg_attr(
25141    all(test, not(target_env = "msvc")),
25142    assert_instr(vextracti32x4, IMM1 = 1)
25143)]
25144#[rustc_legacy_const_generics(3)]
25145pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25146    src: __m128i,
25147    k: __mmask8,
25148    a: __m256i,
25149) -> __m128i {
25150    unsafe {
25151        static_assert_uimm_bits!(IMM1, 1);
25152        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25153        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25154    }
25155}
25156
25157/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25158///
25159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25160#[inline]
25161#[target_feature(enable = "avx512f,avx512vl")]
25162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25163#[cfg_attr(
25164    all(test, not(target_env = "msvc")),
25165    assert_instr(vextracti32x4, IMM1 = 1)
25166)]
25167#[rustc_legacy_const_generics(2)]
25168pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25169    unsafe {
25170        static_assert_uimm_bits!(IMM1, 1);
25171        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25172        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25173    }
25174}
25175
25176/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25177///
25178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
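///
/// # Example
///
/// Illustrative sketch, marked `ignore` (nightly with `stdarch_x86_avx512` and
/// AVX-512F assumed): each even-indexed element is duplicated into the odd
/// slot above it.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_ps(
///         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///     );
///     let r = _mm512_moveldup_ps(a);
///     // r = [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, ..., 14.0, 14.0]
/// }
/// ```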
25179#[inline]
25180#[target_feature(enable = "avx512f")]
25181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25182#[cfg_attr(test, assert_instr(vmovsldup))]
25183pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25184    unsafe {
25185        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25186        transmute(r)
25187    }
25188}
25189
25190/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25191///
25192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25193#[inline]
25194#[target_feature(enable = "avx512f")]
25195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25196#[cfg_attr(test, assert_instr(vmovsldup))]
25197pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25198    unsafe {
25199        let mov: f32x16 =
25200            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25201        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25202    }
25203}
25204
25205/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25206///
25207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25208#[inline]
25209#[target_feature(enable = "avx512f")]
25210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25211#[cfg_attr(test, assert_instr(vmovsldup))]
25212pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25213    unsafe {
25214        let mov: f32x16 =
25215            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25216        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25217    }
25218}
25219
25220/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25221///
25222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25223#[inline]
25224#[target_feature(enable = "avx512f,avx512vl")]
25225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25226#[cfg_attr(test, assert_instr(vmovsldup))]
25227pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25228    unsafe {
25229        let mov = _mm256_moveldup_ps(a);
25230        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25231    }
25232}
25233
25234/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25235///
25236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25237#[inline]
25238#[target_feature(enable = "avx512f,avx512vl")]
25239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25240#[cfg_attr(test, assert_instr(vmovsldup))]
25241pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25242    unsafe {
25243        let mov = _mm256_moveldup_ps(a);
25244        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25245    }
25246}
25247
25248/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25254#[cfg_attr(test, assert_instr(vmovsldup))]
25255pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25256    unsafe {
25257        let mov = _mm_moveldup_ps(a);
25258        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25259    }
25260}
25261
25262/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25263///
25264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25265#[inline]
25266#[target_feature(enable = "avx512f,avx512vl")]
25267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25268#[cfg_attr(test, assert_instr(vmovsldup))]
25269pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25270    unsafe {
25271        let mov = _mm_moveldup_ps(a);
25272        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25273    }
25274}
25275
25276/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25277///
25278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25279#[inline]
25280#[target_feature(enable = "avx512f")]
25281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25282#[cfg_attr(test, assert_instr(vmovshdup))]
25283pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25284    unsafe {
25285        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25286        transmute(r)
25287    }
25288}
25289
25290/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25291///
25292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25293#[inline]
25294#[target_feature(enable = "avx512f")]
25295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25296#[cfg_attr(test, assert_instr(vmovshdup))]
25297pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25298    unsafe {
25299        let mov: f32x16 =
25300            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25301        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25302    }
25303}
25304
25305/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25306///
25307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25308#[inline]
25309#[target_feature(enable = "avx512f")]
25310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25311#[cfg_attr(test, assert_instr(vmovshdup))]
25312pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25313    unsafe {
25314        let mov: f32x16 =
25315            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25316        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25317    }
25318}
25319
25320/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25321///
25322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25323#[inline]
25324#[target_feature(enable = "avx512f,avx512vl")]
25325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25326#[cfg_attr(test, assert_instr(vmovshdup))]
25327pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25328    unsafe {
25329        let mov = _mm256_movehdup_ps(a);
25330        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25331    }
25332}
25333
25334/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25335///
25336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25337#[inline]
25338#[target_feature(enable = "avx512f,avx512vl")]
25339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25340#[cfg_attr(test, assert_instr(vmovshdup))]
25341pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25342    unsafe {
25343        let mov = _mm256_movehdup_ps(a);
25344        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25345    }
25346}
25347
25348/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25349///
25350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25351#[inline]
25352#[target_feature(enable = "avx512f,avx512vl")]
25353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25354#[cfg_attr(test, assert_instr(vmovshdup))]
25355pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25356    unsafe {
25357        let mov = _mm_movehdup_ps(a);
25358        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25359    }
25360}
25361
25362/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25363///
25364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25365#[inline]
25366#[target_feature(enable = "avx512f,avx512vl")]
25367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25368#[cfg_attr(test, assert_instr(vmovshdup))]
25369pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25370    unsafe {
25371        let mov = _mm_movehdup_ps(a);
25372        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25373    }
25374}
25375
25376/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25377///
25378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
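///
/// # Example
///
/// A short sketch, marked `ignore` (assumes nightly with `stdarch_x86_avx512`
/// and an AVX-512F CPU): even-indexed doubles are duplicated over their odd
/// neighbours.
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     let r = _mm512_movedup_pd(a);
///     // r = [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0]
/// }
/// ```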
25379#[inline]
25380#[target_feature(enable = "avx512f")]
25381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25382#[cfg_attr(test, assert_instr(vmovddup))]
25383pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25384    unsafe {
25385        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25386        transmute(r)
25387    }
25388}
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25393#[inline]
25394#[target_feature(enable = "avx512f")]
25395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25398    unsafe {
25399        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25400        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25401    }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25407#[inline]
25408#[target_feature(enable = "avx512f")]
25409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25412    unsafe {
25413        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25414        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25415    }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25421#[inline]
25422#[target_feature(enable = "avx512f,avx512vl")]
25423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25424#[cfg_attr(test, assert_instr(vmovddup))]
25425pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25426    unsafe {
25427        let mov = _mm256_movedup_pd(a);
25428        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25429    }
25430}
25431
25432/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25433///
25434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25435#[inline]
25436#[target_feature(enable = "avx512f,avx512vl")]
25437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25438#[cfg_attr(test, assert_instr(vmovddup))]
25439pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25440    unsafe {
25441        let mov = _mm256_movedup_pd(a);
25442        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25443    }
25444}
25445
25446/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25447///
25448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25449#[inline]
25450#[target_feature(enable = "avx512f,avx512vl")]
25451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25452#[cfg_attr(test, assert_instr(vmovddup))]
25453pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25454    unsafe {
25455        let mov = _mm_movedup_pd(a);
25456        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25457    }
25458}
25459
25460/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25461///
25462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25463#[inline]
25464#[target_feature(enable = "avx512f,avx512vl")]
25465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25466#[cfg_attr(test, assert_instr(vmovddup))]
25467pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25468    unsafe {
25469        let mov = _mm_movedup_pd(a);
25470        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25471    }
25472}
25473
25474/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25475///
25476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
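///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let b = _mm_set1_epi32(9);
/// let r = _mm512_inserti32x4::<2>(a, b);
/// // 128-bit lane 2 (elements 8..=11) now holds 9; all other elements remain 1
/// ```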
25477#[inline]
25478#[target_feature(enable = "avx512f")]
25479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25480#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25481#[rustc_legacy_const_generics(2)]
25482pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25483    unsafe {
25484        static_assert_uimm_bits!(IMM8, 2);
25485        let a = a.as_i32x16();
25486        let b = _mm512_castsi128_si512(b).as_i32x16();
25487        let ret: i32x16 = match IMM8 & 0b11 {
25488            0 => {
25489                simd_shuffle!(
25490                    a,
25491                    b,
25492                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25493                )
25494            }
25495            1 => {
25496                simd_shuffle!(
25497                    a,
25498                    b,
25499                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25500                )
25501            }
25502            2 => {
25503                simd_shuffle!(
25504                    a,
25505                    b,
25506                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25507                )
25508            }
25509            _ => {
25510                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25511            }
25512        };
25513        transmute(ret)
25514    }
25515}
25516
25517/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25518///
25519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25520#[inline]
25521#[target_feature(enable = "avx512f")]
25522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25523#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25524#[rustc_legacy_const_generics(4)]
25525pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25526    src: __m512i,
25527    k: __mmask16,
25528    a: __m512i,
25529    b: __m128i,
25530) -> __m512i {
25531    unsafe {
25532        static_assert_uimm_bits!(IMM8, 2);
25533        let r = _mm512_inserti32x4::<IMM8>(a, b);
25534        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25535    }
25536}
25537
25538/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25539///
25540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25541#[inline]
25542#[target_feature(enable = "avx512f")]
25543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25544#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25545#[rustc_legacy_const_generics(3)]
25546pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25547    unsafe {
25548        static_assert_uimm_bits!(IMM8, 2);
25549        let r = _mm512_inserti32x4::<IMM8>(a, b);
25550        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25551    }
25552}
25553
25554/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25555///
25556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25557#[inline]
25558#[target_feature(enable = "avx512f,avx512vl")]
25559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25560#[cfg_attr(
25561    all(test, not(target_env = "msvc")),
25562    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25563)]
25564#[rustc_legacy_const_generics(2)]
25565pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25566    unsafe {
25567        static_assert_uimm_bits!(IMM8, 1);
25568        let a = a.as_i32x8();
25569        let b = _mm256_castsi128_si256(b).as_i32x8();
25570        let ret: i32x8 = match IMM8 & 0b1 {
25571            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25572            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25573        };
25574        transmute(ret)
25575    }
25576}
25577
25578/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25579///
25580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25581#[inline]
25582#[target_feature(enable = "avx512f,avx512vl")]
25583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25584#[cfg_attr(
25585    all(test, not(target_env = "msvc")),
25586    assert_instr(vinserti32x4, IMM8 = 1)
25587)]
25588#[rustc_legacy_const_generics(4)]
25589pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25590    src: __m256i,
25591    k: __mmask8,
25592    a: __m256i,
25593    b: __m128i,
25594) -> __m256i {
25595    unsafe {
25596        static_assert_uimm_bits!(IMM8, 1);
25597        let r = _mm256_inserti32x4::<IMM8>(a, b);
25598        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25599    }
25600}
25601
25602/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25603///
25604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25605#[inline]
25606#[target_feature(enable = "avx512f,avx512vl")]
25607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25608#[cfg_attr(
25609    all(test, not(target_env = "msvc")),
25610    assert_instr(vinserti32x4, IMM8 = 1)
25611)]
25612#[rustc_legacy_const_generics(3)]
25613pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25614    unsafe {
25615        static_assert_uimm_bits!(IMM8, 1);
25616        let r = _mm256_inserti32x4::<IMM8>(a, b);
25617        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25618    }
25619}
25620
25621/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25622///
25623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
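///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let b = _mm256_set1_epi64x(9);
/// let r = _mm512_inserti64x4::<1>(a, b);
/// // the upper 256 bits (elements 4..=7) now hold 9; elements 0..=3 remain 1
/// ```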
25624#[inline]
25625#[target_feature(enable = "avx512f")]
25626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25627#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25628#[rustc_legacy_const_generics(2)]
25629pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25630    unsafe {
25631        static_assert_uimm_bits!(IMM8, 1);
25632        let b = _mm512_castsi256_si512(b);
25633        match IMM8 & 0b1 {
25634            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25635            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25636        }
25637    }
25638}
25639
25640/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25641///
25642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25643#[inline]
25644#[target_feature(enable = "avx512f")]
25645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25646#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25647#[rustc_legacy_const_generics(4)]
25648pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25649    src: __m512i,
25650    k: __mmask8,
25651    a: __m512i,
25652    b: __m256i,
25653) -> __m512i {
25654    unsafe {
25655        static_assert_uimm_bits!(IMM8, 1);
25656        let r = _mm512_inserti64x4::<IMM8>(a, b);
25657        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25658    }
25659}
25660
25661/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25662///
25663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25664#[inline]
25665#[target_feature(enable = "avx512f")]
25666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25667#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25668#[rustc_legacy_const_generics(3)]
25669pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25670    unsafe {
25671        static_assert_uimm_bits!(IMM8, 1);
25672        let r = _mm512_inserti64x4::<IMM8>(a, b);
25673        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25674    }
25675}
25676
25677/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25678///
25679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
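///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.);
/// let b = _mm_set1_ps(9.);
/// let r = _mm512_insertf32x4::<0>(a, b);
/// // elements 0..=3 now hold 9.0; the remaining elements stay 1.0
/// ```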
25680#[inline]
25681#[target_feature(enable = "avx512f")]
25682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25683#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25684#[rustc_legacy_const_generics(2)]
25685pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25686    unsafe {
25687        static_assert_uimm_bits!(IMM8, 2);
25688        let b = _mm512_castps128_ps512(b);
25689        match IMM8 & 0b11 {
25690            0 => {
25691                simd_shuffle!(
25692                    a,
25693                    b,
25694                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25695                )
25696            }
25697            1 => {
25698                simd_shuffle!(
25699                    a,
25700                    b,
25701                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25702                )
25703            }
25704            2 => {
25705                simd_shuffle!(
25706                    a,
25707                    b,
25708                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25709                )
25710            }
25711            _ => {
25712                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25713            }
25714        }
25715    }
25716}
25717
25718/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25719///
25720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25721#[inline]
25722#[target_feature(enable = "avx512f")]
25723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25724#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25725#[rustc_legacy_const_generics(4)]
25726pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25727    src: __m512,
25728    k: __mmask16,
25729    a: __m512,
25730    b: __m128,
25731) -> __m512 {
25732    unsafe {
25733        static_assert_uimm_bits!(IMM8, 2);
25734        let r = _mm512_insertf32x4::<IMM8>(a, b);
25735        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25736    }
25737}
25738
25739/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25740///
25741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25742#[inline]
25743#[target_feature(enable = "avx512f")]
25744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25745#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25746#[rustc_legacy_const_generics(3)]
25747pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25748    unsafe {
25749        static_assert_uimm_bits!(IMM8, 2);
25750        let r = _mm512_insertf32x4::<IMM8>(a, b);
25751        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25752    }
25753}
25754
25755/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25756///
25757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25758#[inline]
25759#[target_feature(enable = "avx512f,avx512vl")]
25760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25761#[cfg_attr(
25762    all(test, not(target_env = "msvc")),
25763    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25764)]
25765#[rustc_legacy_const_generics(2)]
25766pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25767    unsafe {
25768        static_assert_uimm_bits!(IMM8, 1);
25769        let b = _mm256_castps128_ps256(b);
25770        match IMM8 & 0b1 {
25771            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25772            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25773        }
25774    }
25775}
25776
25777/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25778///
25779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25780#[inline]
25781#[target_feature(enable = "avx512f,avx512vl")]
25782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25783#[cfg_attr(
25784    all(test, not(target_env = "msvc")),
25785    assert_instr(vinsertf32x4, IMM8 = 1)
25786)]
25787#[rustc_legacy_const_generics(4)]
25788pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25789    src: __m256,
25790    k: __mmask8,
25791    a: __m256,
25792    b: __m128,
25793) -> __m256 {
25794    unsafe {
25795        static_assert_uimm_bits!(IMM8, 1);
25796        let r = _mm256_insertf32x4::<IMM8>(a, b);
25797        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25798    }
25799}
25800
25801/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25802///
25803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25804#[inline]
25805#[target_feature(enable = "avx512f,avx512vl")]
25806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25807#[cfg_attr(
25808    all(test, not(target_env = "msvc")),
25809    assert_instr(vinsertf32x4, IMM8 = 1)
25810)]
25811#[rustc_legacy_const_generics(3)]
25812pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25813    unsafe {
25814        static_assert_uimm_bits!(IMM8, 1);
25815        let r = _mm256_insertf32x4::<IMM8>(a, b);
25816        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25817    }
25818}
25819
25820/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25821///
25822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
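///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.);
/// let b = _mm256_set1_pd(9.);
/// let r = _mm512_insertf64x4::<0>(a, b);
/// // elements 0..=3 now hold 9.0; elements 4..=7 stay 1.0
/// ```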
25823#[inline]
25824#[target_feature(enable = "avx512f")]
25825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25826#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25827#[rustc_legacy_const_generics(2)]
25828pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25829    unsafe {
25830        static_assert_uimm_bits!(IMM8, 1);
25831        let b = _mm512_castpd256_pd512(b);
25832        match IMM8 & 0b1 {
25833            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25834            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25835        }
25836    }
25837}
25838
25839/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25840///
25841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25842#[inline]
25843#[target_feature(enable = "avx512f")]
25844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25845#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25846#[rustc_legacy_const_generics(4)]
25847pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25848    src: __m512d,
25849    k: __mmask8,
25850    a: __m512d,
25851    b: __m256d,
25852) -> __m512d {
25853    unsafe {
25854        static_assert_uimm_bits!(IMM8, 1);
25855        let r = _mm512_insertf64x4::<IMM8>(a, b);
25856        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25857    }
25858}
25859
25860/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25866#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25869    unsafe {
25870        static_assert_uimm_bits!(IMM8, 1);
25871        let r = _mm512_insertf64x4::<IMM8>(a, b);
25872        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25873    }
25874}
25875
25876/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25877///
25878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
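///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let r = _mm512_unpackhi_epi32(a, b);
/// // the first 128-bit lane of r is [2, 18, 3, 19]; the pattern repeats in each lane
/// ```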
25879#[inline]
25880#[target_feature(enable = "avx512f")]
25881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25882#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25883pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25884    unsafe {
25885        let a = a.as_i32x16();
25886        let b = b.as_i32x16();
25887        #[rustfmt::skip]
25888        let r: i32x16 = simd_shuffle!(
25889            a, b,
25890            [ 2, 18, 3, 19,
25891              2 + 4, 18 + 4, 3 + 4, 19 + 4,
25892              2 + 8, 18 + 8, 3 + 8, 19 + 8,
25893              2 + 12, 18 + 12, 3 + 12, 19 + 12],
25894        );
25895        transmute(r)
25896    }
25897}
25898
25899/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25900///
25901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25902#[inline]
25903#[target_feature(enable = "avx512f")]
25904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25905#[cfg_attr(test, assert_instr(vpunpckhdq))]
25906pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25907    unsafe {
25908        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25909        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25910    }
25911}
25912
25913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25914///
25915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
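///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs) showing the
/// zeromask behaviour, assuming the `avx512f` target feature is enabled:
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// let b = _mm512_set1_epi32(8);
/// let r = _mm512_maskz_unpackhi_epi32(0b0000_0000_0000_1111, a, b);
/// // elements 0..=3 hold the interleaved high-half values [7, 8, 7, 8];
/// // elements 4..=15 are zeroed because their mask bits are clear
/// ```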
25916#[inline]
25917#[target_feature(enable = "avx512f")]
25918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25919#[cfg_attr(test, assert_instr(vpunpckhdq))]
25920pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25921    unsafe {
25922        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25923        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25924    }
25925}
25926
25927/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25928///
25929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25930#[inline]
25931#[target_feature(enable = "avx512f,avx512vl")]
25932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25933#[cfg_attr(test, assert_instr(vpunpckhdq))]
25934pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25935    unsafe {
25936        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25937        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25938    }
25939}
25940
25941/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25942///
25943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25944#[inline]
25945#[target_feature(enable = "avx512f,avx512vl")]
25946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25947#[cfg_attr(test, assert_instr(vpunpckhdq))]
25948pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25949    unsafe {
25950        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25951        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25952    }
25953}
25954
25955/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25958#[inline]
25959#[target_feature(enable = "avx512f,avx512vl")]
25960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25961#[cfg_attr(test, assert_instr(vpunpckhdq))]
25962pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25963    unsafe {
25964        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25965        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25966    }
25967}
25968
25969/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25970///
25971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25972#[inline]
25973#[target_feature(enable = "avx512f,avx512vl")]
25974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25975#[cfg_attr(test, assert_instr(vpunpckhdq))]
25976pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25977    unsafe {
25978        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25979        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
25980    }
25981}
25982
25983/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25984///
25985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
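///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
/// let r = _mm512_unpackhi_epi64(a, b);
/// // r now holds [1, 11, 3, 13, 5, 15, 7, 17]
/// ```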
25986#[inline]
25987#[target_feature(enable = "avx512f")]
25988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25989#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
25990pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
25991    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
25992}
25993
25994/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25995///
25996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
25997#[inline]
25998#[target_feature(enable = "avx512f")]
25999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26000#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26001pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26002    unsafe {
26003        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26004        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26005    }
26006}
26007
26008/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26011#[inline]
26012#[target_feature(enable = "avx512f")]
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26015pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26016    unsafe {
26017        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26018        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26019    }
26020}
26021
26022/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26023///
26024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26025#[inline]
26026#[target_feature(enable = "avx512f,avx512vl")]
26027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26028#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26029pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26030    unsafe {
26031        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26032        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26033    }
26034}
26035
26036/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26037///
26038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26039#[inline]
26040#[target_feature(enable = "avx512f,avx512vl")]
26041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26042#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26043pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26044    unsafe {
26045        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26046        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26047    }
26048}
26049
26050/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26051///
26052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26053#[inline]
26054#[target_feature(enable = "avx512f,avx512vl")]
26055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26056#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26057pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26058    unsafe {
26059        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26060        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26061    }
26062}
26063
26064/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26065///
26066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26067#[inline]
26068#[target_feature(enable = "avx512f,avx512vl")]
26069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26070#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26071pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26072    unsafe {
26073        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26074        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26075    }
26076}
26077
26078/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26079///
26080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
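///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// let b = _mm512_setr_ps(
///     16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
/// );
/// let r = _mm512_unpackhi_ps(a, b);
/// // the first 128-bit lane of r is [2.0, 18.0, 3.0, 19.0]; the pattern repeats per lane
/// ```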
26081#[inline]
26082#[target_feature(enable = "avx512f")]
26083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26084#[cfg_attr(test, assert_instr(vunpckhps))]
26085pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26086    unsafe {
26087        #[rustfmt::skip]
26088        simd_shuffle!(
26089            a, b,
26090            [ 2, 18, 3, 19,
26091              2 + 4, 18 + 4, 3 + 4, 19 + 4,
26092              2 + 8, 18 + 8, 3 + 8, 19 + 8,
26093              2 + 12, 18 + 12, 3 + 12, 19 + 12],
26094        )
26095    }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26106    unsafe {
26107        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26108        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26109    }
26110}
26111
26112/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26113///
26114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26115#[inline]
26116#[target_feature(enable = "avx512f")]
26117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26118#[cfg_attr(test, assert_instr(vunpckhps))]
26119pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26120    unsafe {
26121        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26122        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26123    }
26124}
26125
26126/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26127///
26128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26129#[inline]
26130#[target_feature(enable = "avx512f,avx512vl")]
26131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26132#[cfg_attr(test, assert_instr(vunpckhps))]
26133pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26134    unsafe {
26135        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26136        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26137    }
26138}
26139
26140/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26141///
26142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26143#[inline]
26144#[target_feature(enable = "avx512f,avx512vl")]
26145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26146#[cfg_attr(test, assert_instr(vunpckhps))]
26147pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26148    unsafe {
26149        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26150        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26151    }
26152}
26153
26154/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26155///
26156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26157#[inline]
26158#[target_feature(enable = "avx512f,avx512vl")]
26159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26160#[cfg_attr(test, assert_instr(vunpckhps))]
26161pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26162    unsafe {
26163        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26164        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26165    }
26166}
26167
26168/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26169///
26170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26171#[inline]
26172#[target_feature(enable = "avx512f,avx512vl")]
26173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26174#[cfg_attr(test, assert_instr(vunpckhps))]
26175pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26176    unsafe {
26177        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26178        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26179    }
26180}
26181
26182/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26183///
26184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
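///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
/// let r = _mm512_unpackhi_pd(a, b);
/// // r now holds [1., 11., 3., 13., 5., 15., 7., 17.]
/// ```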
26185#[inline]
26186#[target_feature(enable = "avx512f")]
26187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26188#[cfg_attr(test, assert_instr(vunpckhpd))]
26189pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26190    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26191}
26192
26193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26194///
26195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26196#[inline]
26197#[target_feature(enable = "avx512f")]
26198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26199#[cfg_attr(test, assert_instr(vunpckhpd))]
26200pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26201    unsafe {
26202        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26203        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26204    }
26205}
26206
26207/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26208///
26209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26210#[inline]
26211#[target_feature(enable = "avx512f")]
26212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26213#[cfg_attr(test, assert_instr(vunpckhpd))]
26214pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26215    unsafe {
26216        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26217        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26218    }
26219}
26220
26221/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26224#[inline]
26225#[target_feature(enable = "avx512f,avx512vl")]
26226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26227#[cfg_attr(test, assert_instr(vunpckhpd))]
26228pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26229    unsafe {
26230        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26231        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26232    }
26233}
26234
26235/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26236///
26237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26238#[inline]
26239#[target_feature(enable = "avx512f,avx512vl")]
26240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26241#[cfg_attr(test, assert_instr(vunpckhpd))]
26242pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26243    unsafe {
26244        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26245        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26246    }
26247}
26248
26249/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26250///
26251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26252#[inline]
26253#[target_feature(enable = "avx512f,avx512vl")]
26254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26255#[cfg_attr(test, assert_instr(vunpckhpd))]
26256pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26257    unsafe {
26258        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26259        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26260    }
26261}
26262
26263/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26264///
26265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26266#[inline]
26267#[target_feature(enable = "avx512f,avx512vl")]
26268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26269#[cfg_attr(test, assert_instr(vunpckhpd))]
26270pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26271    unsafe {
26272        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26273        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26274    }
26275}
26276
26277/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26278///
26279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
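///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// let r = _mm512_unpacklo_epi32(a, b);
/// // the first 128-bit lane of r is [0, 16, 1, 17]; the pattern repeats in each lane
/// ```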
26280#[inline]
26281#[target_feature(enable = "avx512f")]
26282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26283#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26284pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26285    unsafe {
26286        let a = a.as_i32x16();
26287        let b = b.as_i32x16();
26288        #[rustfmt::skip]
26289        let r: i32x16 = simd_shuffle!(
26290            a, b,
26291            [ 0, 16, 1, 17,
26292              0 + 4, 16 + 4, 1 + 4, 17 + 4,
26293              0 + 8, 16 + 8, 1 + 8, 17 + 8,
26294              0 + 12, 16 + 12, 1 + 12, 17 + 12],
26295        );
26296        transmute(r)
26297    }
26298}
26299
26300/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26301///
26302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26303#[inline]
26304#[target_feature(enable = "avx512f")]
26305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26306#[cfg_attr(test, assert_instr(vpunpckldq))]
26307pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26308    unsafe {
26309        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26310        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26311    }
26312}
26313
26314/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26315///
26316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26317#[inline]
26318#[target_feature(enable = "avx512f")]
26319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26320#[cfg_attr(test, assert_instr(vpunpckldq))]
26321pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26322    unsafe {
26323        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26324        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26325    }
26326}
26327
26328/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26329///
26330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26331#[inline]
26332#[target_feature(enable = "avx512f,avx512vl")]
26333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26334#[cfg_attr(test, assert_instr(vpunpckldq))]
26335pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26336    unsafe {
26337        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26338        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26339    }
26340}
26341
26342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26348#[cfg_attr(test, assert_instr(vpunpckldq))]
26349pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26350    unsafe {
26351        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26352        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26353    }
26354}
26355
26356/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26357///
26358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26359#[inline]
26360#[target_feature(enable = "avx512f,avx512vl")]
26361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26362#[cfg_attr(test, assert_instr(vpunpckldq))]
26363pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26364    unsafe {
26365        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26366        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26367    }
26368}
26369
26370/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26371///
26372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26373#[inline]
26374#[target_feature(enable = "avx512f,avx512vl")]
26375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26376#[cfg_attr(test, assert_instr(vpunpckldq))]
26377pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26378    unsafe {
26379        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26380        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26381    }
26382}
26383
26384/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26385///
26386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
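///
/// # Examples
///
/// A minimal illustrative sketch (not part of the upstream docs), assuming the
/// `avx512f` target feature is enabled for the calling code:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
/// let r = _mm512_unpacklo_epi64(a, b);
/// // r now holds [0, 10, 2, 12, 4, 14, 6, 16]
/// ```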
26387#[inline]
26388#[target_feature(enable = "avx512f")]
26389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26390#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26391pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26392    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26393}
26394
26395/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26396///
26397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26398#[inline]
26399#[target_feature(enable = "avx512f")]
26400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26401#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26402pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26403    unsafe {
26404        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26405        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26406    }
26407}
26408
26409/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26410///
26411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26412#[inline]
26413#[target_feature(enable = "avx512f")]
26414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26415#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26416pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26417    unsafe {
26418        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26419        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26420    }
26421}
26422
26423/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26424///
26425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26426#[inline]
26427#[target_feature(enable = "avx512f,avx512vl")]
26428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26429#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26430pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26431    unsafe {
26432        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26433        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26434    }
26435}
26436
26437/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26438///
26439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26440#[inline]
26441#[target_feature(enable = "avx512f,avx512vl")]
26442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26443#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26444pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26445    unsafe {
26446        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26447        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26448    }
26449}
26450
26451/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26452///
26453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26454#[inline]
26455#[target_feature(enable = "avx512f,avx512vl")]
26456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26457#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26458pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26459    unsafe {
26460        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26461        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26462    }
26463}
26464
26465/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26466///
26467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26468#[inline]
26469#[target_feature(enable = "avx512f,avx512vl")]
26470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26471#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26472pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26473    unsafe {
26474        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26475        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26476    }
26477}
26478
26479/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26480///
26481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26482#[inline]
26483#[target_feature(enable = "avx512f")]
26484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26485#[cfg_attr(test, assert_instr(vunpcklps))]
26486pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26487    unsafe {
26488        #[rustfmt::skip]
26489        simd_shuffle!(a, b,
26490                       [ 0, 16, 1, 17,
26491                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26492                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26493                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26494        )
26495    }
26496}
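
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): within every 128-bit lane, elements 0 and 1 of `a` are
// interleaved with elements 0 and 1 of `b`, mirroring the shuffle indices above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_unpacklo_ps_lanes() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_set1_ps(100.);
    // Result: [0, 100, 1, 100 | 4, 100, 5, 100 | 8, 100, 9, 100 | 12, 100, 13, 100]
    _mm512_unpacklo_ps(a, b)
}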
26497
26498/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26499///
26500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26501#[inline]
26502#[target_feature(enable = "avx512f")]
26503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26504#[cfg_attr(test, assert_instr(vunpcklps))]
26505pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26506    unsafe {
26507        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26508        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26509    }
26510}
26511
26512/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26513///
26514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26515#[inline]
26516#[target_feature(enable = "avx512f")]
26517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26518#[cfg_attr(test, assert_instr(vunpcklps))]
26519pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26520    unsafe {
26521        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26522        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26523    }
26524}
26525
26526/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26527///
26528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26529#[inline]
26530#[target_feature(enable = "avx512f,avx512vl")]
26531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26532#[cfg_attr(test, assert_instr(vunpcklps))]
26533pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26534    unsafe {
26535        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26536        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26537    }
26538}
26539
26540/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26541///
26542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26543#[inline]
26544#[target_feature(enable = "avx512f,avx512vl")]
26545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26546#[cfg_attr(test, assert_instr(vunpcklps))]
26547pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26548    unsafe {
26549        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26550        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26551    }
26552}
26553
26554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26555///
26556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26557#[inline]
26558#[target_feature(enable = "avx512f,avx512vl")]
26559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26560#[cfg_attr(test, assert_instr(vunpcklps))]
26561pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26562    unsafe {
26563        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26564        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26565    }
26566}
26567
26568/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26569///
26570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26571#[inline]
26572#[target_feature(enable = "avx512f,avx512vl")]
26573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26574#[cfg_attr(test, assert_instr(vunpcklps))]
26575pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26576    unsafe {
26577        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26578        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26579    }
26580}
26581
26582/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26583///
26584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26585#[inline]
26586#[target_feature(enable = "avx512f")]
26587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26588#[cfg_attr(test, assert_instr(vunpcklpd))]
26589pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26590    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26591}
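
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): the double-precision unpack interleaves one element per
// 128-bit lane, taking element 0 of `a` and element 0 of `b` from each lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_unpacklo_pd() -> __m512d {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_set1_pd(9.);
    // Result: [0.0, 9.0, 2.0, 9.0, 4.0, 9.0, 6.0, 9.0]
    _mm512_unpacklo_pd(a, b)
}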
26592
26593/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26594///
26595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26596#[inline]
26597#[target_feature(enable = "avx512f")]
26598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26599#[cfg_attr(test, assert_instr(vunpcklpd))]
26600pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26601    unsafe {
26602        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26603        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26604    }
26605}
26606
26607/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26608///
26609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26610#[inline]
26611#[target_feature(enable = "avx512f")]
26612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26613#[cfg_attr(test, assert_instr(vunpcklpd))]
26614pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26615    unsafe {
26616        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26617        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26618    }
26619}
26620
26621/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26622///
26623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26624#[inline]
26625#[target_feature(enable = "avx512f,avx512vl")]
26626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26627#[cfg_attr(test, assert_instr(vunpcklpd))]
26628pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26629    unsafe {
26630        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26631        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26632    }
26633}
26634
26635/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26636///
26637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26638#[inline]
26639#[target_feature(enable = "avx512f,avx512vl")]
26640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26641#[cfg_attr(test, assert_instr(vunpcklpd))]
26642pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26643    unsafe {
26644        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26645        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26646    }
26647}
26648
26649/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26650///
26651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26652#[inline]
26653#[target_feature(enable = "avx512f,avx512vl")]
26654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26655#[cfg_attr(test, assert_instr(vunpcklpd))]
26656pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26657    unsafe {
26658        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26659        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26660    }
26661}
26662
26663/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26664///
26665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26666#[inline]
26667#[target_feature(enable = "avx512f,avx512vl")]
26668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26669#[cfg_attr(test, assert_instr(vunpcklpd))]
26670pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26671    unsafe {
26672        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26673        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26674    }
26675}
26676
26677/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26678///
26679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
26680#[inline]
26681#[target_feature(enable = "avx512f")]
26682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26683pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26684    unsafe {
26685        simd_shuffle!(
26686            a,
26687            _mm_undefined_ps(),
26688            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26689        )
26690    }
26691}
26692
26693/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26694///
26695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26696#[inline]
26697#[target_feature(enable = "avx512f")]
26698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26699pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26700    unsafe {
26701        simd_shuffle!(
26702            a,
26703            _mm256_undefined_ps(),
26704            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26705        )
26706    }
26707}
26708
26709/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26710///
26711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
26712#[inline]
26713#[target_feature(enable = "avx512f")]
26714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26715pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26716    unsafe {
26717        simd_shuffle!(
26718            a,
26719            _mm_set1_ps(0.),
26720            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26721        )
26722    }
26723}
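
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): only the `zext` widening guarantees zeros in the upper
// 384 bits; the `cast` widening leaves them unspecified, so code must not rely on them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cast_vs_zext_ps() -> (__m512, __m512) {
    let lo = _mm_set_ps(3., 2., 1., 0.);
    // Upper 12 lanes: unspecified contents vs. guaranteed 0.0.
    let widened_undef = _mm512_castps128_ps512(lo);
    let widened_zeroed = _mm512_zextps128_ps512(lo);
    (widened_undef, widened_zeroed)
}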
26724
26725/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26726///
26727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26728#[inline]
26729#[target_feature(enable = "avx512f")]
26730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26731pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26732    unsafe {
26733        simd_shuffle!(
26734            a,
26735            _mm256_set1_ps(0.),
26736            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26737        )
26738    }
26739}
26740
26741/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26742///
26743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
26744#[inline]
26745#[target_feature(enable = "avx512f")]
26746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26747pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26748    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26749}
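
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): narrowing casts keep the lowest 128 (or 256) bits of the
// register and simply reinterpret it; no data movement is implied.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_downcast_ps() -> __m128 {
    let wide = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // Yields the 128-bit vector [0.0, 1.0, 2.0, 3.0].
    _mm512_castps512_ps128(wide)
}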
26750
26751/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26752///
26753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26754#[inline]
26755#[target_feature(enable = "avx512f")]
26756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26757pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26758    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26759}
26760
26761/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_castps_pd(a: __m512) -> __m512d {
26768    unsafe { transmute(a) }
26769}
26770
26771/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_castps_si512(a: __m512) -> __m512i {
26778    unsafe { transmute(a) }
26779}
26780
26781/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26788    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26789}
26790
26791/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26798    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26808    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26818    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26828    unsafe { simd_shuffle!(a, a, [0, 1]) }
26829}
26830
26831/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26838    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
26848    unsafe { transmute(a) }
26849}
26850
26851/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
26858    unsafe { transmute(a) }
26859}
26860
26861/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26868    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26869}
26870
26871/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26878    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26888    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26898    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26908    unsafe { simd_shuffle!(a, a, [0, 1]) }
26909}
26910
26911/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26917pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26918    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
26928    unsafe { transmute(a) }
26929}
26930
26931/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26937pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
26938    unsafe { transmute(a) }
26939}
26940
26941/// Copy the lower 32-bit integer in a to dst.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26947#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
26948pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26949    unsafe { simd_extract!(a.as_i32x16(), 0) }
26950}
26951
26952/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26953///
26954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26955#[inline]
26956#[target_feature(enable = "avx512f")]
26957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26958pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26959    unsafe { simd_extract!(a, 0) }
26960}
26961
26962/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26963///
26964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26965#[inline]
26966#[target_feature(enable = "avx512f")]
26967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26968pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26969    unsafe { simd_extract!(a, 0) }
26970}
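
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): the three `cvt*` copies above simply read element 0 of the
// vector; no rounding or numeric conversion is involved.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_extract_lane0() -> (i32, f32, f64) {
    let i = _mm512_setr_epi32(7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let s = _mm512_set1_ps(1.5);
    let d = _mm512_set1_pd(2.5);
    // Yields (7, 1.5, 2.5).
    (
        _mm512_cvtsi512_si32(i),
        _mm512_cvtss_f32(s),
        _mm512_cvtsd_f64(d),
    )
}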
26971
26972/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26978#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26979pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
26980    unsafe {
26981        let a = _mm512_castsi128_si512(a).as_i32x16();
26982        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
26983        transmute(ret)
26984    }
26985}
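
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): element 0 of the 128-bit source is replicated to all
// sixteen 32-bit lanes, and the masked form then blends against `src` per mask bit.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_broadcastd_epi32() -> __m512i {
    let a = _mm_set_epi32(0, 0, 0, 42); // element 0 is 42
    let src = _mm512_set1_epi32(-1);
    // Even lanes take the broadcast 42, odd lanes keep -1 from `src`.
    _mm512_mask_broadcastd_epi32(src, 0b0101_0101_0101_0101, a)
}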
26986
26987/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26988///
26989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
26990#[inline]
26991#[target_feature(enable = "avx512f")]
26992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26993#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26994pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
26995    unsafe {
26996        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
26997        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
26998    }
26999}
27000
27001/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27002///
27003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27004#[inline]
27005#[target_feature(enable = "avx512f")]
27006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27007#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27008pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27009    unsafe {
27010        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27011        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27012    }
27013}
27014
27015/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27016///
27017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27018#[inline]
27019#[target_feature(enable = "avx512f,avx512vl")]
27020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27021#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27022pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27023    unsafe {
27024        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27025        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27026    }
27027}
27028
27029/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27030///
27031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27032#[inline]
27033#[target_feature(enable = "avx512f,avx512vl")]
27034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27035#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27036pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27037    unsafe {
27038        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27039        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27040    }
27041}
27042
27043/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27044///
27045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27046#[inline]
27047#[target_feature(enable = "avx512f,avx512vl")]
27048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27049#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27050pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27051    unsafe {
27052        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27053        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27054    }
27055}
27056
27057/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27058///
27059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27060#[inline]
27061#[target_feature(enable = "avx512f,avx512vl")]
27062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27063#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27064pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27065    unsafe {
27066        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27067        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27068    }
27069}
27070
27071/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27072///
27073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27074#[inline]
27075#[target_feature(enable = "avx512f")]
27076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27077#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27078pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27079    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27080}
27081
27082/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27083///
27084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27085#[inline]
27086#[target_feature(enable = "avx512f")]
27087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27088#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27089pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27090    unsafe {
27091        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27092        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27093    }
27094}
27095
27096/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27097///
27098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27099#[inline]
27100#[target_feature(enable = "avx512f")]
27101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27102#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27103pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27104    unsafe {
27105        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27106        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27107    }
27108}
27109
27110/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27111///
27112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27113#[inline]
27114#[target_feature(enable = "avx512f,avx512vl")]
27115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27116#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27117pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27118    unsafe {
27119        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27120        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27121    }
27122}
27123
27124/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27125///
27126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27127#[inline]
27128#[target_feature(enable = "avx512f,avx512vl")]
27129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27130#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27131pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27132    unsafe {
27133        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27134        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27135    }
27136}
27137
27138/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27139///
27140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27141#[inline]
27142#[target_feature(enable = "avx512f,avx512vl")]
27143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27144#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27145pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27146    unsafe {
27147        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27148        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27149    }
27150}
27151
27152/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27153///
27154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27155#[inline]
27156#[target_feature(enable = "avx512f,avx512vl")]
27157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27158#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27159pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27160    unsafe {
27161        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27162        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27163    }
27164}
27165
27166/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
27169#[inline]
27170#[target_feature(enable = "avx512f")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vbroadcastss))]
27173pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27174    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27175}
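
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): the low f32 of `a` is splatted to all lanes, and with a
// zeromask only the selected lanes survive.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_broadcastss_ps() -> __m512 {
    let a = _mm_set_ss(3.25); // lane 0 = 3.25, remaining lanes 0.0
    // Lanes 0..8 become 3.25, lanes 8..16 are zeroed by the mask.
    _mm512_maskz_broadcastss_ps(0x00ff, a)
}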
27176
27177/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27180#[inline]
27181#[target_feature(enable = "avx512f")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vbroadcastss))]
27184pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27185    unsafe {
27186        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27187        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27188    }
27189}
27190
27191/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27192///
27193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27194#[inline]
27195#[target_feature(enable = "avx512f")]
27196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27197#[cfg_attr(test, assert_instr(vbroadcastss))]
27198pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27199    unsafe {
27200        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27201        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27202    }
27203}
27204
27205/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27206///
27207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27208#[inline]
27209#[target_feature(enable = "avx512f,avx512vl")]
27210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27211#[cfg_attr(test, assert_instr(vbroadcastss))]
27212pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27213    unsafe {
27214        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27215        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27216    }
27217}
27218
27219/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27220///
27221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27222#[inline]
27223#[target_feature(enable = "avx512f,avx512vl")]
27224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27225#[cfg_attr(test, assert_instr(vbroadcastss))]
27226pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27227    unsafe {
27228        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27229        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27230    }
27231}
27232
27233/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27234///
27235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27236#[inline]
27237#[target_feature(enable = "avx512f,avx512vl")]
27238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27239#[cfg_attr(test, assert_instr(vbroadcastss))]
27240pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27241    unsafe {
27242        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27243        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27244    }
27245}
27246
27247/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27248///
27249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27250#[inline]
27251#[target_feature(enable = "avx512f,avx512vl")]
27252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27253#[cfg_attr(test, assert_instr(vbroadcastss))]
27254pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27255    unsafe {
27256        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27257        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27258    }
27259}
27260
27261/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27262///
27263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27264#[inline]
27265#[target_feature(enable = "avx512f")]
27266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27267#[cfg_attr(test, assert_instr(vbroadcastsd))]
27268pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27269    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27270}
27271
27272/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27273///
27274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27275#[inline]
27276#[target_feature(enable = "avx512f")]
27277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27278#[cfg_attr(test, assert_instr(vbroadcastsd))]
27279pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27280    unsafe {
27281        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27282        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27283    }
27284}
27285
27286/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27287///
27288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27289#[inline]
27290#[target_feature(enable = "avx512f")]
27291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27292#[cfg_attr(test, assert_instr(vbroadcastsd))]
27293pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27294    unsafe {
27295        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27296        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27297    }
27298}
27299
27300/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27301///
27302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27303#[inline]
27304#[target_feature(enable = "avx512f,avx512vl")]
27305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27306#[cfg_attr(test, assert_instr(vbroadcastsd))]
27307pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27308    unsafe {
27309        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27310        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27311    }
27312}
27313
27314/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27315///
27316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27317#[inline]
27318#[target_feature(enable = "avx512f,avx512vl")]
27319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27320#[cfg_attr(test, assert_instr(vbroadcastsd))]
27321pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27322    unsafe {
27323        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27324        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27325    }
27326}
27327
27328/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27329///
27330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
27331#[inline]
27332#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27334pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27335    unsafe {
27336        let a = a.as_i32x4();
27337        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
27338        transmute(ret)
27339    }
27340}
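
// Illustrative sketch (hypothetical `_example_*` helper, gated out of normal builds;
// not part of the crate): the whole 128-bit block is repeated four times, unlike
// `_mm512_broadcastd_epi32`, which repeats a single 32-bit element.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_broadcast_i32x4() -> __m512i {
    let a = _mm_setr_epi32(1, 2, 3, 4);
    // Result: [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
    _mm512_broadcast_i32x4(a)
}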
27341
27342/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27345#[inline]
27346#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27348pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27349    unsafe {
27350        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27351        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27352    }
27353}
27354
27355/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27358#[inline]
27359#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27362    unsafe {
27363        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27364        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27365    }
27366}
27367
27368/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27369///
27370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27371#[inline]
27372#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27374pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27375    unsafe {
27376        let a = a.as_i32x4();
27377        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
27378        transmute(ret)
27379    }
27380}
27381
27382/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27383///
27384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27385#[inline]
27386#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27388pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27389    unsafe {
27390        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27391        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27392    }
27393}
27394
27395/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27396///
27397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27398#[inline]
27399#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27401pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27402    unsafe {
27403        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27404        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27405    }
27406}
27407
27408/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27409///
27410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27411#[inline]
27412#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27414pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27415    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27416}
27417
27418/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27419///
27420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27421#[inline]
27422#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27424pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27425    unsafe {
27426        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27427        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27428    }
27429}
27430
27431/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27432///
27433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27434#[inline]
27435#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27437pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27438    unsafe {
27439        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27440        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27441    }
27442}
27443
27444/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27445///
27446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27447#[inline]
27448#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27450pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27451    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27452}
27453
27454/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27457#[inline]
26458#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27460pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27461    unsafe {
27462        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27463        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27464    }
27465}
27466
27467/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27468///
27469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27470#[inline]
26471#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27473pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27474    unsafe {
27475        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27476        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27477    }
27478}
27479
27480/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27481///
27482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27483#[inline]
27484#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27486pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27487    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27488}
27489
27490/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27491///
27492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27493#[inline]
27494#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27496pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27497    unsafe {
27498        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27499        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27500    }
27501}
27502
27503/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27504///
27505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27506#[inline]
27507#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27509pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27510    unsafe {
27511        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27512        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27513    }
27514}
27515
27516/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27517///
27518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27519#[inline]
27520#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27522pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27523    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27524}
27525
27526/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27527///
27528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27529#[inline]
27530#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27532pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27533    unsafe {
27534        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27535        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27536    }
27537}
27538
27539/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27542#[inline]
27543#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27545pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27546    unsafe {
27547        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27548        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27549    }
27550}
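
// Illustrative sketch (not part of the crate): `_mm512_broadcast_f32x4` repeats
// the 128-bit source in each of the four 128-bit lanes; the masked f32x4/f64x4
// forms above then select per element. Hypothetical helper name; assumes a
// target with AVX-512F enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn broadcast_f32x4_sketch() {
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let r = _mm512_broadcast_f32x4(a);
    // r = [1.0, 2.0, 3.0, 4.0] repeated four times.
    let e = _mm512_setr_ps(
        1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
    );
    // Exact bit patterns, so an integer compare is enough here.
    assert_eq!(
        _mm512_cmpeq_epi32_mask(_mm512_castps_si512(r), _mm512_castps_si512(e)),
        0xFFFF
    );
}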
27551
27552/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27553///
27554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
27555#[inline]
27556#[target_feature(enable = "avx512f")]
27557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27558#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27559pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27560    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27561}
27562
27563/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27564///
27565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27566#[inline]
27567#[target_feature(enable = "avx512f,avx512vl")]
27568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27569#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27570pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27571    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27572}
27573
27574/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27575///
27576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27577#[inline]
27578#[target_feature(enable = "avx512f,avx512vl")]
27579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27580#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27581pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27582    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27583}
27584
27585/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27586///
27587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27588#[inline]
27589#[target_feature(enable = "avx512f")]
27590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27591#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27592pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27593    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27594}
27595
27596/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27597///
27598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27599#[inline]
27600#[target_feature(enable = "avx512f,avx512vl")]
27601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27602#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27603pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27604    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27605}
27606
27607/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27608///
27609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27610#[inline]
27611#[target_feature(enable = "avx512f,avx512vl")]
27612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27613#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27614pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27615    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27616}
27617
27618/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27619///
27620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27621#[inline]
27622#[target_feature(enable = "avx512f")]
27623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27624#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27625pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27626    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27627}
27628
27629/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27632#[inline]
27633#[target_feature(enable = "avx512f,avx512vl")]
27634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27635#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27636pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27637    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27638}
27639
27640/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27641///
27642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27643#[inline]
27644#[target_feature(enable = "avx512f,avx512vl")]
27645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27646#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27647pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27648    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27649}
27650
27651/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27652///
27653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27654#[inline]
27655#[target_feature(enable = "avx512f")]
27656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27657#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27658pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27659    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27660}
27661
27662/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27663///
27664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27665#[inline]
27666#[target_feature(enable = "avx512f,avx512vl")]
27667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27668#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27669pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27670    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27671}
27672
27673/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27674///
27675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27676#[inline]
27677#[target_feature(enable = "avx512f,avx512vl")]
27678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27679#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27680pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27681    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27682}
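
// Illustrative sketch (not part of the crate): in every blend above, a set mask
// bit selects the element from `b`, a clear bit selects it from `a`. The helper
// name is hypothetical; assumes a target with AVX-512F enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_blend_epi32_sketch() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // Low eight bits set: elements 0..8 come from `b`, elements 8..16 from `a`.
    let r = _mm512_mask_blend_epi32(0x00FF, a, b);
    let e = _mm512_setr_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}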
27683
27684/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27685///
27686/// <div class="warning">Only the lowest <strong>4 bits</strong> of the immediate are used (shift by at most 60 bytes)!</div>
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
27689#[inline]
27690#[target_feature(enable = "avx512f")]
27691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27692#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27693#[rustc_legacy_const_generics(2)]
27694pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27695    unsafe {
27696        static_assert_uimm_bits!(IMM8, 8);
27697        let a = a.as_i32x16();
27698        let b = b.as_i32x16();
27699        let imm8: i32 = IMM8 % 16;
27700        let r: i32x16 = match imm8 {
27701            0 => simd_shuffle!(
27702                a,
27703                b,
27704                [
27705                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27706                ],
27707            ),
27708            1 => simd_shuffle!(
27709                a,
27710                b,
27711                [
27712                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27713                ],
27714            ),
27715            2 => simd_shuffle!(
27716                a,
27717                b,
27718                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27719            ),
27720            3 => simd_shuffle!(
27721                a,
27722                b,
27723                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27724            ),
27725            4 => simd_shuffle!(
27726                a,
27727                b,
27728                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27729            ),
27730            5 => simd_shuffle!(
27731                a,
27732                b,
27733                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27734            ),
27735            6 => simd_shuffle!(
27736                a,
27737                b,
27738                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27739            ),
27740            7 => simd_shuffle!(
27741                a,
27742                b,
27743                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27744            ),
27745            8 => simd_shuffle!(
27746                a,
27747                b,
27748                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27749            ),
27750            9 => simd_shuffle!(
27751                a,
27752                b,
27753                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27754            ),
27755            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27756            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27757            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27758            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27759            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27760            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27761            _ => unreachable_unchecked(),
27762        };
27763        transmute(r)
27764    }
27765}
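
// Illustrative sketch (not part of the crate): `valignd` treats `b` as the low
// half and `a` as the high half of the 1024-bit concatenation, so IMM8 = 1
// yields b[1..16] followed by a[0]. Hypothetical helper name; assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn alignr_epi32_sketch() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi32(
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    );
    let r = _mm512_alignr_epi32::<1>(a, b);
    let e = _mm512_setr_epi32(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
    );
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}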
27766
27767/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27768///
27769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27770#[inline]
27771#[target_feature(enable = "avx512f")]
27772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27773#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27774#[rustc_legacy_const_generics(4)]
27775pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27776    src: __m512i,
27777    k: __mmask16,
27778    a: __m512i,
27779    b: __m512i,
27780) -> __m512i {
27781    unsafe {
27782        static_assert_uimm_bits!(IMM8, 8);
27783        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27784        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27785    }
27786}
27787
27788/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27789///
27790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27791#[inline]
27792#[target_feature(enable = "avx512f")]
27793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27794#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27795#[rustc_legacy_const_generics(3)]
27796pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27797    unsafe {
27798        static_assert_uimm_bits!(IMM8, 8);
27799        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27800        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27801    }
27802}
27803
27804/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27805///
27806/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift by at most 28 bytes)!</div>
27807///
27808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27809#[inline]
27810#[target_feature(enable = "avx512f,avx512vl")]
27811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27812#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27813#[rustc_legacy_const_generics(2)]
27814pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27815    unsafe {
27816        static_assert_uimm_bits!(IMM8, 8);
27817        let a = a.as_i32x8();
27818        let b = b.as_i32x8();
27819        let imm8: i32 = IMM8 % 8;
27820        let r: i32x8 = match imm8 {
27821            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27822            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27823            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27824            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27825            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27826            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27827            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27828            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27829            _ => unreachable_unchecked(),
27830        };
27831        transmute(r)
27832    }
27833}
27834
27835/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27836///
27837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27838#[inline]
27839#[target_feature(enable = "avx512f,avx512vl")]
27840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27841#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27842#[rustc_legacy_const_generics(4)]
27843pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27844    src: __m256i,
27845    k: __mmask8,
27846    a: __m256i,
27847    b: __m256i,
27848) -> __m256i {
27849    unsafe {
27850        static_assert_uimm_bits!(IMM8, 8);
27851        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27852        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27853    }
27854}
27855
27856/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27857///
27858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27859#[inline]
27860#[target_feature(enable = "avx512f,avx512vl")]
27861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27862#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27863#[rustc_legacy_const_generics(3)]
27864pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27865    unsafe {
27866        static_assert_uimm_bits!(IMM8, 8);
27867        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27868        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27869    }
27870}
27871
27872/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27873///
27874/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift by at most 12 bytes)!</div>
27875///
27876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27877#[inline]
27878#[target_feature(enable = "avx512f,avx512vl")]
27879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27880#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27881#[rustc_legacy_const_generics(2)]
27882pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27883    unsafe {
27884        static_assert_uimm_bits!(IMM8, 8);
27885        let a = a.as_i32x4();
27886        let b = b.as_i32x4();
27887        let imm8: i32 = IMM8 % 4;
27888        let r: i32x4 = match imm8 {
27889            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27890            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27891            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27892            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27893            _ => unreachable_unchecked(),
27894        };
27895        transmute(r)
27896    }
27897}
27898
27899/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27900///
27901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27902#[inline]
27903#[target_feature(enable = "avx512f,avx512vl")]
27904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27905#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27906#[rustc_legacy_const_generics(4)]
27907pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27908    src: __m128i,
27909    k: __mmask8,
27910    a: __m128i,
27911    b: __m128i,
27912) -> __m128i {
27913    unsafe {
27914        static_assert_uimm_bits!(IMM8, 8);
27915        let r = _mm_alignr_epi32::<IMM8>(a, b);
27916        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27917    }
27918}
27919
27920/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27921///
27922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27923#[inline]
27924#[target_feature(enable = "avx512f,avx512vl")]
27925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27926#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27927#[rustc_legacy_const_generics(3)]
27928pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27929    unsafe {
27930        static_assert_uimm_bits!(IMM8, 8);
27931        let r = _mm_alignr_epi32::<IMM8>(a, b);
27932        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27933    }
27934}
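
// Illustrative sketch (not part of the crate): only IMM8 % 4 matters for the
// 128-bit variant, so a shift count of 5 behaves like 1. Hypothetical helper
// name; assumes a target with AVX-512F and AVX-512VL enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn alignr_epi32_wrap_sketch() {
    let a = _mm_setr_epi32(0, 1, 2, 3);
    let b = _mm_setr_epi32(4, 5, 6, 7);
    let r5 = _mm_alignr_epi32::<5>(a, b);
    let r1 = _mm_alignr_epi32::<1>(a, b);
    // Both produce [5, 6, 7, 0].
    assert_eq!(_mm_cmpeq_epi32_mask(r5, r1), 0b1111);
}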
27935
27936/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27937///
27938/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift by at most 56 bytes)!</div>
27939///
27940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27941#[inline]
27942#[target_feature(enable = "avx512f")]
27943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27944#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27945#[rustc_legacy_const_generics(2)]
27946pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27947    unsafe {
27948        static_assert_uimm_bits!(IMM8, 8);
27949        let imm8: i32 = IMM8 % 8;
27950        let r: i64x8 = match imm8 {
27951            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27952            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27953            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27954            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27955            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27956            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27957            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27958            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27959            _ => unreachable_unchecked(),
27960        };
27961        transmute(r)
27962    }
27963}
27964
27965/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27966///
27967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27968#[inline]
27969#[target_feature(enable = "avx512f")]
27970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27971#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27972#[rustc_legacy_const_generics(4)]
27973pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27974    src: __m512i,
27975    k: __mmask8,
27976    a: __m512i,
27977    b: __m512i,
27978) -> __m512i {
27979    unsafe {
27980        static_assert_uimm_bits!(IMM8, 8);
27981        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27982        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
27983    }
27984}
27985
27986/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27987///
27988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
27989#[inline]
27990#[target_feature(enable = "avx512f")]
27991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27992#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27993#[rustc_legacy_const_generics(3)]
27994pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27995    unsafe {
27996        static_assert_uimm_bits!(IMM8, 8);
27997        let r = _mm512_alignr_epi64::<IMM8>(a, b);
27998        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
27999    }
28000}
28001
28002/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28003///
28004/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift by at most 24 bytes)!</div>
28005///
28006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28007#[inline]
28008#[target_feature(enable = "avx512f,avx512vl")]
28009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28010#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28011#[rustc_legacy_const_generics(2)]
28012pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28013    unsafe {
28014        static_assert_uimm_bits!(IMM8, 8);
28015        let imm8: i32 = IMM8 % 4;
28016        let r: i64x4 = match imm8 {
28017            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28018            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28019            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28020            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28021            _ => unreachable_unchecked(),
28022        };
28023        transmute(r)
28024    }
28025}
28026
28027/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28028///
28029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28030#[inline]
28031#[target_feature(enable = "avx512f,avx512vl")]
28032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28033#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28034#[rustc_legacy_const_generics(4)]
28035pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28036    src: __m256i,
28037    k: __mmask8,
28038    a: __m256i,
28039    b: __m256i,
28040) -> __m256i {
28041    unsafe {
28042        static_assert_uimm_bits!(IMM8, 8);
28043        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28044        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28045    }
28046}
28047
28048/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28049///
28050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28051#[inline]
28052#[target_feature(enable = "avx512f,avx512vl")]
28053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28054#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28055#[rustc_legacy_const_generics(3)]
28056pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28057    unsafe {
28058        static_assert_uimm_bits!(IMM8, 8);
28059        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28060        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28061    }
28062}
28063
28064/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28065///
28066/// <div class="warning">Only the lowest <strong>bit</strong> of the immediate is used (shift by at most 8 bytes)!</div>
28067///
28068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28069#[inline]
28070#[target_feature(enable = "avx512f,avx512vl")]
28071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28072#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28073#[rustc_legacy_const_generics(2)]
28074pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28075    unsafe {
28076        static_assert_uimm_bits!(IMM8, 8);
28077        let imm8: i32 = IMM8 % 2;
28078        let r: i64x2 = match imm8 {
28079            0 => simd_shuffle!(a, b, [2, 3]),
28080            1 => simd_shuffle!(a, b, [3, 0]),
28081            _ => unreachable_unchecked(),
28082        };
28083        transmute(r)
28084    }
28085}
28086
28087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28088///
28089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28090#[inline]
28091#[target_feature(enable = "avx512f,avx512vl")]
28092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28093#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28094#[rustc_legacy_const_generics(4)]
28095pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28096    src: __m128i,
28097    k: __mmask8,
28098    a: __m128i,
28099    b: __m128i,
28100) -> __m128i {
28101    unsafe {
28102        static_assert_uimm_bits!(IMM8, 8);
28103        let r = _mm_alignr_epi64::<IMM8>(a, b);
28104        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28105    }
28106}
28107
28108/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28109///
28110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28111#[inline]
28112#[target_feature(enable = "avx512f,avx512vl")]
28113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28114#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28115#[rustc_legacy_const_generics(3)]
28116pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28117    unsafe {
28118        static_assert_uimm_bits!(IMM8, 8);
28119        let r = _mm_alignr_epi64::<IMM8>(a, b);
28120        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28121    }
28122}
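
// Illustrative sketch (not part of the crate): passing the same vector twice
// turns `valignq` into a whole-register element rotation, a common idiom for
// moving data across 128-bit lane boundaries. Hypothetical helper name; assumes
// a target with AVX-512F enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn alignr_epi64_rotate_sketch() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // Rotate every element down by one position; element 0 wraps to the top.
    let r = _mm512_alignr_epi64::<1>(a, a);
    let e = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 0);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xFF);
}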
28123
28124/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28125///
28126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28127#[inline]
28128#[target_feature(enable = "avx512f")]
28129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28130#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but vpandq is generated
28131pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
28132    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28133}
28134
28135/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28138#[inline]
28139#[target_feature(enable = "avx512f")]
28140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28141#[cfg_attr(test, assert_instr(vpandd))]
28142pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28143    unsafe {
28144        let and = _mm512_and_epi32(a, b).as_i32x16();
28145        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28146    }
28147}
28148
28149/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28150///
28151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28152#[inline]
28153#[target_feature(enable = "avx512f")]
28154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28155#[cfg_attr(test, assert_instr(vpandd))]
28156pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28157    unsafe {
28158        let and = _mm512_and_epi32(a, b).as_i32x16();
28159        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28160    }
28161}
28162
28163/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28164///
28165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28166#[inline]
28167#[target_feature(enable = "avx512f,avx512vl")]
28168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28169#[cfg_attr(test, assert_instr(vpandd))]
28170pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28171    unsafe {
28172        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28173        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28174    }
28175}
28176
28177/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28178///
28179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28180#[inline]
28181#[target_feature(enable = "avx512f,avx512vl")]
28182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28183#[cfg_attr(test, assert_instr(vpandd))]
28184pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28185    unsafe {
28186        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28187        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28188    }
28189}
28190
28191/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28192///
28193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28194#[inline]
28195#[target_feature(enable = "avx512f,avx512vl")]
28196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28197#[cfg_attr(test, assert_instr(vpandd))]
28198pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28199    unsafe {
28200        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28201        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28202    }
28203}
28204
28205/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28206///
28207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28208#[inline]
28209#[target_feature(enable = "avx512f,avx512vl")]
28210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28211#[cfg_attr(test, assert_instr(vpandd))]
28212pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28213    unsafe {
28214        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28215        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28216    }
28217}
28218
28219/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28220///
28221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28222#[inline]
28223#[target_feature(enable = "avx512f")]
28224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28225#[cfg_attr(test, assert_instr(vpandq))]
28226pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
28227    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28228}
28229
28230/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28231///
28232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28233#[inline]
28234#[target_feature(enable = "avx512f")]
28235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28236#[cfg_attr(test, assert_instr(vpandq))]
28237pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28238    unsafe {
28239        let and = _mm512_and_epi64(a, b).as_i64x8();
28240        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28241    }
28242}
28243
28244/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28245///
28246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28247#[inline]
28248#[target_feature(enable = "avx512f")]
28249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28250#[cfg_attr(test, assert_instr(vpandq))]
28251pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28252    unsafe {
28253        let and = _mm512_and_epi64(a, b).as_i64x8();
28254        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28255    }
28256}
28257
28258/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28259///
28260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28261#[inline]
28262#[target_feature(enable = "avx512f,avx512vl")]
28263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28264#[cfg_attr(test, assert_instr(vpandq))]
28265pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28266    unsafe {
28267        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28268        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28269    }
28270}
28271
28272/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28273///
28274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28275#[inline]
28276#[target_feature(enable = "avx512f,avx512vl")]
28277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28278#[cfg_attr(test, assert_instr(vpandq))]
28279pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28280    unsafe {
28281        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28282        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28283    }
28284}
28285
28286/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28287///
28288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28289#[inline]
28290#[target_feature(enable = "avx512f,avx512vl")]
28291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28292#[cfg_attr(test, assert_instr(vpandq))]
28293pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28294    unsafe {
28295        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28296        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28297    }
28298}
28299
28300/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28301///
28302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28303#[inline]
28304#[target_feature(enable = "avx512f,avx512vl")]
28305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28306#[cfg_attr(test, assert_instr(vpandq))]
28307pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28308    unsafe {
28309        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28310        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28311    }
28312}
28313
28314/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28315///
28316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28317#[inline]
28318#[target_feature(enable = "avx512f")]
28319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28320#[cfg_attr(test, assert_instr(vpandq))]
28321pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
28322    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28323}
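
// Illustrative sketch (not part of the crate): the masked AND computes the AND
// only where the mask bit is set; other lanes keep `src` (writemask) or become
// zero (zeromask). Hypothetical helper name; assumes AVX-512F enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_and_epi32_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // Only the low four lanes are computed (0b1100 & 0b1010 = 0b1000); the
    // remaining lanes keep the value from `src`.
    let r = _mm512_mask_and_epi32(src, 0x000F, a, b);
    let e = _mm512_setr_epi32(
        0b1000, 0b1000, 0b1000, 0b1000, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    );
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}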
28324
28325/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28326///
28327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28328#[inline]
28329#[target_feature(enable = "avx512f")]
28330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28331#[cfg_attr(test, assert_instr(vporq))]
28332pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
28333    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28334}
28335
28336/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28337///
28338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28339#[inline]
28340#[target_feature(enable = "avx512f")]
28341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28342#[cfg_attr(test, assert_instr(vpord))]
28343pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28344    unsafe {
28345        let or = _mm512_or_epi32(a, b).as_i32x16();
28346        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28347    }
28348}
28349
28350/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28351///
28352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28353#[inline]
28354#[target_feature(enable = "avx512f")]
28355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28356#[cfg_attr(test, assert_instr(vpord))]
28357pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28358    unsafe {
28359        let or = _mm512_or_epi32(a, b).as_i32x16();
28360        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28361    }
28362}
28363
28364/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28365///
28366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28367#[inline]
28368#[target_feature(enable = "avx512f,avx512vl")]
28369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28370#[cfg_attr(test, assert_instr(vor))] //should be vpord
28371pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
28372    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28373}
28374
28375/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28376///
28377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28378#[inline]
28379#[target_feature(enable = "avx512f,avx512vl")]
28380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28381#[cfg_attr(test, assert_instr(vpord))]
28382pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28383    unsafe {
28384        let or = _mm256_or_epi32(a, b).as_i32x8();
28385        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28386    }
28387}
28388
28389/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28390///
28391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28392#[inline]
28393#[target_feature(enable = "avx512f,avx512vl")]
28394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28395#[cfg_attr(test, assert_instr(vpord))]
28396pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28397    unsafe {
28398        let or = _mm256_or_epi32(a, b).as_i32x8();
28399        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28400    }
28401}
28402
28403/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28404///
28405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28406#[inline]
28407#[target_feature(enable = "avx512f,avx512vl")]
28408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28409#[cfg_attr(test, assert_instr(vor))] //should be vpord
28410pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
28411    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28412}
28413
28414/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28415///
28416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28417#[inline]
28418#[target_feature(enable = "avx512f,avx512vl")]
28419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28420#[cfg_attr(test, assert_instr(vpord))]
28421pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28422    unsafe {
28423        let or = _mm_or_epi32(a, b).as_i32x4();
28424        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28425    }
28426}
28427
28428/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28429///
28430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28431#[inline]
28432#[target_feature(enable = "avx512f,avx512vl")]
28433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28434#[cfg_attr(test, assert_instr(vpord))]
28435pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28436    unsafe {
28437        let or = _mm_or_epi32(a, b).as_i32x4();
28438        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28439    }
28440}
28441
28442/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28443///
28444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28445#[inline]
28446#[target_feature(enable = "avx512f")]
28447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28448#[cfg_attr(test, assert_instr(vporq))]
28449pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
28450    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28451}
28452
28453/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28454///
28455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28456#[inline]
28457#[target_feature(enable = "avx512f")]
28458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28459#[cfg_attr(test, assert_instr(vporq))]
28460pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28461    unsafe {
28462        let or = _mm512_or_epi64(a, b).as_i64x8();
28463        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28464    }
28465}
28466
28467/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28468///
28469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28470#[inline]
28471#[target_feature(enable = "avx512f")]
28472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28473#[cfg_attr(test, assert_instr(vporq))]
28474pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28475    unsafe {
28476        let or = _mm512_or_epi64(a, b).as_i64x8();
28477        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28478    }
28479}
28480
28481/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28482///
28483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28484#[inline]
28485#[target_feature(enable = "avx512f,avx512vl")]
28486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28487#[cfg_attr(test, assert_instr(vor))] //should be vporq
28488pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
28489    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28490}
28491
28492/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28493///
28494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28495#[inline]
28496#[target_feature(enable = "avx512f,avx512vl")]
28497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28498#[cfg_attr(test, assert_instr(vporq))]
28499pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28500    unsafe {
28501        let or = _mm256_or_epi64(a, b).as_i64x4();
28502        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28503    }
28504}
28505
28506/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28507///
28508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28509#[inline]
28510#[target_feature(enable = "avx512f,avx512vl")]
28511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28512#[cfg_attr(test, assert_instr(vporq))]
28513pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28514    unsafe {
28515        let or = _mm256_or_epi64(a, b).as_i64x4();
28516        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28517    }
28518}
28519
28520/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28521///
28522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28523#[inline]
28524#[target_feature(enable = "avx512f,avx512vl")]
28525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28526#[cfg_attr(test, assert_instr(vor))] //should be vporq
28527pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
28528    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28529}
28530
28531/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28532///
28533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28534#[inline]
28535#[target_feature(enable = "avx512f,avx512vl")]
28536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28537#[cfg_attr(test, assert_instr(vporq))]
28538pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28539    unsafe {
28540        let or = _mm_or_epi64(a, b).as_i64x2();
28541        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28542    }
28543}
28544
28545/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28546///
28547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28548#[inline]
28549#[target_feature(enable = "avx512f,avx512vl")]
28550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28551#[cfg_attr(test, assert_instr(vporq))]
28552pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28553    unsafe {
28554        let or = _mm_or_epi64(a, b).as_i64x2();
28555        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28556    }
28557}
28558
28559/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28560///
28561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28562#[inline]
28563#[target_feature(enable = "avx512f")]
28564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28565#[cfg_attr(test, assert_instr(vporq))]
28566pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
28567    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28568}
28569
28570/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28571///
28572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28573#[inline]
28574#[target_feature(enable = "avx512f")]
28575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28576#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28577pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
28578    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28579}
28580
28581/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28582///
28583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28584#[inline]
28585#[target_feature(enable = "avx512f")]
28586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28587#[cfg_attr(test, assert_instr(vpxord))]
28588pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28589    unsafe {
28590        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28591        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28592    }
28593}
28594
28595/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28596///
28597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28598#[inline]
28599#[target_feature(enable = "avx512f")]
28600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28601#[cfg_attr(test, assert_instr(vpxord))]
28602pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28603    unsafe {
28604        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28605        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28606    }
28607}
28608
28609/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28610///
28611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28612#[inline]
28613#[target_feature(enable = "avx512f,avx512vl")]
28614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28615#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28616pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28617    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28618}
28619
28620/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28621///
28622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28623#[inline]
28624#[target_feature(enable = "avx512f,avx512vl")]
28625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28626#[cfg_attr(test, assert_instr(vpxord))]
28627pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28628    unsafe {
28629        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28630        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28631    }
28632}
28633
28634/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28635///
28636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28637#[inline]
28638#[target_feature(enable = "avx512f,avx512vl")]
28639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28640#[cfg_attr(test, assert_instr(vpxord))]
28641pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28642    unsafe {
28643        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28644        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28645    }
28646}
28647
28648/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28649///
28650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28651#[inline]
28652#[target_feature(enable = "avx512f,avx512vl")]
28653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28654#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28655pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28656    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28657}
28658
28659/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28660///
28661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28662#[inline]
28663#[target_feature(enable = "avx512f,avx512vl")]
28664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28665#[cfg_attr(test, assert_instr(vpxord))]
28666pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28667    unsafe {
28668        let xor = _mm_xor_epi32(a, b).as_i32x4();
28669        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28670    }
28671}
28672
28673/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28674///
28675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28676#[inline]
28677#[target_feature(enable = "avx512f,avx512vl")]
28678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28679#[cfg_attr(test, assert_instr(vpxord))]
28680pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28681    unsafe {
28682        let xor = _mm_xor_epi32(a, b).as_i32x4();
28683        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28684    }
28685}
28686
28687/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28688///
28689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28690#[inline]
28691#[target_feature(enable = "avx512f")]
28692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28693#[cfg_attr(test, assert_instr(vpxorq))]
28694pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
28695    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28696}
28697
28698/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28699///
28700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28701#[inline]
28702#[target_feature(enable = "avx512f")]
28703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28704#[cfg_attr(test, assert_instr(vpxorq))]
28705pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28706    unsafe {
28707        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28708        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28709    }
28710}
28711
28712/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28713///
28714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28715#[inline]
28716#[target_feature(enable = "avx512f")]
28717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28718#[cfg_attr(test, assert_instr(vpxorq))]
28719pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28720    unsafe {
28721        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28722        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28723    }
28724}
28725
28726/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28727///
28728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28729#[inline]
28730#[target_feature(enable = "avx512f,avx512vl")]
28731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28732#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28733pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
28734    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28735}
28736
28737/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28743#[cfg_attr(test, assert_instr(vpxorq))]
28744pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28745    unsafe {
28746        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28747        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28748    }
28749}
28750
28751/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28752///
28753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28754#[inline]
28755#[target_feature(enable = "avx512f,avx512vl")]
28756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28757#[cfg_attr(test, assert_instr(vpxorq))]
28758pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28759    unsafe {
28760        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28761        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28762    }
28763}
28764
28765/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28766///
28767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28768#[inline]
28769#[target_feature(enable = "avx512f,avx512vl")]
28770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28771#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28772pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
28773    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28774}
28775
28776/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28777///
28778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28779#[inline]
28780#[target_feature(enable = "avx512f,avx512vl")]
28781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28782#[cfg_attr(test, assert_instr(vpxorq))]
28783pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28784    unsafe {
28785        let xor = _mm_xor_epi64(a, b).as_i64x2();
28786        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28787    }
28788}
28789
28790/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28791///
28792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28793#[inline]
28794#[target_feature(enable = "avx512f,avx512vl")]
28795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28796#[cfg_attr(test, assert_instr(vpxorq))]
28797pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28798    unsafe {
28799        let xor = _mm_xor_epi64(a, b).as_i64x2();
28800        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28801    }
28802}
28803
28804/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28805///
28806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28807#[inline]
28808#[target_feature(enable = "avx512f")]
28809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28810#[cfg_attr(test, assert_instr(vpxorq))]
28811pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
28812    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28813}
28814
28815/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28816///
28817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28818#[inline]
28819#[target_feature(enable = "avx512f")]
28820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28821#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28822pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
28823    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28824}
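
// Minimal sketch of what the implementation above computes: `andnot` is
// `(!a) & b`, built from an XOR with all-ones followed by an AND. A common use
// is clearing the bits of `b` that are set in a bit pattern `a`. The helper name
// `clear_bits_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn clear_bits_demo(mask: __m512i, b: __m512i) -> __m512i {
    // Every bit set in `mask` is cleared in `b`; all other bits pass through.
    _mm512_andnot_epi32(mask, b)
}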
28825
28826/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28827///
28828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28829#[inline]
28830#[target_feature(enable = "avx512f")]
28831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28832#[cfg_attr(test, assert_instr(vpandnd))]
28833pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28834    unsafe {
28835        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28836        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28837    }
28838}
28839
28840/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28841///
28842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28843#[inline]
28844#[target_feature(enable = "avx512f")]
28845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28846#[cfg_attr(test, assert_instr(vpandnd))]
28847pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28848    unsafe {
28849        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28850        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28851    }
28852}
28853
28854/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28855///
28856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28857#[inline]
28858#[target_feature(enable = "avx512f,avx512vl")]
28859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28860#[cfg_attr(test, assert_instr(vpandnd))]
28861pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28862    unsafe {
28863        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28864        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28865        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28866    }
28867}
28868
28869/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28870///
28871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28872#[inline]
28873#[target_feature(enable = "avx512f,avx512vl")]
28874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28875#[cfg_attr(test, assert_instr(vpandnd))]
28876pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28877    unsafe {
28878        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28879        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28880        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28881    }
28882}
28883
28884/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28885///
28886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28887#[inline]
28888#[target_feature(enable = "avx512f,avx512vl")]
28889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28890#[cfg_attr(test, assert_instr(vpandnd))]
28891pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28892    unsafe {
28893        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28894        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28895        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28896    }
28897}
28898
28899/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28900///
28901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28902#[inline]
28903#[target_feature(enable = "avx512f,avx512vl")]
28904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28905#[cfg_attr(test, assert_instr(vpandnd))]
28906pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28907    unsafe {
28908        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28909        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28910        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28911    }
28912}
28913
28914/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28915///
28916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28917#[inline]
28918#[target_feature(enable = "avx512f")]
28919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28920#[cfg_attr(test, assert_instr(vpandnq))]
28921pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
28922    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28923}
28924
28925/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28926///
28927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28928#[inline]
28929#[target_feature(enable = "avx512f")]
28930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28931#[cfg_attr(test, assert_instr(vpandnq))]
28932pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28933    unsafe {
28934        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28935        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28936    }
28937}
28938
28939/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28940///
28941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28942#[inline]
28943#[target_feature(enable = "avx512f")]
28944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28945#[cfg_attr(test, assert_instr(vpandnq))]
28946pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28947    unsafe {
28948        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28949        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28950    }
28951}
28952
28953/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28954///
28955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28956#[inline]
28957#[target_feature(enable = "avx512f,avx512vl")]
28958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28959#[cfg_attr(test, assert_instr(vpandnq))]
28960pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28961    unsafe {
28962        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28963        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28964        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28965    }
28966}
28967
28968/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28969///
28970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28971#[inline]
28972#[target_feature(enable = "avx512f,avx512vl")]
28973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28974#[cfg_attr(test, assert_instr(vpandnq))]
28975pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28976    unsafe {
28977        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28978        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28979        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
28980    }
28981}
28982
28983/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28984///
28985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
28986#[inline]
28987#[target_feature(enable = "avx512f,avx512vl")]
28988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28989#[cfg_attr(test, assert_instr(vpandnq))]
28990pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28991    unsafe {
28992        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
28993        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
28994        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
28995    }
28996}
28997
28998/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28999///
29000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29001#[inline]
29002#[target_feature(enable = "avx512f,avx512vl")]
29003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29004#[cfg_attr(test, assert_instr(vpandnq))]
29005pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29006    unsafe {
29007        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29008        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29009        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29010    }
29011}
29012
29013/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29014///
29015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29016#[inline]
29017#[target_feature(enable = "avx512f")]
29018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29019#[cfg_attr(test, assert_instr(vpandnq))]
29020pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
29021    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29022}
29023
29024/// Convert 16-bit mask a into an integer value, and store the result in dst.
29025///
29026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29027#[inline]
29028#[target_feature(enable = "avx512f")]
29029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29030pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29031    a as u32
29032}
29033
29034/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29035///
29036/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29037#[inline]
29038#[target_feature(enable = "avx512f")]
29039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29040pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29041    a as __mmask16
29042}
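
// Illustrative sketch: the two conversions above are plain integer casts, so a
// round trip through `u32` preserves the 16 mask bits. The helper name
// `mask_roundtrip_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_roundtrip_demo(k: __mmask16) -> __mmask16 {
    // `_cvtmask16_u32` widens; `_cvtu32_mask16` truncates back to 16 bits.
    _cvtu32_mask16(_cvtmask16_u32(k))
}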
29043
29044/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29045///
29046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29050#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29051pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29052    a & b
29053}
29054
29055/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29056///
29057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29058#[inline]
29059#[target_feature(enable = "avx512f")]
29060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29061#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29062pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29063    a & b
29064}
29065
29066/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29067///
29068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29069#[inline]
29070#[target_feature(enable = "avx512f")]
29071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29072#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29073pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29074    a | b
29075}
29076
29077/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29078///
29079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29080#[inline]
29081#[target_feature(enable = "avx512f")]
29082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29083#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29084pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29085    a | b
29086}
29087
29088/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29089///
29090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29091#[inline]
29092#[target_feature(enable = "avx512f")]
29093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29094#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29095pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29096    a ^ b
29097}
29098
29099/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29100///
29101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29102#[inline]
29103#[target_feature(enable = "avx512f")]
29104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29105#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29106pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29107    a ^ b
29108}
29109
29110/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29111///
29112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29113#[inline]
29114#[target_feature(enable = "avx512f")]
29115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29116pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29117    a ^ 0b11111111_11111111
29118}
29119
29120/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29121///
29122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29123#[inline]
29124#[target_feature(enable = "avx512f")]
29125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29126pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29127    a ^ 0b11111111_11111111
29128}
29129
29130/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29137pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29138    _mm512_kand(_mm512_knot(a), b)
29139}
29140
29141/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
29142///
29143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29144#[inline]
29145#[target_feature(enable = "avx512f")]
29146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29147#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29148pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
29149    _mm512_kand(_mm512_knot(a), b)
29150}
29151
29152/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29153///
29154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29155#[inline]
29156#[target_feature(enable = "avx512f")]
29157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29158#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29159pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29160    _mm512_knot(_mm512_kxor(a, b))
29161}
29162
29163/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29164///
29165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29166#[inline]
29167#[target_feature(enable = "avx512f")]
29168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29169#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29170pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29171    _mm512_knot(_mm512_kxor(a, b))
29172}
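
// Minimal sketch, under the assumption that two predicate masks `p` and `q`
// were produced by earlier comparisons: the mask operations above compose like
// ordinary boolean algebra. `exactly_one_demo` is a hypothetical helper that
// keeps only the lanes selected by exactly one of the two predicates.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn exactly_one_demo(p: __mmask16, q: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    // XOR of the predicates: set where exactly one of `p`, `q` is set.
    let k = _kxor_mask16(p, q);
    _mm512_maskz_xor_epi32(k, a, b)
}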
29173
29174/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29175/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29176///
29177/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29178#[inline]
29179#[target_feature(enable = "avx512f")]
29180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29181pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29182    let tmp = _kor_mask16(a, b);
29183    *all_ones = (tmp == 0xffff) as u8;
29184    (tmp == 0) as u8
29185}
29186
29187/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29188/// store 0 in dst.
29189///
29190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29191#[inline]
29192#[target_feature(enable = "avx512f")]
29193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29194pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29195    (_kor_mask16(a, b) == 0xffff) as u8
29196}
29197
29198/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29199/// store 0 in dst.
29200///
29201/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29202#[inline]
29203#[target_feature(enable = "avx512f")]
29204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29205pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29206    (_kor_mask16(a, b) == 0) as u8
29207}
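
// Illustrative sketch: `_kortestz_mask16_u8` with the same mask in both
// operands answers "is this mask all zeros?", which is a common early-exit
// test after a comparison. The helper name `any_equal_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn any_equal_demo(a: __m512i, b: __m512i) -> bool {
    let k = _mm512_cmpeq_epi32_mask(a, b);
    // `_kortestz_mask16_u8` returns 1 when the OR is zero, i.e. no lane compared equal.
    _kortestz_mask16_u8(k, k) == 0
}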
29208
29209/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29210///
29211/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29212#[inline]
29213#[target_feature(enable = "avx512f")]
29214#[rustc_legacy_const_generics(1)]
29215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29216pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29217    a << COUNT
29218}
29219
29220/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29221///
29222/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29223#[inline]
29224#[target_feature(enable = "avx512f")]
29225#[rustc_legacy_const_generics(1)]
29226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29227pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29228    a >> COUNT
29229}
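
// Minimal sketch: the shift count is a const generic, so shifting a mask by a
// fixed amount looks like this. `shift_demo` is a hypothetical helper that
// drops the lowest lane of a mask and zero-fills the top bit.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shift_demo(k: __mmask16) -> __mmask16 {
    _kshiftri_mask16::<1>(k)
}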
29230
29231/// Load 16-bit mask from memory
29232///
29233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29234#[inline]
29235#[target_feature(enable = "avx512f")]
29236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29237pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29238    *mem_addr
29239}
29240
29241/// Store 16-bit mask to memory
29242///
29243/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29244#[inline]
29245#[target_feature(enable = "avx512f")]
29246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29247pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29248    *mem_addr = a;
29249}
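
// Illustrative sketch: `_store_mask16`/`_load_mask16` are raw-pointer moves of
// the 16-bit mask, so storing through a local and reloading is an identity.
// `mask_spill_demo` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_spill_demo(k: __mmask16) -> __mmask16 {
    let mut slot: __mmask16 = 0;
    // SAFETY: `slot` is a valid, aligned location for a 16-bit mask.
    unsafe {
        _store_mask16(&mut slot, k);
        _load_mask16(&slot)
    }
}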
29250
29251/// Copy 16-bit mask a to k.
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29257#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29258pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29259    a
29260}
29261
29262/// Converts integer mask into bitmask, storing the result in dst.
29263///
29264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29265#[inline]
29266#[target_feature(enable = "avx512f")]
29267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29268pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29269    mask as u16
29270}
29271
29272/// Converts bit mask k1 into an integer value, storing the results in dst.
29273///
29274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29275#[inline]
29276#[target_feature(enable = "avx512f")]
29277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29278#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29279pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29280    k1 as i32
29281}
29282
29283/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29284///
29285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29286#[inline]
29287#[target_feature(enable = "avx512f")]
29288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29289#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29290pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29291    ((a & 0xff) << 8) | (b & 0xff)
29292}
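
// Minimal sketch of the unpack layout: the low byte of `a` becomes the high
// byte of the result and the low byte of `b` becomes the low byte, matching
// the expression above. `kunpack_demo` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn kunpack_demo() -> __mmask16 {
    // Result is 0b00000001_00000010: bit 8 comes from `a`'s bit 0, bit 1 from `b`'s bit 1.
    _mm512_kunpackb(0x01, 0x02)
}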
29293
29294/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29295///
29296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29297#[inline]
29298#[target_feature(enable = "avx512f")]
29299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29300#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29301pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29302    let r = (a | b) == 0b11111111_11111111;
29303    r as i32
29304}
29305
29306/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29307///
29308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29309#[inline]
29310#[target_feature(enable = "avx512f")]
29311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29312#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29313pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29314    let r = (a | b) == 0;
29315    r as i32
29316}
29317
29318/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29321#[inline]
29322#[target_feature(enable = "avx512f")]
29323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29324#[cfg_attr(test, assert_instr(vptestmd))]
29325pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29326    let and = _mm512_and_epi32(a, b);
29327    let zero = _mm512_setzero_si512();
29328    _mm512_cmpneq_epi32_mask(and, zero)
29329}
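
// Illustrative sketch: `test` sets a mask bit for every lane whose AND is
// non-zero, so testing a value against a single-bit pattern yields a mask of
// the lanes with that bit set. `lanes_with_bit_demo` is a hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn lanes_with_bit_demo(v: __m512i) -> __mmask16 {
    // Mask bit i is set iff bit 0 of lane i in `v` is set.
    _mm512_test_epi32_mask(v, _mm512_set1_epi32(1))
}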
29330
29331/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29332///
29333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29334#[inline]
29335#[target_feature(enable = "avx512f")]
29336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29337#[cfg_attr(test, assert_instr(vptestmd))]
29338pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29339    let and = _mm512_and_epi32(a, b);
29340    let zero = _mm512_setzero_si512();
29341    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29342}
29343
29344/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29345///
29346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29347#[inline]
29348#[target_feature(enable = "avx512f,avx512vl")]
29349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29350#[cfg_attr(test, assert_instr(vptestmd))]
29351pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29352    let and = _mm256_and_si256(a, b);
29353    let zero = _mm256_setzero_si256();
29354    _mm256_cmpneq_epi32_mask(and, zero)
29355}
29356
29357/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29358///
29359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29360#[inline]
29361#[target_feature(enable = "avx512f,avx512vl")]
29362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29363#[cfg_attr(test, assert_instr(vptestmd))]
29364pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29365    let and = _mm256_and_si256(a, b);
29366    let zero = _mm256_setzero_si256();
29367    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29368}
29369
29370/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29371///
29372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29373#[inline]
29374#[target_feature(enable = "avx512f,avx512vl")]
29375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29376#[cfg_attr(test, assert_instr(vptestmd))]
29377pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29378    let and = _mm_and_si128(a, b);
29379    let zero = _mm_setzero_si128();
29380    _mm_cmpneq_epi32_mask(and, zero)
29381}
29382
29383/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29384///
29385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29386#[inline]
29387#[target_feature(enable = "avx512f,avx512vl")]
29388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29389#[cfg_attr(test, assert_instr(vptestmd))]
29390pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29391    let and = _mm_and_si128(a, b);
29392    let zero = _mm_setzero_si128();
29393    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29394}
29395
29396/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29399#[inline]
29400#[target_feature(enable = "avx512f")]
29401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29402#[cfg_attr(test, assert_instr(vptestmq))]
29403pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29404    let and = _mm512_and_epi64(a, b);
29405    let zero = _mm512_setzero_si512();
29406    _mm512_cmpneq_epi64_mask(and, zero)
29407}
29408
29409/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29410///
29411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29412#[inline]
29413#[target_feature(enable = "avx512f")]
29414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29415#[cfg_attr(test, assert_instr(vptestmq))]
29416pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29417    let and = _mm512_and_epi64(a, b);
29418    let zero = _mm512_setzero_si512();
29419    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29420}
29421
29422/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29423///
29424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29425#[inline]
29426#[target_feature(enable = "avx512f,avx512vl")]
29427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29428#[cfg_attr(test, assert_instr(vptestmq))]
29429pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29430    let and = _mm256_and_si256(a, b);
29431    let zero = _mm256_setzero_si256();
29432    _mm256_cmpneq_epi64_mask(and, zero)
29433}
29434
29435/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29436///
29437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29438#[inline]
29439#[target_feature(enable = "avx512f,avx512vl")]
29440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29441#[cfg_attr(test, assert_instr(vptestmq))]
29442pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29443    let and = _mm256_and_si256(a, b);
29444    let zero = _mm256_setzero_si256();
29445    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29446}
29447
29448/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29449///
29450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29451#[inline]
29452#[target_feature(enable = "avx512f,avx512vl")]
29453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29454#[cfg_attr(test, assert_instr(vptestmq))]
29455pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29456    let and = _mm_and_si128(a, b);
29457    let zero = _mm_setzero_si128();
29458    _mm_cmpneq_epi64_mask(and, zero)
29459}
29460
29461/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29462///
29463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29464#[inline]
29465#[target_feature(enable = "avx512f,avx512vl")]
29466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29467#[cfg_attr(test, assert_instr(vptestmq))]
29468pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29469    let and = _mm_and_si128(a, b);
29470    let zero = _mm_setzero_si128();
29471    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29472}
29473
29474/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29475///
29476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29477#[inline]
29478#[target_feature(enable = "avx512f")]
29479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29480#[cfg_attr(test, assert_instr(vptestnmd))]
29481pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29482    let and = _mm512_and_epi32(a, b);
29483    let zero = _mm512_setzero_si512();
29484    _mm512_cmpeq_epi32_mask(and, zero)
29485}
29486
29487/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29488///
29489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29490#[inline]
29491#[target_feature(enable = "avx512f")]
29492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29493#[cfg_attr(test, assert_instr(vptestnmd))]
29494pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29495    let and = _mm512_and_epi32(a, b);
29496    let zero = _mm512_setzero_si512();
29497    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29498}
29499
29500/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29501///
29502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29503#[inline]
29504#[target_feature(enable = "avx512f,avx512vl")]
29505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29506#[cfg_attr(test, assert_instr(vptestnmd))]
29507pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29508    let and = _mm256_and_si256(a, b);
29509    let zero = _mm256_setzero_si256();
29510    _mm256_cmpeq_epi32_mask(and, zero)
29511}
29512
29513/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29519#[cfg_attr(test, assert_instr(vptestnmd))]
29520pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29521    let and = _mm256_and_si256(a, b);
29522    let zero = _mm256_setzero_si256();
29523    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29524}
29525
29526/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29527///
29528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29529#[inline]
29530#[target_feature(enable = "avx512f,avx512vl")]
29531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29532#[cfg_attr(test, assert_instr(vptestnmd))]
29533pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29534    let and = _mm_and_si128(a, b);
29535    let zero = _mm_setzero_si128();
29536    _mm_cmpeq_epi32_mask(and, zero)
29537}
29538
29539/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29540///
29541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29542#[inline]
29543#[target_feature(enable = "avx512f,avx512vl")]
29544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29545#[cfg_attr(test, assert_instr(vptestnmd))]
29546pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29547    let and = _mm_and_si128(a, b);
29548    let zero = _mm_setzero_si128();
29549    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29550}
29551
29552/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29553///
29554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29555#[inline]
29556#[target_feature(enable = "avx512f")]
29557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29558#[cfg_attr(test, assert_instr(vptestnmq))]
29559pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29560    let and = _mm512_and_epi64(a, b);
29561    let zero = _mm512_setzero_si512();
29562    _mm512_cmpeq_epi64_mask(and, zero)
29563}
29564
29565/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29566///
29567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29568#[inline]
29569#[target_feature(enable = "avx512f")]
29570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29571#[cfg_attr(test, assert_instr(vptestnmq))]
29572pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29573    let and = _mm512_and_epi64(a, b);
29574    let zero = _mm512_setzero_si512();
29575    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29576}
29577
29578/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29579///
29580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29581#[inline]
29582#[target_feature(enable = "avx512f,avx512vl")]
29583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29584#[cfg_attr(test, assert_instr(vptestnmq))]
29585pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29586    let and = _mm256_and_si256(a, b);
29587    let zero = _mm256_setzero_si256();
29588    _mm256_cmpeq_epi64_mask(and, zero)
29589}
29590
29591/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29592///
29593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29594#[inline]
29595#[target_feature(enable = "avx512f,avx512vl")]
29596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29597#[cfg_attr(test, assert_instr(vptestnmq))]
29598pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29599    let and = _mm256_and_si256(a, b);
29600    let zero = _mm256_setzero_si256();
29601    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29602}
29603
29604/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29605///
29606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29607#[inline]
29608#[target_feature(enable = "avx512f,avx512vl")]
29609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29610#[cfg_attr(test, assert_instr(vptestnmq))]
29611pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29612    let and = _mm_and_si128(a, b);
29613    let zero = _mm_setzero_si128();
29614    _mm_cmpeq_epi64_mask(and, zero)
29615}
29616
29617/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29618///
29619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29620#[inline]
29621#[target_feature(enable = "avx512f,avx512vl")]
29622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29623#[cfg_attr(test, assert_instr(vptestnmq))]
29624pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29625    let and = _mm_and_si128(a, b);
29626    let zero = _mm_setzero_si128();
29627    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29628}
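
// Illustrative sketch (not part of the upstream crate): `vptestnm*` sets a mask bit
// exactly when the per-lane AND of `a` and `b` is zero. Assumes the example is built
// for a target where the `avx512f` feature can be enabled.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn testn_epi64_sketch() {
    // `_mm512_set_epi64` puts its last argument in lane 0, so lane 6 holds 1.
    let a = _mm512_set_epi64(0, 1, 2, 4, 8, 16, 32, 64);
    let b = _mm512_set1_epi64(1);
    // Every lane except lane 6 has bit 0 clear, so its AND with `b` is zero.
    let k = _mm512_testn_epi64_mask(a, b);
    assert_eq!(k, 0b1011_1111);
}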
29629
29630/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29631///
29632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29633///
29634/// # Safety of non-temporal stores
29635///
29636/// After using this intrinsic, but before any other access to the memory that this intrinsic
29637/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29638/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29639/// return.
29640///
29641/// See [`_mm_sfence`] for details.
29642#[inline]
29643#[target_feature(enable = "avx512f")]
29644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29645#[cfg_attr(test, assert_instr(vmovntps))]
29646#[allow(clippy::cast_ptr_alignment)]
29647pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29648    crate::arch::asm!(
29649        vps!("vmovntps", ",{a}"),
29650        p = in(reg) mem_addr,
29651        a = in(zmm_reg) a,
29652        options(nostack, preserves_flags),
29653    );
29654}
29655
29656/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29657///
29658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29659///
29660/// # Safety of non-temporal stores
29661///
29662/// After using this intrinsic, but before any other access to the memory that this intrinsic
29663/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29664/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29665/// return.
29666///
29667/// See [`_mm_sfence`] for details.
29668#[inline]
29669#[target_feature(enable = "avx512f")]
29670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29671#[cfg_attr(test, assert_instr(vmovntpd))]
29672#[allow(clippy::cast_ptr_alignment)]
29673pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29674    crate::arch::asm!(
29675        vps!("vmovntpd", ",{a}"),
29676        p = in(reg) mem_addr,
29677        a = in(zmm_reg) a,
29678        options(nostack, preserves_flags),
29679    );
29680}
29681
29682/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29683///
29684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29685///
29686/// # Safety of non-temporal stores
29687///
29688/// After using this intrinsic, but before any other access to the memory that this intrinsic
29689/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29690/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29691/// return.
29692///
29693/// See [`_mm_sfence`] for details.
29694#[inline]
29695#[target_feature(enable = "avx512f")]
29696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29697#[cfg_attr(test, assert_instr(vmovntdq))]
29698#[allow(clippy::cast_ptr_alignment)]
29699pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
29700    crate::arch::asm!(
29701        vps!("vmovntdq", ",{a}"),
29702        p = in(reg) mem_addr,
29703        a = in(zmm_reg) a,
29704        options(nostack, preserves_flags),
29705    );
29706}
29707
29708/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29709/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
29710/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29711///
29712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29713#[inline]
29714#[target_feature(enable = "avx512f")]
29715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29716pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29717    let dst: __m512i;
29718    crate::arch::asm!(
29719        vpl!("vmovntdqa {a}"),
29720        a = out(zmm_reg) dst,
29721        p = in(reg) mem_addr,
29722        options(pure, readonly, nostack, preserves_flags),
29723    );
29724    dst
29725}
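
// Illustrative sketch (not part of the upstream crate): a non-temporal store into a
// 64-byte aligned buffer, followed by the `_mm_sfence` the safety sections above
// require before the memory is touched again. `Align64` is a hypothetical wrapper
// type introduced only for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn stream_store_sketch() {
    #[repr(align(64))]
    struct Align64([f32; 16]);

    let mut buf = Align64([0.0; 16]);
    let v = _mm512_set1_ps(3.5);
    unsafe {
        // `mem_addr` is 64-byte aligned thanks to `repr(align(64))`.
        _mm512_stream_ps(buf.0.as_mut_ptr(), v);
        // Fence before any other access to the streamed-to memory.
        _mm_sfence();
    }
    assert!(buf.0.iter().all(|&x| x == 3.5));
}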
29726
29727/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
29728///
29729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29730#[inline]
29731#[target_feature(enable = "avx512f")]
29732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29733pub fn _mm512_set_ps(
29734    e0: f32,
29735    e1: f32,
29736    e2: f32,
29737    e3: f32,
29738    e4: f32,
29739    e5: f32,
29740    e6: f32,
29741    e7: f32,
29742    e8: f32,
29743    e9: f32,
29744    e10: f32,
29745    e11: f32,
29746    e12: f32,
29747    e13: f32,
29748    e14: f32,
29749    e15: f32,
29750) -> __m512 {
29751    _mm512_setr_ps(
29752        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29753    )
29754}
29755
29756/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the
29757/// supplied values in reverse order.
29758///
29759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29760#[inline]
29761#[target_feature(enable = "avx512f")]
29762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29763pub fn _mm512_setr_ps(
29764    e0: f32,
29765    e1: f32,
29766    e2: f32,
29767    e3: f32,
29768    e4: f32,
29769    e5: f32,
29770    e6: f32,
29771    e7: f32,
29772    e8: f32,
29773    e9: f32,
29774    e10: f32,
29775    e11: f32,
29776    e12: f32,
29777    e13: f32,
29778    e14: f32,
29779    e15: f32,
29780) -> __m512 {
29781    unsafe {
29782        let r = f32x16::new(
29783            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29784        );
29785        transmute(r)
29786    }
29787}
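
// Illustrative sketch (not part of the upstream crate): `_mm512_set_ps` takes the
// highest lane first, while `_mm512_setr_ps` takes lane 0 first, so reversing the
// argument list makes the two calls produce the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn set_vs_setr_sketch() {
    let a = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let b = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // Every lane compares equal, so the full 16-bit mask is set.
    assert_eq!(_mm512_cmpeq_ps_mask(a, b), 0xFFFF);
}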
29788
29789/// Broadcast 64-bit float `a` to all elements of `dst`.
29790///
29791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29792#[inline]
29793#[target_feature(enable = "avx512f")]
29794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29795pub fn _mm512_set1_pd(a: f64) -> __m512d {
29796    unsafe { transmute(f64x8::splat(a)) }
29797}
29798
29799/// Broadcast 32-bit float `a` to all elements of `dst`.
29800///
29801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29802#[inline]
29803#[target_feature(enable = "avx512f")]
29804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29805pub fn _mm512_set1_ps(a: f32) -> __m512 {
29806    unsafe { transmute(f32x16::splat(a)) }
29807}
29808
29809/// Sets packed 32-bit integers in `dst` with the supplied values.
29810///
29811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29815pub fn _mm512_set_epi32(
29816    e15: i32,
29817    e14: i32,
29818    e13: i32,
29819    e12: i32,
29820    e11: i32,
29821    e10: i32,
29822    e9: i32,
29823    e8: i32,
29824    e7: i32,
29825    e6: i32,
29826    e5: i32,
29827    e4: i32,
29828    e3: i32,
29829    e2: i32,
29830    e1: i32,
29831    e0: i32,
29832) -> __m512i {
29833    _mm512_setr_epi32(
29834        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29835    )
29836}
29837
29838/// Broadcast 8-bit integer a to all elements of dst.
29839///
29840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29841#[inline]
29842#[target_feature(enable = "avx512f")]
29843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29844pub fn _mm512_set1_epi8(a: i8) -> __m512i {
29845    unsafe { transmute(i8x64::splat(a)) }
29846}
29847
29848/// Broadcast 16-bit integer a to all elements of dst.
29849///
29850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29851#[inline]
29852#[target_feature(enable = "avx512f")]
29853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29854pub fn _mm512_set1_epi16(a: i16) -> __m512i {
29855    unsafe { transmute(i16x32::splat(a)) }
29856}
29857
29858/// Broadcast 32-bit integer `a` to all elements of `dst`.
29859///
29860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29864pub fn _mm512_set1_epi32(a: i32) -> __m512i {
29865    unsafe { transmute(i32x16::splat(a)) }
29866}
29867
29868/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29874#[cfg_attr(test, assert_instr(vpbroadcastd))]
29875pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29876    unsafe {
29877        let r = _mm512_set1_epi32(a).as_i32x16();
29878        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29879    }
29880}
29881
29882/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29883///
29884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29885#[inline]
29886#[target_feature(enable = "avx512f")]
29887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29888#[cfg_attr(test, assert_instr(vpbroadcastd))]
29889pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29890    unsafe {
29891        let r = _mm512_set1_epi32(a).as_i32x16();
29892        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29893    }
29894}
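
// Illustrative sketch (not part of the upstream crate): with a writemask the
// unselected lanes keep the values from `src`, while with a zeromask they are
// cleared.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn masked_broadcast_sketch() {
    let src = _mm512_set1_epi32(-1);
    let k: __mmask16 = 0b0000_0000_1111_1111;
    let merged = _mm512_mask_set1_epi32(src, k, 7);
    let zeroed = _mm512_maskz_set1_epi32(k, 7);
    // Lanes 0..8 hold 7 in both results.
    assert_eq!(_mm512_mask_cmpeq_epi32_mask(k, merged, zeroed), k);
    // Lanes 8..16 are -1 in `merged` but 0 in `zeroed`.
    assert_eq!(_mm512_cmpeq_epi32_mask(merged, src), 0xFF00);
    assert_eq!(_mm512_cmpeq_epi32_mask(zeroed, _mm512_setzero_si512()), 0xFF00);
}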
29895
29896/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29897///
29898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29899#[inline]
29900#[target_feature(enable = "avx512f,avx512vl")]
29901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29902#[cfg_attr(test, assert_instr(vpbroadcastd))]
29903pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29904    unsafe {
29905        let r = _mm256_set1_epi32(a).as_i32x8();
29906        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29907    }
29908}
29909
29910/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29911///
29912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29913#[inline]
29914#[target_feature(enable = "avx512f,avx512vl")]
29915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29916#[cfg_attr(test, assert_instr(vpbroadcastd))]
29917pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29918    unsafe {
29919        let r = _mm256_set1_epi32(a).as_i32x8();
29920        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29921    }
29922}
29923
29924/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29925///
29926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29927#[inline]
29928#[target_feature(enable = "avx512f,avx512vl")]
29929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29930#[cfg_attr(test, assert_instr(vpbroadcastd))]
29931pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29932    unsafe {
29933        let r = _mm_set1_epi32(a).as_i32x4();
29934        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29935    }
29936}
29937
29938/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29939///
29940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29941#[inline]
29942#[target_feature(enable = "avx512f,avx512vl")]
29943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29944#[cfg_attr(test, assert_instr(vpbroadcastd))]
29945pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29946    unsafe {
29947        let r = _mm_set1_epi32(a).as_i32x4();
29948        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29949    }
29950}
29951
29952/// Broadcast 64-bit integer `a` to all elements of `dst`.
29953///
29954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29955#[inline]
29956#[target_feature(enable = "avx512f")]
29957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29958pub fn _mm512_set1_epi64(a: i64) -> __m512i {
29959    unsafe { transmute(i64x8::splat(a)) }
29960}
29961
29962/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29963///
29964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29965#[inline]
29966#[target_feature(enable = "avx512f")]
29967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29968#[cfg_attr(test, assert_instr(vpbroadcastq))]
29969pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29970    unsafe {
29971        let r = _mm512_set1_epi64(a).as_i64x8();
29972        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29973    }
29974}
29975
29976/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29977///
29978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29979#[inline]
29980#[target_feature(enable = "avx512f")]
29981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29982#[cfg_attr(test, assert_instr(vpbroadcastq))]
29983pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
29984    unsafe {
29985        let r = _mm512_set1_epi64(a).as_i64x8();
29986        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
29987    }
29988}
29989
29990/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29991///
29992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
29993#[inline]
29994#[target_feature(enable = "avx512f,avx512vl")]
29995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29996#[cfg_attr(test, assert_instr(vpbroadcastq))]
29997pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
29998    unsafe {
29999        let r = _mm256_set1_epi64x(a).as_i64x4();
30000        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30001    }
30002}
30003
30004/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30005///
30006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30007#[inline]
30008#[target_feature(enable = "avx512f,avx512vl")]
30009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30010#[cfg_attr(test, assert_instr(vpbroadcastq))]
30011pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30012    unsafe {
30013        let r = _mm256_set1_epi64x(a).as_i64x4();
30014        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30015    }
30016}
30017
30018/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30019///
30020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30021#[inline]
30022#[target_feature(enable = "avx512f,avx512vl")]
30023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30024#[cfg_attr(test, assert_instr(vpbroadcastq))]
30025pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30026    unsafe {
30027        let r = _mm_set1_epi64x(a).as_i64x2();
30028        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30029    }
30030}
30031
30032/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30033///
30034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30035#[inline]
30036#[target_feature(enable = "avx512f,avx512vl")]
30037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30038#[cfg_attr(test, assert_instr(vpbroadcastq))]
30039pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30040    unsafe {
30041        let r = _mm_set1_epi64x(a).as_i64x2();
30042        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30043    }
30044}
30045
30046/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30047///
30048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30049#[inline]
30050#[target_feature(enable = "avx512f")]
30051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30052pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30053    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30054}
30055
30056/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30059#[inline]
30060#[target_feature(enable = "avx512f")]
30061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30062pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30063    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30064}
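
// Illustrative sketch (not part of the upstream crate): both helpers repeat a
// four-element pattern twice across the vector; `set4` places its first argument in
// the highest lane of each group of four, `setr4` in the lowest, so mirrored
// argument lists give identical results.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn set4_epi64_sketch() {
    let a = _mm512_set4_epi64(3, 2, 1, 0);
    let b = _mm512_setr4_epi64(0, 1, 2, 3);
    // All eight lanes compare equal.
    assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0xFF);
}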
30065
30066/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30072#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30073pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30074    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30075}
30076
30077/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30078///
30079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30080#[inline]
30081#[target_feature(enable = "avx512f")]
30082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30083#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30084pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30085    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30086}
30087
30088/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30089///
30090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30091#[inline]
30092#[target_feature(enable = "avx512f")]
30093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30094#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30095pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30096    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30097}
30098
30099/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30100///
30101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30102#[inline]
30103#[target_feature(enable = "avx512f")]
30104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30105#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30106pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30107    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30108}
30109
30110/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30111///
30112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30113#[inline]
30114#[target_feature(enable = "avx512f")]
30115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30116#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30117pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30118    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30119}
30120
30121/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30122///
30123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30124#[inline]
30125#[target_feature(enable = "avx512f")]
30126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30127#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30128pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30129    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30130}
30131
30132/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30133///
30134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30135#[inline]
30136#[target_feature(enable = "avx512f")]
30137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30138#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30139pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30140    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30141}
30142
30143/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30144///
30145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30146#[inline]
30147#[target_feature(enable = "avx512f")]
30148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30149#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30150pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30151    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30152}
30153
30154/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30155///
30156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30157#[inline]
30158#[target_feature(enable = "avx512f")]
30159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30160#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30161pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30162    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30163}
30164
30165/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30166///
30167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30168#[inline]
30169#[target_feature(enable = "avx512f")]
30170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30171#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30172pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30173    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30174}
30175
30176/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30177///
30178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30179#[inline]
30180#[target_feature(enable = "avx512f")]
30181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30182#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30183pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30184    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30185}
30186
30187/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30188///
30189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30190#[inline]
30191#[target_feature(enable = "avx512f")]
30192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30193#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30194pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30195    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30196}
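
// Illustrative sketch (not part of the upstream crate): the `k1` parameter of the
// masked comparisons ANDs a previous mask into the new result, so two comparisons
// can be chained to test `lo <= x < hi` per lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn chained_cmp_ps_sketch() {
    let x = _mm512_setr_ps(
        -2., -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13.,
    );
    let lo = _mm512_set1_ps(0.0);
    let hi = _mm512_set1_ps(8.0);
    // First mask: lanes with lo <= x, i.e. not (x < lo).
    let k = _mm512_cmpnlt_ps_mask(x, lo);
    // Second comparison keeps only lanes already set in `k` and with x < hi.
    let k = _mm512_mask_cmplt_ps_mask(k, x, hi);
    // Lanes 2..10 hold the values 0.0..=7.0.
    assert_eq!(k, 0b0000_0011_1111_1100);
}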
30197
30198/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30199///
30200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30201#[inline]
30202#[target_feature(enable = "avx512f")]
30203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30204#[rustc_legacy_const_generics(2)]
30205#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30206pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30207    unsafe {
30208        static_assert_uimm_bits!(IMM8, 5);
30209        let neg_one = -1;
30210        let a = a.as_f32x16();
30211        let b = b.as_f32x16();
30212        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30213        r.cast_unsigned()
30214    }
30215}
30216
30217/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30218///
30219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30220#[inline]
30221#[target_feature(enable = "avx512f")]
30222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30223#[rustc_legacy_const_generics(3)]
30224#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30225pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30226    unsafe {
30227        static_assert_uimm_bits!(IMM8, 5);
30228        let a = a.as_f32x16();
30229        let b = b.as_f32x16();
30230        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30231        r.cast_unsigned()
30232    }
30233}
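
// Illustrative sketch (not part of the upstream crate): the named comparison
// wrappers above are thin aliases for `_mm512_cmp_ps_mask` with a predicate
// constant, so passing `_CMP_LT_OS` directly yields the same mask as
// `_mm512_cmplt_ps_mask`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cmp_ps_predicate_sketch() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    let via_const = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
    let via_wrapper = _mm512_cmplt_ps_mask(a, b);
    assert_eq!(via_const, via_wrapper);
    assert_eq!(via_const, 0xFFFF);
}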
30234
30235/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30236///
30237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30238#[inline]
30239#[target_feature(enable = "avx512f,avx512vl")]
30240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30241#[rustc_legacy_const_generics(2)]
30242#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30243pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30244    unsafe {
30245        static_assert_uimm_bits!(IMM8, 5);
30246        let neg_one = -1;
30247        let a = a.as_f32x8();
30248        let b = b.as_f32x8();
30249        let r = vcmpps256(a, b, IMM8, neg_one);
30250        r.cast_unsigned()
30251    }
30252}
30253
30254/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30255///
30256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30257#[inline]
30258#[target_feature(enable = "avx512f,avx512vl")]
30259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30260#[rustc_legacy_const_generics(3)]
30261#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30262pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30263    unsafe {
30264        static_assert_uimm_bits!(IMM8, 5);
30265        let a = a.as_f32x8();
30266        let b = b.as_f32x8();
30267        let r = vcmpps256(a, b, IMM8, k1 as i8);
30268        r.cast_unsigned()
30269    }
30270}
30271
30272/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30273///
30274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30275#[inline]
30276#[target_feature(enable = "avx512f,avx512vl")]
30277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30278#[rustc_legacy_const_generics(2)]
30279#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30280pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30281    unsafe {
30282        static_assert_uimm_bits!(IMM8, 5);
30283        let neg_one = -1;
30284        let a = a.as_f32x4();
30285        let b = b.as_f32x4();
30286        let r = vcmpps128(a, b, IMM8, neg_one);
30287        r.cast_unsigned()
30288    }
30289}
30290
30291/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30292///
30293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30294#[inline]
30295#[target_feature(enable = "avx512f,avx512vl")]
30296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30297#[rustc_legacy_const_generics(3)]
30298#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30299pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30300    unsafe {
30301        static_assert_uimm_bits!(IMM8, 5);
30302        let a = a.as_f32x4();
30303        let b = b.as_f32x4();
30304        let r = vcmpps128(a, b, IMM8, k1 as i8);
30305        r.cast_unsigned()
30306    }
30307}
30308
30309/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30310/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30311///
30312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30313#[inline]
30314#[target_feature(enable = "avx512f")]
30315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30316#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30317#[rustc_legacy_const_generics(2, 3)]
30318pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30319    a: __m512,
30320    b: __m512,
30321) -> __mmask16 {
30322    unsafe {
30323        static_assert_uimm_bits!(IMM5, 5);
30324        static_assert_mantissas_sae!(SAE);
30325        let neg_one = -1;
30326        let a = a.as_f32x16();
30327        let b = b.as_f32x16();
30328        let r = vcmpps(a, b, IMM5, neg_one, SAE);
30329        r.cast_unsigned()
30330    }
30331}
30332
30333/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30334/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30335///
30336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30337#[inline]
30338#[target_feature(enable = "avx512f")]
30339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30340#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30341#[rustc_legacy_const_generics(3, 4)]
30342pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30343    m: __mmask16,
30344    a: __m512,
30345    b: __m512,
30346) -> __mmask16 {
30347    unsafe {
30348        static_assert_uimm_bits!(IMM5, 5);
30349        static_assert_mantissas_sae!(SAE);
30350        let a = a.as_f32x16();
30351        let b = b.as_f32x16();
30352        let r = vcmpps(a, b, IMM5, m as i16, SAE);
30353        r.cast_unsigned()
30354    }
30355}
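
// Illustrative sketch (not part of the upstream crate): the `_round` variants take a
// second constant that is either `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`;
// the latter suppresses floating-point exception reporting while comparing, without
// changing the resulting mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cmp_round_ps_sketch() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(f32::NAN);
    // An unordered not-equal predicate: NaN compares unequal in every lane, so all
    // sixteen mask bits are set even with exceptions suppressed.
    let k = _mm512_cmp_round_ps_mask::<_CMP_NEQ_UQ, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(k, 0xFFFF);
}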
30356
30357/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30358///
30359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30360#[inline]
30361#[target_feature(enable = "avx512f")]
30362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30363#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30364pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30365    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30366}
30367
30368/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30369///
30370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30371#[inline]
30372#[target_feature(enable = "avx512f")]
30373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30374#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30375pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30376    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30377}
30378
30379/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30380///
30381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30382#[inline]
30383#[target_feature(enable = "avx512f")]
30384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30385#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30386pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30387    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30388}
30389
30390/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30391///
30392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30393#[inline]
30394#[target_feature(enable = "avx512f")]
30395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30396#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30397pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30398    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30399}
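
// Illustrative sketch (not part of the upstream crate): comparing a vector with
// itself under the unordered predicate flags exactly the NaN lanes, while the
// ordered predicate flags the non-NaN lanes; the two masks are complementary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn nan_mask_sketch() {
    let x = _mm512_setr_ps(
        0., f32::NAN, 2., 3., f32::NAN, 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // NaNs sit in lanes 1 and 4.
    let nan_lanes = _mm512_cmpunord_ps_mask(x, x);
    let num_lanes = _mm512_cmpord_ps_mask(x, x);
    assert_eq!(nan_lanes, 0b0000_0000_0001_0010);
    assert_eq!(nan_lanes ^ num_lanes, 0xFFFF);
}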
30400
30401/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30402///
30403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30404#[inline]
30405#[target_feature(enable = "avx512f")]
30406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30407#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30408pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30409    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30410}
30411
30412/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30413///
30414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30415#[inline]
30416#[target_feature(enable = "avx512f")]
30417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30418#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30419pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30420    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30421}
30422
30423/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30424///
30425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30426#[inline]
30427#[target_feature(enable = "avx512f")]
30428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30429#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30430pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30431    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30432}
30433
30434/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30435///
30436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30437#[inline]
30438#[target_feature(enable = "avx512f")]
30439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30440#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30441pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30442    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30443}
30444
30445/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30446///
30447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30448#[inline]
30449#[target_feature(enable = "avx512f")]
30450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30451#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30452pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30453    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30454}
30455
30456/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30457///
30458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30459#[inline]
30460#[target_feature(enable = "avx512f")]
30461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30462#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30463pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30464    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30465}
30466
30467/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30473#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30474pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30475    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30476}
30477
30478/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30479///
30480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30481#[inline]
30482#[target_feature(enable = "avx512f")]
30483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30484#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30485pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30486    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30487}
30488
30489/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30490///
30491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30492#[inline]
30493#[target_feature(enable = "avx512f")]
30494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30495#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30496pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30497    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30498}
30499
30500/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30501///
30502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30503#[inline]
30504#[target_feature(enable = "avx512f")]
30505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30506#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30507pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30508    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30509}
30510
30511/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30512///
30513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30514#[inline]
30515#[target_feature(enable = "avx512f")]
30516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30517#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30518pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30519    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30520}
30521
30522/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30523///
30524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30525#[inline]
30526#[target_feature(enable = "avx512f")]
30527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30528#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30529pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30530    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30531}
30532
30533/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30534///
30535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30536#[inline]
30537#[target_feature(enable = "avx512f")]
30538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30539#[rustc_legacy_const_generics(2)]
30540#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30541pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30542    unsafe {
30543        static_assert_uimm_bits!(IMM8, 5);
30544        let neg_one = -1;
30545        let a = a.as_f64x8();
30546        let b = b.as_f64x8();
30547        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30548        r.cast_unsigned()
30549    }
30550}
30551
30552/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30553///
30554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30555#[inline]
30556#[target_feature(enable = "avx512f")]
30557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30558#[rustc_legacy_const_generics(3)]
30559#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30560pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30561    unsafe {
30562        static_assert_uimm_bits!(IMM8, 5);
30563        let a = a.as_f64x8();
30564        let b = b.as_f64x8();
30565        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30566        r.cast_unsigned()
30567    }
30568}
30569
30570/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30571///
30572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30573#[inline]
30574#[target_feature(enable = "avx512f,avx512vl")]
30575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30576#[rustc_legacy_const_generics(2)]
30577#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30578pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30579    unsafe {
30580        static_assert_uimm_bits!(IMM8, 5);
30581        let neg_one = -1;
30582        let a = a.as_f64x4();
30583        let b = b.as_f64x4();
30584        let r = vcmppd256(a, b, IMM8, neg_one);
30585        r.cast_unsigned()
30586    }
30587}
30588
30589/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30590///
30591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30592#[inline]
30593#[target_feature(enable = "avx512f,avx512vl")]
30594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30595#[rustc_legacy_const_generics(3)]
30596#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30597pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30598    unsafe {
30599        static_assert_uimm_bits!(IMM8, 5);
30600        let a = a.as_f64x4();
30601        let b = b.as_f64x4();
30602        let r = vcmppd256(a, b, IMM8, k1 as i8);
30603        r.cast_unsigned()
30604    }
30605}
30606
30607/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30608///
30609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30610#[inline]
30611#[target_feature(enable = "avx512f,avx512vl")]
30612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30613#[rustc_legacy_const_generics(2)]
30614#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30615pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30616    unsafe {
30617        static_assert_uimm_bits!(IMM8, 5);
30618        let neg_one = -1;
30619        let a = a.as_f64x2();
30620        let b = b.as_f64x2();
30621        let r = vcmppd128(a, b, IMM8, neg_one);
30622        r.cast_unsigned()
30623    }
30624}
30625
30626/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30627///
30628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30629#[inline]
30630#[target_feature(enable = "avx512f,avx512vl")]
30631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30632#[rustc_legacy_const_generics(3)]
30633#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30634pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30635    unsafe {
30636        static_assert_uimm_bits!(IMM8, 5);
30637        let a = a.as_f64x2();
30638        let b = b.as_f64x2();
30639        let r = vcmppd128(a, b, IMM8, k1 as i8);
30640        r.cast_unsigned()
30641    }
30642}
30643
30644/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30646///
30647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30648#[inline]
30649#[target_feature(enable = "avx512f")]
30650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30651#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30652#[rustc_legacy_const_generics(2, 3)]
30653pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30654    a: __m512d,
30655    b: __m512d,
30656) -> __mmask8 {
30657    unsafe {
30658        static_assert_uimm_bits!(IMM5, 5);
30659        static_assert_mantissas_sae!(SAE);
30660        let neg_one = -1;
30661        let a = a.as_f64x8();
30662        let b = b.as_f64x8();
30663        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30664        r.cast_unsigned()
30665    }
30666}
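
// Usage sketch (illustrative): for compares the `SAE` parameter only controls
// exception suppression, never rounding, so the meaningful values are
// `_MM_FROUND_CUR_DIRECTION` and `_MM_FROUND_NO_EXC`. An ordered less-than
// compare that suppresses floating-point exceptions could be written as:
//
//     let k = _mm512_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(a, b);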
30667
30668/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30669/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30670///
30671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30672#[inline]
30673#[target_feature(enable = "avx512f")]
30674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30675#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30676#[rustc_legacy_const_generics(3, 4)]
30677pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30678    k1: __mmask8,
30679    a: __m512d,
30680    b: __m512d,
30681) -> __mmask8 {
30682    unsafe {
30683        static_assert_uimm_bits!(IMM5, 5);
30684        static_assert_mantissas_sae!(SAE);
30685        let a = a.as_f64x8();
30686        let b = b.as_f64x8();
30687        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30688        r.cast_unsigned()
30689    }
30690}
30691
30692/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30693///
30694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30695#[inline]
30696#[target_feature(enable = "avx512f")]
30697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30698#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30699pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30700    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30701}
30702
30703/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30704///
30705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30706#[inline]
30707#[target_feature(enable = "avx512f")]
30708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30709#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30710pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30711    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30712}
30713
30714/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30715///
30716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30717#[inline]
30718#[target_feature(enable = "avx512f")]
30719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30720#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30721pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30722    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30723}
30724
30725/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30726///
30727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30728#[inline]
30729#[target_feature(enable = "avx512f")]
30730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30731#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30732pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30733    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30734}
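
// Usage sketch (illustrative): `cmpord`/`cmpunord` are thin wrappers around
// `_mm512_cmp_pd_mask` with the `_CMP_ORD_Q` and `_CMP_UNORD_Q` predicates, so
// a lane's bit is set by `cmpord` exactly when neither input is NaN and by
// `cmpunord` when at least one is; the two masks are complementary:
//
//     let ord = _mm512_cmpord_pd_mask(a, b);
//     let unord = _mm512_cmpunord_pd_mask(a, b);
//     assert_eq!(ord ^ unord, 0xff);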
30735
30736/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30737///
30738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30739#[inline]
30740#[target_feature(enable = "avx512f")]
30741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30742#[rustc_legacy_const_generics(2)]
30743#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30744pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30745    unsafe {
30746        static_assert_uimm_bits!(IMM8, 5);
30747        let neg_one = -1;
30748        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30749        r.cast_unsigned()
30750    }
30751}
30752
30753/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30754///
30755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30756#[inline]
30757#[target_feature(enable = "avx512f")]
30758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30759#[rustc_legacy_const_generics(3)]
30760#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30761pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30762    unsafe {
30763        static_assert_uimm_bits!(IMM8, 5);
30764        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30765        r.cast_unsigned()
30766    }
30767}
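
// Usage sketch (illustrative): the scalar `ss` compares only look at element 0
// of each operand, and only bit 0 of the returned mask is meaningful (all other
// bits are zero):
//
//     let a = _mm_set_ss(1.0);
//     let b = _mm_set_ss(2.0);
//     let k = _mm_cmp_ss_mask::<_CMP_LT_OQ>(a, b);
//     assert_eq!(k & 1, 1); // 1.0 < 2.0 in the low element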
30768
30769/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30773#[inline]
30774#[target_feature(enable = "avx512f")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30777#[rustc_legacy_const_generics(2, 3)]
30778pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30779    unsafe {
30780        static_assert_uimm_bits!(IMM5, 5);
30781        static_assert_mantissas_sae!(SAE);
30782        let neg_one = -1;
30783        let r = vcmpss(a, b, IMM5, neg_one, SAE);
30784        r.cast_unsigned()
30785    }
30786}
30787
30788/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30789/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30790///
30791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30792#[inline]
30793#[target_feature(enable = "avx512f")]
30794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30795#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30796#[rustc_legacy_const_generics(3, 4)]
30797pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30798    k1: __mmask8,
30799    a: __m128,
30800    b: __m128,
30801) -> __mmask8 {
30802    unsafe {
30803        static_assert_uimm_bits!(IMM5, 5);
30804        static_assert_mantissas_sae!(SAE);
30805        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
30806        r.cast_unsigned()
30807    }
30808}
30809
30810/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30811///
30812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30813#[inline]
30814#[target_feature(enable = "avx512f")]
30815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30816#[rustc_legacy_const_generics(2)]
30817#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30818pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30819    unsafe {
30820        static_assert_uimm_bits!(IMM8, 5);
30821        let neg_one = -1;
30822        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30823        r.cast_unsigned()
30824    }
30825}
30826
30827/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30828///
30829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30830#[inline]
30831#[target_feature(enable = "avx512f")]
30832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30833#[rustc_legacy_const_generics(3)]
30834#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30835pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30836    unsafe {
30837        static_assert_uimm_bits!(IMM8, 5);
30838        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30839        r.cast_unsigned()
30840    }
30841}
30842
30843/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30844/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30847#[inline]
30848#[target_feature(enable = "avx512f")]
30849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30850#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30851#[rustc_legacy_const_generics(2, 3)]
30852pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30853    unsafe {
30854        static_assert_uimm_bits!(IMM5, 5);
30855        static_assert_mantissas_sae!(SAE);
30856        let neg_one = -1;
30857        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
30858        r.cast_unsigned()
30859    }
30860}
30861
30862/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30863/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30864///
30865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30866#[inline]
30867#[target_feature(enable = "avx512f")]
30868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30869#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30870#[rustc_legacy_const_generics(3, 4)]
30871pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30872    k1: __mmask8,
30873    a: __m128d,
30874    b: __m128d,
30875) -> __mmask8 {
30876    unsafe {
30877        static_assert_uimm_bits!(IMM5, 5);
30878        static_assert_mantissas_sae!(SAE);
30879        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30880        r.cast_unsigned()
30881    }
30882}
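
// Usage sketch (illustrative): the `sd` variants mirror the `ss` ones for the
// low double-precision element. With a zeromask whose bit 0 is clear, the
// comparison result is discarded and bit 0 of the output is forced to zero:
//
//     let k = _mm_mask_cmp_round_sd_mask::<_CMP_EQ_OQ, _MM_FROUND_CUR_DIRECTION>(0, a, b);
//     assert_eq!(k, 0);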
30883
30884/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30885///
30886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30887#[inline]
30888#[target_feature(enable = "avx512f")]
30889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30890#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30891pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30892    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30893}
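
// Usage sketch (illustrative): the `epu32` compares treat lanes as unsigned,
// which matters once the sign bit is set; `-1i32` reinterpreted as `u32::MAX`
// is *not* less than 1:
//
//     let a = _mm512_set1_epi32(-1); // u32::MAX in every lane
//     let b = _mm512_set1_epi32(1);
//     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);      // unsigned: MAX < 1 is false
//     assert_eq!(_mm512_cmplt_epi32_mask(a, b), 0xffff); // signed: -1 < 1 is true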
30894
30895/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30896///
30897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30898#[inline]
30899#[target_feature(enable = "avx512f")]
30900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30901#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30902pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30903    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30904}
30905
30906/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30907///
30908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30909#[inline]
30910#[target_feature(enable = "avx512f,avx512vl")]
30911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30912#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30913pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30914    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30915}
30916
30917/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30918///
30919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30920#[inline]
30921#[target_feature(enable = "avx512f,avx512vl")]
30922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30923#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30924pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30925    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30926}
30927
30928/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30934#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30935pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30936    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30937}
30938
30939/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30940///
30941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30942#[inline]
30943#[target_feature(enable = "avx512f,avx512vl")]
30944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30945#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30946pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30947    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30948}
30949
30950/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30951///
30952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30953#[inline]
30954#[target_feature(enable = "avx512f")]
30955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30956#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30957pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30958    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30959}
30960
30961/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30962///
30963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30964#[inline]
30965#[target_feature(enable = "avx512f")]
30966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30967#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30968pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30969    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30970}
30971
30972/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30973///
30974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30975#[inline]
30976#[target_feature(enable = "avx512f,avx512vl")]
30977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30978#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30979pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30980    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
30981}
30982
30983/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30984///
30985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
30986#[inline]
30987#[target_feature(enable = "avx512f,avx512vl")]
30988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30989#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30990pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30991    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30992}
30993
30994/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30995///
30996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
30997#[inline]
30998#[target_feature(enable = "avx512f,avx512vl")]
30999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31000#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31001pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31002    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31003}
31004
31005/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31006///
31007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31008#[inline]
31009#[target_feature(enable = "avx512f,avx512vl")]
31010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31012pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31013    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31014}
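
// Usage sketch (illustrative): the greater-than wrappers map to the
// `_MM_CMPINT_NLE` ("not less-or-equal") predicate, so for any inputs:
//
//     assert_eq!(
//         _mm512_cmpgt_epu32_mask(a, b),
//         _mm512_cmp_epu32_mask::<_MM_CMPINT_NLE>(a, b),
//     );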
31015
31016/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31017///
31018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31019#[inline]
31020#[target_feature(enable = "avx512f")]
31021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31022#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31023pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31024    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31025}
31026
31027/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31028///
31029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31030#[inline]
31031#[target_feature(enable = "avx512f")]
31032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31033#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31034pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31035    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31036}
31037
31038/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31039///
31040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31041#[inline]
31042#[target_feature(enable = "avx512f,avx512vl")]
31043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31044#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31045pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31046    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31047}
31048
31049/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31050///
31051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31052#[inline]
31053#[target_feature(enable = "avx512f,avx512vl")]
31054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31055#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31056pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31057    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31058}
31059
31060/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31061///
31062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31063#[inline]
31064#[target_feature(enable = "avx512f,avx512vl")]
31065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31066#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31067pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31068    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31069}
31070
31071/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31072///
31073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31074#[inline]
31075#[target_feature(enable = "avx512f,avx512vl")]
31076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31077#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31078pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31079    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31080}
31081
31082/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31083///
31084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31085#[inline]
31086#[target_feature(enable = "avx512f")]
31087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31088#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31089pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31090    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31091}
31092
31093/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31094///
31095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31096#[inline]
31097#[target_feature(enable = "avx512f")]
31098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31099#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31100pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31101    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31102}
31103
31104/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31105///
31106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31107#[inline]
31108#[target_feature(enable = "avx512f,avx512vl")]
31109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31110#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31111pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31112    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31113}
31114
31115/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31116///
31117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31118#[inline]
31119#[target_feature(enable = "avx512f,avx512vl")]
31120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31121#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31122pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31123    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31124}
31125
31126/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31127///
31128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31129#[inline]
31130#[target_feature(enable = "avx512f,avx512vl")]
31131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31132#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31133pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31134    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31135}
31136
31137/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31138///
31139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31140#[inline]
31141#[target_feature(enable = "avx512f,avx512vl")]
31142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31144pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31145    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31146}
31147
31148/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31149///
31150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31154#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31155pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31156    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31157}
31158
31159/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31160///
31161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31165#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31166pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31167    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31168}
31169
31170/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31171///
31172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31173#[inline]
31174#[target_feature(enable = "avx512f,avx512vl")]
31175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31176#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31177pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31178    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31179}
31180
31181/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31182///
31183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31184#[inline]
31185#[target_feature(enable = "avx512f,avx512vl")]
31186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31187#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31188pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31189    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31190}
31191
31192/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31193///
31194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31195#[inline]
31196#[target_feature(enable = "avx512f,avx512vl")]
31197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31198#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31199pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31200    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31201}
31202
31203/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31204///
31205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31206#[inline]
31207#[target_feature(enable = "avx512f,avx512vl")]
31208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31209#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31210pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31211    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31212}
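
// Usage sketch (illustrative): the returned mask is an ordinary integer, so it
// can be post-processed with plain integer operations, e.g. counting how many
// 32-bit lanes of two 512-bit vectors compare equal:
//
//     let equal_lanes = _mm512_cmpeq_epu32_mask(a, b).count_ones(); // 0..=16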
31213
31214/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31215///
31216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31217#[inline]
31218#[target_feature(enable = "avx512f")]
31219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31220#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31221pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31222    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31223}
31224
31225/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31226///
31227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31228#[inline]
31229#[target_feature(enable = "avx512f")]
31230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31231#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31232pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31233    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31234}
31235
31236/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31237///
31238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31239#[inline]
31240#[target_feature(enable = "avx512f,avx512vl")]
31241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31242#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31243pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31244    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31245}
31246
31247/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31248///
31249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31250#[inline]
31251#[target_feature(enable = "avx512f,avx512vl")]
31252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31253#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31254pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31255    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31256}
31257
31258/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31259///
31260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31261#[inline]
31262#[target_feature(enable = "avx512f,avx512vl")]
31263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31264#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31265pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31266    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31267}
31268
31269/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31270///
31271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31272#[inline]
31273#[target_feature(enable = "avx512f,avx512vl")]
31274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31276pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31277    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31278}
31279
31280/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31281///
31282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
31283#[inline]
31284#[target_feature(enable = "avx512f")]
31285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31286#[rustc_legacy_const_generics(2)]
31287#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31288pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31289    unsafe {
31290        static_assert_uimm_bits!(IMM3, 3);
31291        let a = a.as_u32x16();
31292        let b = b.as_u32x16();
31293        let r = match IMM3 {
31294            0 => simd_eq(a, b),
31295            1 => simd_lt(a, b),
31296            2 => simd_le(a, b),
31297            3 => i32x16::ZERO,
31298            4 => simd_ne(a, b),
31299            5 => simd_ge(a, b),
31300            6 => simd_gt(a, b),
31301            _ => i32x16::splat(-1),
31302        };
31303        simd_bitmask(r)
31304    }
31305}
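
// Usage sketch (illustrative): `IMM3` follows the 3-bit `_MM_CMPINT_*`
// encoding handled by the match above:
//
//     0 = _MM_CMPINT_EQ     (equal)
//     1 = _MM_CMPINT_LT     (less than)
//     2 = _MM_CMPINT_LE     (less than or equal)
//     3 = _MM_CMPINT_FALSE  (always false)
//     4 = _MM_CMPINT_NE     (not equal)
//     5 = _MM_CMPINT_NLT    (greater than or equal)
//     6 = _MM_CMPINT_NLE    (greater than)
//     7 = _MM_CMPINT_TRUE   (always true)
//
// For example, `_mm512_cmp_epu32_mask::<_MM_CMPINT_TRUE>(a, b)` is `0xffff` for
// any inputs.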
31306
31307/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31308///
31309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31310#[inline]
31311#[target_feature(enable = "avx512f")]
31312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31313#[rustc_legacy_const_generics(3)]
31314#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31315pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31316    k1: __mmask16,
31317    a: __m512i,
31318    b: __m512i,
31319) -> __mmask16 {
31320    unsafe {
31321        static_assert_uimm_bits!(IMM3, 3);
31322        let a = a.as_u32x16();
31323        let b = b.as_u32x16();
31324        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31325        let r = match IMM3 {
31326            0 => simd_and(k1, simd_eq(a, b)),
31327            1 => simd_and(k1, simd_lt(a, b)),
31328            2 => simd_and(k1, simd_le(a, b)),
31329            3 => i32x16::ZERO,
31330            4 => simd_and(k1, simd_ne(a, b)),
31331            5 => simd_and(k1, simd_ge(a, b)),
31332            6 => simd_and(k1, simd_gt(a, b)),
31333            _ => k1,
31334        };
31335        simd_bitmask(r)
31336    }
31337}
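
// Usage sketch (illustrative): the masked form is equivalent to computing the
// unmasked comparison and then ANDing the result with `k1`:
//
//     assert_eq!(
//         _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b),
//         k1 & _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b),
//     );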
31338
31339/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31340///
31341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31342#[inline]
31343#[target_feature(enable = "avx512f,avx512vl")]
31344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31345#[rustc_legacy_const_generics(2)]
31346#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31347pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31348    unsafe {
31349        static_assert_uimm_bits!(IMM3, 3);
31350        let a = a.as_u32x8();
31351        let b = b.as_u32x8();
31352        let r = match IMM3 {
31353            0 => simd_eq(a, b),
31354            1 => simd_lt(a, b),
31355            2 => simd_le(a, b),
31356            3 => i32x8::ZERO,
31357            4 => simd_ne(a, b),
31358            5 => simd_ge(a, b),
31359            6 => simd_gt(a, b),
31360            _ => i32x8::splat(-1),
31361        };
31362        simd_bitmask(r)
31363    }
31364}
31365
31366/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31367///
31368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31369#[inline]
31370#[target_feature(enable = "avx512f,avx512vl")]
31371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31372#[rustc_legacy_const_generics(3)]
31373#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31374pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31375    k1: __mmask8,
31376    a: __m256i,
31377    b: __m256i,
31378) -> __mmask8 {
31379    unsafe {
31380        static_assert_uimm_bits!(IMM3, 3);
31381        let a = a.as_u32x8();
31382        let b = b.as_u32x8();
31383        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31384        let r = match IMM3 {
31385            0 => simd_and(k1, simd_eq(a, b)),
31386            1 => simd_and(k1, simd_lt(a, b)),
31387            2 => simd_and(k1, simd_le(a, b)),
31388            3 => i32x8::ZERO,
31389            4 => simd_and(k1, simd_ne(a, b)),
31390            5 => simd_and(k1, simd_ge(a, b)),
31391            6 => simd_and(k1, simd_gt(a, b)),
31392            _ => k1,
31393        };
31394        simd_bitmask(r)
31395    }
31396}
31397
31398/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31399///
31400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31401#[inline]
31402#[target_feature(enable = "avx512f,avx512vl")]
31403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31404#[rustc_legacy_const_generics(2)]
31405#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31406pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31407    unsafe {
31408        static_assert_uimm_bits!(IMM3, 3);
31409        let a = a.as_u32x4();
31410        let b = b.as_u32x4();
31411        let r = match IMM3 {
31412            0 => simd_eq(a, b),
31413            1 => simd_lt(a, b),
31414            2 => simd_le(a, b),
31415            3 => i32x4::ZERO,
31416            4 => simd_ne(a, b),
31417            5 => simd_ge(a, b),
31418            6 => simd_gt(a, b),
31419            _ => i32x4::splat(-1),
31420        };
31421        simd_bitmask(r)
31422    }
31423}
31424
31425/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31426///
31427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31428#[inline]
31429#[target_feature(enable = "avx512f,avx512vl")]
31430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31431#[rustc_legacy_const_generics(3)]
31432#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31433pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31434    k1: __mmask8,
31435    a: __m128i,
31436    b: __m128i,
31437) -> __mmask8 {
31438    unsafe {
31439        static_assert_uimm_bits!(IMM3, 3);
31440        let a = a.as_u32x4();
31441        let b = b.as_u32x4();
31442        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31443        let r = match IMM3 {
31444            0 => simd_and(k1, simd_eq(a, b)),
31445            1 => simd_and(k1, simd_lt(a, b)),
31446            2 => simd_and(k1, simd_le(a, b)),
31447            3 => i32x4::ZERO,
31448            4 => simd_and(k1, simd_ne(a, b)),
31449            5 => simd_and(k1, simd_ge(a, b)),
31450            6 => simd_and(k1, simd_gt(a, b)),
31451            _ => k1,
31452        };
31453        simd_bitmask(r)
31454    }
31455}
31456
31457/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31458///
31459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
31460#[inline]
31461#[target_feature(enable = "avx512f")]
31462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31463#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31464pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31465    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31466}
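
// Usage sketch (illustrative): a common pattern is to derive a mask from a
// signed compare and feed it into a masked operation, e.g. keeping only the
// negative lanes of `v` and zeroing the rest:
//
//     let neg = _mm512_cmplt_epi32_mask(v, _mm512_setzero_si512());
//     let only_negative = _mm512_maskz_mov_epi32(neg, v);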
31467
31468/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31469///
31470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31471#[inline]
31472#[target_feature(enable = "avx512f")]
31473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31474#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31475pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31476    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31477}
31478
31479/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31480///
31481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31482#[inline]
31483#[target_feature(enable = "avx512f,avx512vl")]
31484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31485#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31486pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31487    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31488}
31489
31490/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31491///
31492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31493#[inline]
31494#[target_feature(enable = "avx512f,avx512vl")]
31495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31496#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31497pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31498    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31499}
31500
31501/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31502///
31503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31504#[inline]
31505#[target_feature(enable = "avx512f,avx512vl")]
31506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31507#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31508pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31509    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31510}
31511
31512/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31513///
31514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31515#[inline]
31516#[target_feature(enable = "avx512f,avx512vl")]
31517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31518#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31519pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31520    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31521}
31522
31523/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31524///
31525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31526#[inline]
31527#[target_feature(enable = "avx512f")]
31528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31529#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31530pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31531    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31532}
31533
31534/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31535///
31536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31537#[inline]
31538#[target_feature(enable = "avx512f")]
31539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31540#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31541pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31542    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31543}
31544
31545/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31546///
31547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31548#[inline]
31549#[target_feature(enable = "avx512f,avx512vl")]
31550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31551#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31552pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31553    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31554}
31555
31556/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31557///
31558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31559#[inline]
31560#[target_feature(enable = "avx512f,avx512vl")]
31561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31562#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31563pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31564    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31565}
31566
31567/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31568///
31569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31570#[inline]
31571#[target_feature(enable = "avx512f,avx512vl")]
31572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31573#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31574pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31575    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31576}
31577
31578/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31579///
31580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31581#[inline]
31582#[target_feature(enable = "avx512f,avx512vl")]
31583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31585pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31586    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31587}
31588
31589/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31590///
31591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31592#[inline]
31593#[target_feature(enable = "avx512f")]
31594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31595#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31596pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31597    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31598}
31599
31600/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31601///
31602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31603#[inline]
31604#[target_feature(enable = "avx512f")]
31605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31607pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31608    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31609}
31610
31611/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31612///
31613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31614#[inline]
31615#[target_feature(enable = "avx512f,avx512vl")]
31616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31617#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31618pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31619    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31620}
31621
31622/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31623///
31624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31625#[inline]
31626#[target_feature(enable = "avx512f,avx512vl")]
31627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31628#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31629pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31630    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31631}
31632
31633/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31634///
31635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31636#[inline]
31637#[target_feature(enable = "avx512f,avx512vl")]
31638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31639#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31640pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31641    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31642}
31643
31644/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31645///
31646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31647#[inline]
31648#[target_feature(enable = "avx512f,avx512vl")]
31649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31650#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31651pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31652    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31653}
31654
31655/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31656///
31657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31658#[inline]
31659#[target_feature(enable = "avx512f")]
31660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31661#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31662pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31663    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31664}
31665
31666/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31667///
31668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31669#[inline]
31670#[target_feature(enable = "avx512f")]
31671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31672#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31673pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31674    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31675}
31676
31677/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31678///
31679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31680#[inline]
31681#[target_feature(enable = "avx512f,avx512vl")]
31682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31683#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31684pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31685    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31686}
31687
31688/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31689///
31690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31691#[inline]
31692#[target_feature(enable = "avx512f,avx512vl")]
31693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31694#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31695pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31696    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31697}
31698
31699/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31700///
31701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31702#[inline]
31703#[target_feature(enable = "avx512f,avx512vl")]
31704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31705#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31706pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31707    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31708}
31709
31710/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31711///
31712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31713#[inline]
31714#[target_feature(enable = "avx512f,avx512vl")]
31715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31716#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31717pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31718    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31719}
31720
31721/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31722///
31723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
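///
/// # Examples
///
/// An illustrative sketch (added here, not part of Intel's documentation); it
/// assumes AVX-512F hardware and is marked `ignore` so it is not run as a
/// doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // Only lane 3 compares equal, so only bit 3 of the mask is set.
///     let k = _mm512_cmpeq_epi32_mask(a, b);
///     assert_eq!(k, 0b0000_0000_0000_1000);
/// }
/// ```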
31724#[inline]
31725#[target_feature(enable = "avx512f")]
31726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31727#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31728pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31729    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31730}
31731
31732/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31733///
31734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31735#[inline]
31736#[target_feature(enable = "avx512f")]
31737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31738#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31739pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31740    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31741}
31742
31743/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31744///
31745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31746#[inline]
31747#[target_feature(enable = "avx512f,avx512vl")]
31748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31749#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31750pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31751    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31752}
31753
31754/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31755///
31756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31757#[inline]
31758#[target_feature(enable = "avx512f,avx512vl")]
31759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31760#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31761pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31762    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31763}
31764
31765/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31766///
31767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31768#[inline]
31769#[target_feature(enable = "avx512f,avx512vl")]
31770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31771#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31772pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31773    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31774}
31775
31776/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31777///
31778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31779#[inline]
31780#[target_feature(enable = "avx512f,avx512vl")]
31781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31782#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31783pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31784    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31785}
31786
31787/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31788///
31789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
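///
/// # Examples
///
/// A short sketch (illustrative only, not from Intel's documentation);
/// assumes AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(7);
///     let b = _mm512_set1_epi32(7);
///     // No lane differs, so the resulting mask is empty.
///     assert_eq!(_mm512_cmpneq_epi32_mask(a, b), 0);
/// }
/// ```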
31790#[inline]
31791#[target_feature(enable = "avx512f")]
31792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31793#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31794pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31795    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31796}
31797
31798/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31799///
31800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31801#[inline]
31802#[target_feature(enable = "avx512f")]
31803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31804#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31805pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31806    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31807}
31808
31809/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31810///
31811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31812#[inline]
31813#[target_feature(enable = "avx512f,avx512vl")]
31814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31815#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31816pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31817    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31818}
31819
31820/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31821///
31822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31823#[inline]
31824#[target_feature(enable = "avx512f,avx512vl")]
31825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31826#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31827pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31828    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31829}
31830
31831/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31832///
31833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31834#[inline]
31835#[target_feature(enable = "avx512f,avx512vl")]
31836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31837#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31838pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31839    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31840}
31841
31842/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31843///
31844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31845#[inline]
31846#[target_feature(enable = "avx512f,avx512vl")]
31847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31848#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31849pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31850    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31851}
31852
31853/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31854///
31855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
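///
/// # Examples
///
/// An illustrative sketch (added here, not part of Intel's documentation)
/// showing how the comparison predicate is passed as a const generic; it
/// assumes AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(2);
///     // _MM_CMPINT_LT selects the signed less-than predicate.
///     let k = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```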
31856#[inline]
31857#[target_feature(enable = "avx512f")]
31858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31859#[rustc_legacy_const_generics(2)]
31860#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31861pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31862    unsafe {
31863        static_assert_uimm_bits!(IMM3, 3);
31864        let a = a.as_i32x16();
31865        let b = b.as_i32x16();
31866        let r = match IMM3 {
31867            0 => simd_eq(a, b),
31868            1 => simd_lt(a, b),
31869            2 => simd_le(a, b),
31870            3 => i32x16::ZERO,
31871            4 => simd_ne(a, b),
31872            5 => simd_ge(a, b),
31873            6 => simd_gt(a, b),
31874            _ => i32x16::splat(-1),
31875        };
31876        simd_bitmask(r)
31877    }
31878}
31879
31880/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
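///
/// # Examples
///
/// A minimal sketch (illustrative only, not from Intel's documentation);
/// assumes AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(2);
///     let k1: __mmask16 = 0x0F0F;
///     // Every lane is not-equal, but the per-lane results are ANDed with `k1`.
///     let k = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b);
///     assert_eq!(k, 0x0F0F);
/// }
/// ```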
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31886#[rustc_legacy_const_generics(3)]
31887#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31888pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31889    k1: __mmask16,
31890    a: __m512i,
31891    b: __m512i,
31892) -> __mmask16 {
31893    unsafe {
31894        static_assert_uimm_bits!(IMM3, 3);
31895        let a = a.as_i32x16();
31896        let b = b.as_i32x16();
31897        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31898        let r = match IMM3 {
31899            0 => simd_and(k1, simd_eq(a, b)),
31900            1 => simd_and(k1, simd_lt(a, b)),
31901            2 => simd_and(k1, simd_le(a, b)),
31902            3 => i32x16::ZERO,
31903            4 => simd_and(k1, simd_ne(a, b)),
31904            5 => simd_and(k1, simd_ge(a, b)),
31905            6 => simd_and(k1, simd_gt(a, b)),
31906            _ => k1,
31907        };
31908        simd_bitmask(r)
31909    }
31910}
31911
31912/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31913///
31914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31915#[inline]
31916#[target_feature(enable = "avx512f,avx512vl")]
31917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31918#[rustc_legacy_const_generics(2)]
31919#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31920pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31921    unsafe {
31922        static_assert_uimm_bits!(IMM3, 3);
31923        let a = a.as_i32x8();
31924        let b = b.as_i32x8();
31925        let r = match IMM3 {
31926            0 => simd_eq(a, b),
31927            1 => simd_lt(a, b),
31928            2 => simd_le(a, b),
31929            3 => i32x8::ZERO,
31930            4 => simd_ne(a, b),
31931            5 => simd_ge(a, b),
31932            6 => simd_gt(a, b),
31933            _ => i32x8::splat(-1),
31934        };
31935        simd_bitmask(r)
31936    }
31937}
31938
31939/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31940///
31941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31942#[inline]
31943#[target_feature(enable = "avx512f,avx512vl")]
31944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31945#[rustc_legacy_const_generics(3)]
31946#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31947pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31948    k1: __mmask8,
31949    a: __m256i,
31950    b: __m256i,
31951) -> __mmask8 {
31952    unsafe {
31953        static_assert_uimm_bits!(IMM3, 3);
31954        let a = a.as_i32x8();
31955        let b = b.as_i32x8();
31956        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31957        let r = match IMM3 {
31958            0 => simd_and(k1, simd_eq(a, b)),
31959            1 => simd_and(k1, simd_lt(a, b)),
31960            2 => simd_and(k1, simd_le(a, b)),
31961            3 => i32x8::ZERO,
31962            4 => simd_and(k1, simd_ne(a, b)),
31963            5 => simd_and(k1, simd_ge(a, b)),
31964            6 => simd_and(k1, simd_gt(a, b)),
31965            _ => k1,
31966        };
31967        simd_bitmask(r)
31968    }
31969}
31970
31971/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31972///
31973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31974#[inline]
31975#[target_feature(enable = "avx512f,avx512vl")]
31976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31977#[rustc_legacy_const_generics(2)]
31978#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31979pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31980    unsafe {
31981        static_assert_uimm_bits!(IMM3, 3);
31982        let a = a.as_i32x4();
31983        let b = b.as_i32x4();
31984        let r = match IMM3 {
31985            0 => simd_eq(a, b),
31986            1 => simd_lt(a, b),
31987            2 => simd_le(a, b),
31988            3 => i32x4::ZERO,
31989            4 => simd_ne(a, b),
31990            5 => simd_ge(a, b),
31991            6 => simd_gt(a, b),
31992            _ => i32x4::splat(-1),
31993        };
31994        simd_bitmask(r)
31995    }
31996}
31997
31998/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31999///
32000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32001#[inline]
32002#[target_feature(enable = "avx512f,avx512vl")]
32003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32004#[rustc_legacy_const_generics(3)]
32005#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32006pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32007    k1: __mmask8,
32008    a: __m128i,
32009    b: __m128i,
32010) -> __mmask8 {
32011    unsafe {
32012        static_assert_uimm_bits!(IMM3, 3);
32013        let a = a.as_i32x4();
32014        let b = b.as_i32x4();
32015        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32016        let r = match IMM3 {
32017            0 => simd_and(k1, simd_eq(a, b)),
32018            1 => simd_and(k1, simd_lt(a, b)),
32019            2 => simd_and(k1, simd_le(a, b)),
32020            3 => i32x4::ZERO,
32021            4 => simd_and(k1, simd_ne(a, b)),
32022            5 => simd_and(k1, simd_ge(a, b)),
32023            6 => simd_and(k1, simd_gt(a, b)),
32024            _ => k1,
32025        };
32026        simd_bitmask(r)
32027    }
32028}
32029
32030/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32031///
32032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
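///
/// # Examples
///
/// An illustrative sketch (added here, not part of Intel's documentation)
/// highlighting the unsigned interpretation of the lanes; assumes AVX-512F and
/// is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     // All-ones is u64::MAX when interpreted as unsigned, so it is never
///     // less than zero here (a signed compare would say otherwise).
///     let a = _mm512_set1_epi64(-1);
///     let b = _mm512_set1_epi64(0);
///     assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00);
///     assert_eq!(_mm512_cmplt_epu64_mask(b, a), 0xFF);
/// }
/// ```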
32033#[inline]
32034#[target_feature(enable = "avx512f")]
32035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32037pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32038    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32039}
32040
32041/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32042///
32043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32044#[inline]
32045#[target_feature(enable = "avx512f")]
32046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32048pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32049    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32050}
32051
32052/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32053///
32054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32055#[inline]
32056#[target_feature(enable = "avx512f,avx512vl")]
32057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32059pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32060    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32061}
32062
32063/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32064///
32065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32066#[inline]
32067#[target_feature(enable = "avx512f,avx512vl")]
32068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32070pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32071    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32072}
32073
32074/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32075///
32076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32077#[inline]
32078#[target_feature(enable = "avx512f,avx512vl")]
32079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32081pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32082    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32083}
32084
32085/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32086///
32087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32088#[inline]
32089#[target_feature(enable = "avx512f,avx512vl")]
32090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32092pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32093    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32094}
32095
32096/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32097///
32098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
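///
/// # Examples
///
/// A short sketch (illustrative only, not from Intel's documentation);
/// assumes AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     // All-ones is u64::MAX unsigned, so it compares greater than 1 here
///     // even though a signed compare would treat it as -1.
///     let a = _mm512_set1_epi64(-1);
///     let b = _mm512_set1_epi64(1);
///     assert_eq!(_mm512_cmpgt_epu64_mask(a, b), 0xFF);
/// }
/// ```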
32099#[inline]
32100#[target_feature(enable = "avx512f")]
32101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32102#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32103pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32104    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32105}
32106
32107/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32108///
32109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32110#[inline]
32111#[target_feature(enable = "avx512f")]
32112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32113#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32114pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32115    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32116}
32117
32118/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32119///
32120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32121#[inline]
32122#[target_feature(enable = "avx512f,avx512vl")]
32123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32124#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32125pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32126    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32127}
32128
32129/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32130///
32131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32132#[inline]
32133#[target_feature(enable = "avx512f,avx512vl")]
32134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32135#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32136pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32137    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32138}
32139
32140/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32141///
32142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32143#[inline]
32144#[target_feature(enable = "avx512f,avx512vl")]
32145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32146#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32147pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32148    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32149}
32150
32151/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32152///
32153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32154#[inline]
32155#[target_feature(enable = "avx512f,avx512vl")]
32156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32158pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32159    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32160}
32161
32162/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32163///
32164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32165#[inline]
32166#[target_feature(enable = "avx512f")]
32167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32168#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32169pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32170    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32171}
32172
32173/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32174///
32175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32176#[inline]
32177#[target_feature(enable = "avx512f")]
32178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32180pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32181    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32182}
32183
32184/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32185///
32186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32187#[inline]
32188#[target_feature(enable = "avx512f,avx512vl")]
32189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32190#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32191pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32192    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32193}
32194
32195/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32196///
32197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32198#[inline]
32199#[target_feature(enable = "avx512f,avx512vl")]
32200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32201#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32202pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32203    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32204}
32205
32206/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32207///
32208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32209#[inline]
32210#[target_feature(enable = "avx512f,avx512vl")]
32211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32212#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32213pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32214    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32215}
32216
32217/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32218///
32219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32220#[inline]
32221#[target_feature(enable = "avx512f,avx512vl")]
32222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32223#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32224pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32225    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32226}
32227
32228/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32229///
32230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32231#[inline]
32232#[target_feature(enable = "avx512f")]
32233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32234#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32235pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32236    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32237}
32238
32239/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32240///
32241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32242#[inline]
32243#[target_feature(enable = "avx512f")]
32244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32245#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32246pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32247    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32248}
32249
32250/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32251///
32252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32253#[inline]
32254#[target_feature(enable = "avx512f,avx512vl")]
32255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32256#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32257pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32258    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32259}
32260
32261/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32262///
32263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32264#[inline]
32265#[target_feature(enable = "avx512f,avx512vl")]
32266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32267#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32268pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32269    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32270}
32271
32272/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32273///
32274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32275#[inline]
32276#[target_feature(enable = "avx512f,avx512vl")]
32277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32278#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32279pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32280    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32281}
32282
32283/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32284///
32285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32286#[inline]
32287#[target_feature(enable = "avx512f,avx512vl")]
32288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32290pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32291    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32292}
32293
32294/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32295///
32296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32297#[inline]
32298#[target_feature(enable = "avx512f")]
32299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32300#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32301pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32302    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32303}
32304
32305/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32306///
32307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32308#[inline]
32309#[target_feature(enable = "avx512f")]
32310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32312pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32313    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32314}
32315
32316/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32317///
32318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32319#[inline]
32320#[target_feature(enable = "avx512f,avx512vl")]
32321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32322#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32323pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32324    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32325}
32326
32327/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32328///
32329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32330#[inline]
32331#[target_feature(enable = "avx512f,avx512vl")]
32332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32333#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32334pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32335    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32336}
32337
32338/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32339///
32340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32341#[inline]
32342#[target_feature(enable = "avx512f,avx512vl")]
32343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32344#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32345pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32346    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32347}
32348
32349/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32350///
32351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32352#[inline]
32353#[target_feature(enable = "avx512f,avx512vl")]
32354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32355#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32356pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32357    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32363#[inline]
32364#[target_feature(enable = "avx512f")]
32365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32367pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32368    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32369}
32370
32371/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32372///
32373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32374#[inline]
32375#[target_feature(enable = "avx512f")]
32376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32377#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32378pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32379    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32380}
32381
32382/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32383///
32384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32385#[inline]
32386#[target_feature(enable = "avx512f,avx512vl")]
32387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32388#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32389pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32390    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32391}
32392
32393/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32394///
32395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32396#[inline]
32397#[target_feature(enable = "avx512f,avx512vl")]
32398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32399#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32400pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32401    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32402}
32403
32404/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32405///
32406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32407#[inline]
32408#[target_feature(enable = "avx512f,avx512vl")]
32409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32410#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32411pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32412    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32413}
32414
32415/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32416///
32417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32418#[inline]
32419#[target_feature(enable = "avx512f,avx512vl")]
32420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32422pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32423    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32424}
32425
32426/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32427///
32428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
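///
/// # Examples
///
/// An illustrative sketch (added here, not part of Intel's documentation)
/// showing the const-generic predicate with unsigned 64-bit lanes; assumes
/// AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi64(5);
///     let b = _mm512_set1_epi64(3);
///     // _MM_CMPINT_NLE ("not less-or-equal") selects unsigned greater-than.
///     let k = _mm512_cmp_epu64_mask::<_MM_CMPINT_NLE>(a, b);
///     assert_eq!(k, 0xFF);
/// }
/// ```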
32429#[inline]
32430#[target_feature(enable = "avx512f")]
32431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32432#[rustc_legacy_const_generics(2)]
32433#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32434pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32435    unsafe {
32436        static_assert_uimm_bits!(IMM3, 3);
32437        let a = a.as_u64x8();
32438        let b = b.as_u64x8();
32439        let r = match IMM3 {
32440            0 => simd_eq(a, b),
32441            1 => simd_lt(a, b),
32442            2 => simd_le(a, b),
32443            3 => i64x8::ZERO,
32444            4 => simd_ne(a, b),
32445            5 => simd_ge(a, b),
32446            6 => simd_gt(a, b),
32447            _ => i64x8::splat(-1),
32448        };
32449        simd_bitmask(r)
32450    }
32451}
32452
32453/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32454///
32455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32456#[inline]
32457#[target_feature(enable = "avx512f")]
32458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32459#[rustc_legacy_const_generics(3)]
32460#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32461pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32462    k1: __mmask8,
32463    a: __m512i,
32464    b: __m512i,
32465) -> __mmask8 {
32466    unsafe {
32467        static_assert_uimm_bits!(IMM3, 3);
32468        let a = a.as_u64x8();
32469        let b = b.as_u64x8();
32470        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
32471        let r = match IMM3 {
32472            0 => simd_and(k1, simd_eq(a, b)),
32473            1 => simd_and(k1, simd_lt(a, b)),
32474            2 => simd_and(k1, simd_le(a, b)),
32475            3 => i64x8::ZERO,
32476            4 => simd_and(k1, simd_ne(a, b)),
32477            5 => simd_and(k1, simd_ge(a, b)),
32478            6 => simd_and(k1, simd_gt(a, b)),
32479            _ => k1,
32480        };
32481        simd_bitmask(r)
32482    }
32483}
32484
32485/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32486///
32487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32488#[inline]
32489#[target_feature(enable = "avx512f,avx512vl")]
32490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32491#[rustc_legacy_const_generics(2)]
32492#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32493pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32494    unsafe {
32495        static_assert_uimm_bits!(IMM3, 3);
32496        let a = a.as_u64x4();
32497        let b = b.as_u64x4();
32498        let r = match IMM3 {
32499            0 => simd_eq(a, b),
32500            1 => simd_lt(a, b),
32501            2 => simd_le(a, b),
32502            3 => i64x4::ZERO,
32503            4 => simd_ne(a, b),
32504            5 => simd_ge(a, b),
32505            6 => simd_gt(a, b),
32506            _ => i64x4::splat(-1),
32507        };
32508        simd_bitmask(r)
32509    }
32510}
32511
32512/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32513///
32514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32515#[inline]
32516#[target_feature(enable = "avx512f,avx512vl")]
32517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32518#[rustc_legacy_const_generics(3)]
32519#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32520pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32521    k1: __mmask8,
32522    a: __m256i,
32523    b: __m256i,
32524) -> __mmask8 {
32525    unsafe {
32526        static_assert_uimm_bits!(IMM3, 3);
32527        let a = a.as_u64x4();
32528        let b = b.as_u64x4();
32529        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
32530        let r = match IMM3 {
32531            0 => simd_and(k1, simd_eq(a, b)),
32532            1 => simd_and(k1, simd_lt(a, b)),
32533            2 => simd_and(k1, simd_le(a, b)),
32534            3 => i64x4::ZERO,
32535            4 => simd_and(k1, simd_ne(a, b)),
32536            5 => simd_and(k1, simd_ge(a, b)),
32537            6 => simd_and(k1, simd_gt(a, b)),
32538            _ => k1,
32539        };
32540        simd_bitmask(r)
32541    }
32542}
32543
32544/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32545///
32546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32547#[inline]
32548#[target_feature(enable = "avx512f,avx512vl")]
32549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32550#[rustc_legacy_const_generics(2)]
32551#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32552pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32553    unsafe {
32554        static_assert_uimm_bits!(IMM3, 3);
32555        let a = a.as_u64x2();
32556        let b = b.as_u64x2();
32557        let r = match IMM3 {
32558            0 => simd_eq(a, b),
32559            1 => simd_lt(a, b),
32560            2 => simd_le(a, b),
32561            3 => i64x2::ZERO,
32562            4 => simd_ne(a, b),
32563            5 => simd_ge(a, b),
32564            6 => simd_gt(a, b),
32565            _ => i64x2::splat(-1),
32566        };
32567        simd_bitmask(r)
32568    }
32569}
32570
32571/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32572///
32573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32574#[inline]
32575#[target_feature(enable = "avx512f,avx512vl")]
32576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32577#[rustc_legacy_const_generics(3)]
32578#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32579pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32580    k1: __mmask8,
32581    a: __m128i,
32582    b: __m128i,
32583) -> __mmask8 {
32584    unsafe {
32585        static_assert_uimm_bits!(IMM3, 3);
32586        let a = a.as_u64x2();
32587        let b = b.as_u64x2();
32588        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
32589        let r = match IMM3 {
32590            0 => simd_and(k1, simd_eq(a, b)),
32591            1 => simd_and(k1, simd_lt(a, b)),
32592            2 => simd_and(k1, simd_le(a, b)),
32593            3 => i64x2::ZERO,
32594            4 => simd_and(k1, simd_ne(a, b)),
32595            5 => simd_and(k1, simd_ge(a, b)),
32596            6 => simd_and(k1, simd_gt(a, b)),
32597            _ => k1,
32598        };
32599        simd_bitmask(r)
32600    }
32601}
32602
32603/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32604///
32605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
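///
/// # Examples
///
/// A minimal sketch (illustrative only, not from Intel's documentation)
/// contrasting the signed semantics with the unsigned variant; assumes
/// AVX-512F and is marked `ignore` so it is not run as a doctest.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     // Signed comparison: -5 is less than 0 in every lane, unlike the
///     // unsigned `_mm512_cmplt_epu64_mask` variant.
///     let a = _mm512_set1_epi64(-5);
///     let b = _mm512_set1_epi64(0);
///     assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xFF);
/// }
/// ```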
32606#[inline]
32607#[target_feature(enable = "avx512f")]
32608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32609#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32610pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32611    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32612}
32613
32614/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32615///
32616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32617#[inline]
32618#[target_feature(enable = "avx512f")]
32619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32620#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32621pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32622    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32623}
32624
32625/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32626///
32627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32628#[inline]
32629#[target_feature(enable = "avx512f,avx512vl")]
32630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32631#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32632pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32633    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32634}
32635
32636/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32639#[inline]
32640#[target_feature(enable = "avx512f,avx512vl")]
32641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32643pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32644    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32645}
32646
32647/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32648///
32649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32650#[inline]
32651#[target_feature(enable = "avx512f,avx512vl")]
32652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32653#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32654pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32655    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32656}
32657
32658/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32659///
32660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32661#[inline]
32662#[target_feature(enable = "avx512f,avx512vl")]
32663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32664#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32665pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32666    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32667}
32668
32669/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32670///
32671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32672#[inline]
32673#[target_feature(enable = "avx512f")]
32674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32675#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32676pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32677    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32678}
32679
32680/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32681///
32682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32683#[inline]
32684#[target_feature(enable = "avx512f")]
32685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32686#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32687pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32688    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32689}
32690
32691/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32692///
32693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32694#[inline]
32695#[target_feature(enable = "avx512f,avx512vl")]
32696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32697#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32698pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32699    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32700}
32701
32702/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32703///
32704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32705#[inline]
32706#[target_feature(enable = "avx512f,avx512vl")]
32707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32708#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32709pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32710    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32711}
32712
32713/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32714///
32715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32716#[inline]
32717#[target_feature(enable = "avx512f,avx512vl")]
32718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32719#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32720pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32721    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32722}
32723
32724/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32725///
32726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32727#[inline]
32728#[target_feature(enable = "avx512f,avx512vl")]
32729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32730#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32731pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32732    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32733}
32734
32735/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32736///
32737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32738#[inline]
32739#[target_feature(enable = "avx512f")]
32740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32741#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32742pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32743    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32744}
32745
32746/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32747///
32748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
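///
/// A minimal sketch of the zeromask behaviour (illustrative only; assumes
/// nightly with `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(1);
///     // every lane satisfies a <= b, but only bits set in k1 can survive
///     let k1: __mmask8 = 0b0000_1111;
///     assert_eq!(_mm512_mask_cmple_epi64_mask(k1, a, b), 0b0000_1111);
/// }
/// ```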
32749#[inline]
32750#[target_feature(enable = "avx512f")]
32751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32753pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32754    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32755}
32756
32757/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32758///
32759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32760#[inline]
32761#[target_feature(enable = "avx512f,avx512vl")]
32762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32763#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32764pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32765    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32766}
32767
32768/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32769///
32770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32771#[inline]
32772#[target_feature(enable = "avx512f,avx512vl")]
32773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32774#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32775pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32776    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32777}
32778
32779/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32780///
32781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32782#[inline]
32783#[target_feature(enable = "avx512f,avx512vl")]
32784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32785#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32786pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32787    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32788}
32789
32790/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32791///
32792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32793#[inline]
32794#[target_feature(enable = "avx512f,avx512vl")]
32795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32796#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32797pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32798    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32799}
32800
32801/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32802///
32803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32804#[inline]
32805#[target_feature(enable = "avx512f")]
32806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32807#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32808pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32809    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32810}
32811
32812/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32813///
32814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32815#[inline]
32816#[target_feature(enable = "avx512f")]
32817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32818#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32819pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32820    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32821}
32822
32823/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32824///
32825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32826#[inline]
32827#[target_feature(enable = "avx512f,avx512vl")]
32828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32829#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32830pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32831    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32832}
32833
32834/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32835///
32836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32837#[inline]
32838#[target_feature(enable = "avx512f,avx512vl")]
32839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32840#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32841pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32842    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32843}
32844
32845/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32846///
32847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32848#[inline]
32849#[target_feature(enable = "avx512f,avx512vl")]
32850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32851#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32852pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32853    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32854}
32855
32856/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32857///
32858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32859#[inline]
32860#[target_feature(enable = "avx512f,avx512vl")]
32861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32862#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32863pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32864    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32865}
32866
32867/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32868///
32869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
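///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     // arguments are listed from lane 7 down to lane 0, so lanes 0..=3 hold 9
///     let a = _mm512_set_epi64(0, 0, 0, 0, 9, 9, 9, 9);
///     let b = _mm512_set1_epi64(9);
///     assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0b0000_1111);
/// }
/// ```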
32870#[inline]
32871#[target_feature(enable = "avx512f")]
32872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32873#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32874pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32875    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32876}
32877
32878/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32879///
32880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32881#[inline]
32882#[target_feature(enable = "avx512f")]
32883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32884#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32885pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32886    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32887}
32888
32889/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32890///
32891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32892#[inline]
32893#[target_feature(enable = "avx512f,avx512vl")]
32894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32895#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32896pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32897    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32898}
32899
32900/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32901///
32902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32903#[inline]
32904#[target_feature(enable = "avx512f,avx512vl")]
32905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32906#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32907pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32908    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32909}
32910
32911/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32912///
32913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32914#[inline]
32915#[target_feature(enable = "avx512f,avx512vl")]
32916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32917#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32918pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32919    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32920}
32921
32922/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32923///
32924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32925#[inline]
32926#[target_feature(enable = "avx512f,avx512vl")]
32927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32928#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32929pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32930    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32931}
32932
32933/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
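///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 7); // only lane 0 differs
///     let b = _mm512_setzero_si512();
///     assert_eq!(_mm512_cmpneq_epi64_mask(a, b), 0b0000_0001);
/// }
/// ```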
32936#[inline]
32937#[target_feature(enable = "avx512f")]
32938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32940pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32941    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32942}
32943
32944/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32945///
32946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32947#[inline]
32948#[target_feature(enable = "avx512f")]
32949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32950#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32951pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32952    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32953}
32954
32955/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32956///
32957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32958#[inline]
32959#[target_feature(enable = "avx512f,avx512vl")]
32960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32961#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32962pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32963    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32964}
32965
32966/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32967///
32968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32969#[inline]
32970#[target_feature(enable = "avx512f,avx512vl")]
32971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32972#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32973pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32974    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32975}
32976
32977/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32978///
32979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32980#[inline]
32981#[target_feature(enable = "avx512f,avx512vl")]
32982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32983#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32984pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32985    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32986}
32987
32988/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32989///
32990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32991#[inline]
32992#[target_feature(enable = "avx512f,avx512vl")]
32993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32994#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32995pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32996    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32997}
32998
32999/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33000///
33001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
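///
/// A minimal sketch of selecting the predicate through `IMM3` (illustrative
/// only; assumes nightly with `stdarch_x86_avx512` and an AVX-512F CPU, hence
/// `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); // lane i holds i
///     let b = _mm512_set1_epi64(2);
///     // _MM_CMPINT_LE selects the less-than-or-equal predicate
///     let k = _mm512_cmp_epi64_mask::<_MM_CMPINT_LE>(a, b);
///     assert_eq!(k, _mm512_cmple_epi64_mask(a, b));
///     assert_eq!(k, 0b0000_0111);
/// }
/// ```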
33002#[inline]
33003#[target_feature(enable = "avx512f")]
33004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33005#[rustc_legacy_const_generics(2)]
33006#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33007pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33008    unsafe {
33009        static_assert_uimm_bits!(IMM3, 3);
33010        let a = a.as_i64x8();
33011        let b = b.as_i64x8();
33012        let r = match IMM3 {
33013            0 => simd_eq(a, b),
33014            1 => simd_lt(a, b),
33015            2 => simd_le(a, b),
33016            3 => i64x8::ZERO,
33017            4 => simd_ne(a, b),
33018            5 => simd_ge(a, b),
33019            6 => simd_gt(a, b),
33020            _ => i64x8::splat(-1),
33021        };
33022        simd_bitmask(r)
33023    }
33024}
33025
33026/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33027///
33028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33029#[inline]
33030#[target_feature(enable = "avx512f")]
33031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33032#[rustc_legacy_const_generics(3)]
33033#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33034pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33035    k1: __mmask8,
33036    a: __m512i,
33037    b: __m512i,
33038) -> __mmask8 {
33039    unsafe {
33040        static_assert_uimm_bits!(IMM3, 3);
33041        let a = a.as_i64x8();
33042        let b = b.as_i64x8();
33043        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33044        let r = match IMM3 {
33045            0 => simd_and(k1, simd_eq(a, b)),
33046            1 => simd_and(k1, simd_lt(a, b)),
33047            2 => simd_and(k1, simd_le(a, b)),
33048            3 => i64x8::ZERO,
33049            4 => simd_and(k1, simd_ne(a, b)),
33050            5 => simd_and(k1, simd_ge(a, b)),
33051            6 => simd_and(k1, simd_gt(a, b)),
33052            _ => k1,
33053        };
33054        simd_bitmask(r)
33055    }
33056}
33057
33058/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33059///
33060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33061#[inline]
33062#[target_feature(enable = "avx512f,avx512vl")]
33063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33064#[rustc_legacy_const_generics(2)]
33065#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33066pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33067    unsafe {
33068        static_assert_uimm_bits!(IMM3, 3);
33069        let a = a.as_i64x4();
33070        let b = b.as_i64x4();
33071        let r = match IMM3 {
33072            0 => simd_eq(a, b),
33073            1 => simd_lt(a, b),
33074            2 => simd_le(a, b),
33075            3 => i64x4::ZERO,
33076            4 => simd_ne(a, b),
33077            5 => simd_ge(a, b),
33078            6 => simd_gt(a, b),
33079            _ => i64x4::splat(-1),
33080        };
33081        simd_bitmask(r)
33082    }
33083}
33084
33085/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33086///
33087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33088#[inline]
33089#[target_feature(enable = "avx512f,avx512vl")]
33090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33091#[rustc_legacy_const_generics(3)]
33092#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33093pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33094    k1: __mmask8,
33095    a: __m256i,
33096    b: __m256i,
33097) -> __mmask8 {
33098    unsafe {
33099        static_assert_uimm_bits!(IMM3, 3);
33100        let a = a.as_i64x4();
33101        let b = b.as_i64x4();
33102        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
33103        let r = match IMM3 {
33104            0 => simd_and(k1, simd_eq(a, b)),
33105            1 => simd_and(k1, simd_lt(a, b)),
33106            2 => simd_and(k1, simd_le(a, b)),
33107            3 => i64x4::ZERO,
33108            4 => simd_and(k1, simd_ne(a, b)),
33109            5 => simd_and(k1, simd_ge(a, b)),
33110            6 => simd_and(k1, simd_gt(a, b)),
33111            _ => k1,
33112        };
33113        simd_bitmask(r)
33114    }
33115}
33116
33117/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33118///
33119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33120#[inline]
33121#[target_feature(enable = "avx512f,avx512vl")]
33122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33123#[rustc_legacy_const_generics(2)]
33124#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33125pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33126    unsafe {
33127        static_assert_uimm_bits!(IMM3, 3);
33128        let a = a.as_i64x2();
33129        let b = b.as_i64x2();
33130        let r = match IMM3 {
33131            0 => simd_eq(a, b),
33132            1 => simd_lt(a, b),
33133            2 => simd_le(a, b),
33134            3 => i64x2::ZERO,
33135            4 => simd_ne(a, b),
33136            5 => simd_ge(a, b),
33137            6 => simd_gt(a, b),
33138            _ => i64x2::splat(-1),
33139        };
33140        simd_bitmask(r)
33141    }
33142}
33143
33144/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33145///
33146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33147#[inline]
33148#[target_feature(enable = "avx512f,avx512vl")]
33149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33150#[rustc_legacy_const_generics(3)]
33151#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33152pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33153    k1: __mmask8,
33154    a: __m128i,
33155    b: __m128i,
33156) -> __mmask8 {
33157    unsafe {
33158        static_assert_uimm_bits!(IMM3, 3);
33159        let a = a.as_i64x2();
33160        let b = b.as_i64x2();
33161        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
33162        let r = match IMM3 {
33163            0 => simd_and(k1, simd_eq(a, b)),
33164            1 => simd_and(k1, simd_lt(a, b)),
33165            2 => simd_and(k1, simd_le(a, b)),
33166            3 => i64x2::ZERO,
33167            4 => simd_and(k1, simd_ne(a, b)),
33168            5 => simd_and(k1, simd_ge(a, b)),
33169            6 => simd_and(k1, simd_gt(a, b)),
33170            _ => k1,
33171        };
33172        simd_bitmask(r)
33173    }
33174}
33175
33176/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33177///
33178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33179#[inline]
33180#[target_feature(enable = "avx512f")]
33181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33182pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33183    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33184}
33185
33186/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33187///
33188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
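///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(3);
///     // only the four lanes selected by the mask contribute; inactive lanes count as 0
///     assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a), 12);
///     // with all 16 lanes active the sum is 48
///     assert_eq!(_mm512_mask_reduce_add_epi32(0xffff, a), 48);
/// }
/// ```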
33189#[inline]
33190#[target_feature(enable = "avx512f")]
33191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33192pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33193    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33194}
33195
33196/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33202pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33203    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33204}
33205
33206/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33212pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33213    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33214}
33215
33216/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
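///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(0.5);
///     // 16 lanes of 0.5 sum to exactly 8.0 (the reduction is a pairwise tree,
///     // so for other inputs the result can differ from a strict left-to-right sum)
///     assert_eq!(_mm512_reduce_add_ps(a), 8.0);
/// }
/// ```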
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33222pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33223    unsafe {
33224        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33225        let a = _mm256_add_ps(
33226            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33227            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33228        );
33229        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33230        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33231        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33232    }
33233}
33234
33235/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33236///
33237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33238#[inline]
33239#[target_feature(enable = "avx512f")]
33240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33241pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
33242    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33243}
33244
33245/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33251pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33252    unsafe {
33253        let a = _mm256_add_pd(
33254            _mm512_extractf64x4_pd::<0>(a),
33255            _mm512_extractf64x4_pd::<1>(a),
33256        );
33257        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33258        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33259    }
33260}
33261
33262/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33263///
33264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33265#[inline]
33266#[target_feature(enable = "avx512f")]
33267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33268pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
33269    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33270}
33271
33272/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33273///
33274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33275#[inline]
33276#[target_feature(enable = "avx512f")]
33277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33278pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33279    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33280}
33281
33282/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
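///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_epi32(2);
///     // inactive lanes are replaced by the multiplicative identity 1,
///     // so only the three selected lanes contribute: 2 * 2 * 2 = 8
///     assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0111, a), 8);
/// }
/// ```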
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33288pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33289    unsafe {
33290        simd_reduce_mul_unordered(simd_select_bitmask(
33291            k,
33292            a.as_i32x16(),
33293            _mm512_set1_epi32(1).as_i32x16(),
33294        ))
33295    }
33296}
33297
33298/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33299///
33300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33301#[inline]
33302#[target_feature(enable = "avx512f")]
33303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33304pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33305    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33306}
33307
33308/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33309///
33310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33311#[inline]
33312#[target_feature(enable = "avx512f")]
33313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33314pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33315    unsafe {
33316        simd_reduce_mul_unordered(simd_select_bitmask(
33317            k,
33318            a.as_i64x8(),
33319            _mm512_set1_epi64(1).as_i64x8(),
33320        ))
33321    }
33322}
33323
33324/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33325///
33326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33327#[inline]
33328#[target_feature(enable = "avx512f")]
33329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33330pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33331    unsafe {
33332        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33333        let a = _mm256_mul_ps(
33334            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33335            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33336        );
33337        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33338        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33339        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33340    }
33341}
33342
33343/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33344///
33345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33346#[inline]
33347#[target_feature(enable = "avx512f")]
33348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33349pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
33350    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33351}
33352
33353/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33354///
33355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33356#[inline]
33357#[target_feature(enable = "avx512f")]
33358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33359pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33360    unsafe {
33361        let a = _mm256_mul_pd(
33362            _mm512_extractf64x4_pd::<0>(a),
33363            _mm512_extractf64x4_pd::<1>(a),
33364        );
33365        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33366        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33367    }
33368}
33369
33370/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33371///
33372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33373#[inline]
33374#[target_feature(enable = "avx512f")]
33375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33376pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
33377    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33378}
33379
33380/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33381///
33382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33383#[inline]
33384#[target_feature(enable = "avx512f")]
33385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33386pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33387    unsafe { simd_reduce_max(a.as_i32x16()) }
33388}
33389
33390/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
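///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // lanes 8..=15 are masked off (they count as i32::MIN), so the maximum is 7
///     assert_eq!(_mm512_mask_reduce_max_epi32(0x00ff, a), 7);
/// }
/// ```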
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33396pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33397    unsafe {
33398        simd_reduce_max(simd_select_bitmask(
33399            k,
33400            a.as_i32x16(),
33401            i32x16::splat(i32::MIN),
33402        ))
33403    }
33404}
33405
33406/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33407///
33408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33409#[inline]
33410#[target_feature(enable = "avx512f")]
33411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33412pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33413    unsafe { simd_reduce_max(a.as_i64x8()) }
33414}
33415
33416/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33417///
33418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33419#[inline]
33420#[target_feature(enable = "avx512f")]
33421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33422pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
33423    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33424}
33425
33426/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33433    unsafe { simd_reduce_max(a.as_u32x16()) }
33434}
33435
33436/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33442pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
33443    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33444}
33445
33446/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33452pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33453    unsafe { simd_reduce_max(a.as_u64x8()) }
33454}
33455
33456/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33462pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
33463    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33464}
33465
33466/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33472pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33473    unsafe {
33474        let a = _mm256_max_ps(
33475            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33476            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33477        );
33478        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33479        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33480        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33481    }
33482}
33483
33484/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33485///
33486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33487#[inline]
33488#[target_feature(enable = "avx512f")]
33489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33490pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
33491    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33492}
33493
33494/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33495///
33496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33497#[inline]
33498#[target_feature(enable = "avx512f")]
33499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33500pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33501    unsafe {
33502        let a = _mm256_max_pd(
33503            _mm512_extractf64x4_pd::<0>(a),
33504            _mm512_extractf64x4_pd::<1>(a),
33505        );
33506        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33507        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33508    }
33509}
33510
33511/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33512///
33513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33514#[inline]
33515#[target_feature(enable = "avx512f")]
33516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33517pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
33518    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33519}
33520
33521/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33522///
33523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33524#[inline]
33525#[target_feature(enable = "avx512f")]
33526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33527pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33528    unsafe { simd_reduce_min(a.as_i32x16()) }
33529}
33530
33531/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33537pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33538    unsafe {
33539        simd_reduce_min(simd_select_bitmask(
33540            k,
33541            a.as_i32x16(),
33542            i32x16::splat(i32::MAX),
33543        ))
33544    }
33545}
33546
33547/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33548///
33549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33550#[inline]
33551#[target_feature(enable = "avx512f")]
33552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33553pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33554    unsafe { simd_reduce_min(a.as_i64x8()) }
33555}
33556
33557/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33558///
33559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33560#[inline]
33561#[target_feature(enable = "avx512f")]
33562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33563pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
33564    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33565}
33566
33567/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33573pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33574    unsafe { simd_reduce_min(a.as_u32x16()) }
33575}
33576
33577/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
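///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(9, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0);
///     // lanes 8..=15 are masked off (they count as u32::MAX), so the zeros
///     // there cannot win; the minimum over the active lanes is 1
///     assert_eq!(_mm512_mask_reduce_min_epu32(0x00ff, a), 1);
/// }
/// ```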
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33584    unsafe {
33585        simd_reduce_min(simd_select_bitmask(
33586            k,
33587            a.as_u32x16(),
33588            u32x16::splat(u32::MAX),
33589        ))
33590    }
33591}
33592
33593/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33594///
33595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33596#[inline]
33597#[target_feature(enable = "avx512f")]
33598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33599pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33600    unsafe { simd_reduce_min(a.as_u64x8()) }
33601}
33602
33603/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33604///
33605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
33606#[inline]
33607#[target_feature(enable = "avx512f")]
33608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33609pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
33610    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33611}
33612
33613/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33619pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33620    unsafe {
33621        let a = _mm256_min_ps(
33622            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33623            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33624        );
33625        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33626        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33627        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33628    }
33629}
33630
33631/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33632///
33633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33634#[inline]
33635#[target_feature(enable = "avx512f")]
33636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33637pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
33638    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33639}
33640
33641/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33642///
33643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33644#[inline]
33645#[target_feature(enable = "avx512f")]
33646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33647pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33648    unsafe {
33649        let a = _mm256_min_pd(
33650            _mm512_extractf64x4_pd::<0>(a),
33651            _mm512_extractf64x4_pd::<1>(a),
33652        );
33653        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33654        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33655    }
33656}
33657
33658/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33659///
33660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33661#[inline]
33662#[target_feature(enable = "avx512f")]
33663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33664pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
33665    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33666}
33667
33668/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33669///
33670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33671#[inline]
33672#[target_feature(enable = "avx512f")]
33673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33674pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33675    unsafe { simd_reduce_and(a.as_i32x16()) }
33676}
33677
33678/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
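///
/// A minimal usage sketch (illustrative only; assumes nightly with
/// `stdarch_x86_avx512` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_setr_epi32(0b1010, 0b0110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     // inactive lanes contribute the AND identity (all bits set), so the
///     // zeros in the masked-off lanes do not clear the result
///     assert_eq!(_mm512_mask_reduce_and_epi32(0b0011, a), 0b0010);
/// }
/// ```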
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
33685    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33686}
33687
33688/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33695    unsafe { simd_reduce_and(a.as_i64x8()) }
33696}
33697
33698/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
33705    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33706}
33707
33708/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33714pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33715    unsafe { simd_reduce_or(a.as_i32x16()) }
33716}
33717
33718/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33724pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
33725    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33726}
33727
33728/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33735    unsafe { simd_reduce_or(a.as_i64x8()) }
33736}
33737
33738/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33744pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
33745    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33746}
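
// Illustrative sketch (not part of the API surface): the masked reductions above fill
// inactive lanes with the operation's identity element (-1 for AND, 0 for OR), so lanes
// that are masked off cannot influence the result. The helper below is hypothetical and
// only demonstrates the expected values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn reduce_identity_sketch() {
    let a = _mm512_set1_epi32(0b1010);
    // With every lane active, AND/OR of sixteen identical values is the value itself.
    assert_eq!(_mm512_reduce_and_epi32(a), 0b1010);
    assert_eq!(_mm512_reduce_or_epi32(a), 0b1010);
    // With only the low two lanes active, inactive lanes contribute -1 (for AND) or 0
    // (for OR), leaving the result unchanged.
    assert_eq!(_mm512_mask_reduce_and_epi32(0b11, a), 0b1010);
    assert_eq!(_mm512_mask_reduce_or_epi32(0b11, a), 0b1010);
}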
33747
33748/// Returns vector of type `__m512d` with indeterminate elements.
33749/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33750/// In practice, this is equivalent to [`mem::zeroed`].
33751///
33752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33753#[inline]
33754#[target_feature(enable = "avx512f")]
33755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33756// This intrinsic has no corresponding instruction.
33757pub fn _mm512_undefined_pd() -> __m512d {
33758    unsafe { const { mem::zeroed() } }
33759}
33760
33761/// Returns vector of type `__m512` with indeterminate elements.
33762/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33763/// In practice, this is equivalent to [`mem::zeroed`].
33764///
33765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33766#[inline]
33767#[target_feature(enable = "avx512f")]
33768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33769// This intrinsic has no corresponding instruction.
33770pub fn _mm512_undefined_ps() -> __m512 {
33771    unsafe { const { mem::zeroed() } }
33772}
33773
33774/// Returns vector of type `__m512i` with indeterminate elements.
33775/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33776/// In practice, this is equivalent to [`mem::zeroed`].
33777///
33778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33779#[inline]
33780#[target_feature(enable = "avx512f")]
33781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33782// This intrinsic has no corresponding instruction.
33783pub fn _mm512_undefined_epi32() -> __m512i {
33784    unsafe { const { mem::zeroed() } }
33785}
33786
33787/// Returns vector of type `__m512` with indeterminate elements.
33788/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33789/// In practice, this is equivalent to [`mem::zeroed`].
33790///
33791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33792#[inline]
33793#[target_feature(enable = "avx512f")]
33794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33795// This intrinsic has no corresponding instruction.
33796pub fn _mm512_undefined() -> __m512 {
33797    unsafe { const { mem::zeroed() } }
33798}
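
// Minimal sketch, assuming an AVX-512F target: despite the name, the `_mm512_undefined*`
// family returns a concrete, fully-initialized value (all zeros in this implementation),
// never uninitialized memory, so the result may be read immediately. The helper below is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn undefined_is_initialized_sketch() -> i32 {
    // Reading the "undefined" vector is sound; in practice every lane is zero.
    _mm512_reduce_or_epi32(_mm512_undefined_epi32())
}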
33799
33800/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33801///
33802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33803#[inline]
33804#[target_feature(enable = "avx512f")]
33805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33806#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33807pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
33808    ptr::read_unaligned(mem_addr as *const __m512i)
33809}
33810
33811/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33812///
33813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33814#[inline]
33815#[target_feature(enable = "avx512f,avx512vl")]
33816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33817#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33818pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
33819    ptr::read_unaligned(mem_addr as *const __m256i)
33820}
33821
33822/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33823///
33824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33825#[inline]
33826#[target_feature(enable = "avx512f,avx512vl")]
33827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33828#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33829pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
33830    ptr::read_unaligned(mem_addr as *const __m128i)
33831}
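
// Illustrative sketch: because the unaligned loads above take a raw `*const i32` with no
// alignment requirement, they can read straight out of an ordinary slice. The helper below
// is hypothetical and assumes the slice holds at least 16 values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn loadu_from_slice_sketch(data: &[i32]) -> __m512i {
    assert!(data.len() >= 16);
    // Safety: the pointer is valid for 16 i32 reads and needs no particular alignment.
    unsafe { _mm512_loadu_epi32(data.as_ptr()) }
}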
33832
33833/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33834///
33835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33836#[inline]
33837#[target_feature(enable = "avx512f")]
33838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33839#[cfg_attr(test, assert_instr(vpmovdw))]
33840pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33841    vpmovdwmem(mem_addr, a.as_i32x16(), k);
33842}
33843
33844/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33845///
33846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33847#[inline]
33848#[target_feature(enable = "avx512f,avx512vl")]
33849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33850#[cfg_attr(test, assert_instr(vpmovdw))]
33851pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33852    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
33853}
33854
33855/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33856///
33857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33858#[inline]
33859#[target_feature(enable = "avx512f,avx512vl")]
33860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33861#[cfg_attr(test, assert_instr(vpmovdw))]
33862pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33863    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
33864}
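
// Illustrative sketch: the masked down-converting stores above write only the destination
// lanes whose mask bit is set; all other memory bytes are left untouched. Hypothetical
// helper, assuming `out` has room for 16 i16 values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn masked_narrowing_store_sketch(out: &mut [i16; 16]) {
    let a = _mm512_set1_epi32(0x0001_0203);
    // Safety: `out` is valid for 16 i16 writes. Only the low four lanes are stored here;
    // lanes 4..16 of `out` keep their previous contents.
    unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b1111, a) };
    // Each written lane now holds the truncated low 16 bits of the source, i.e. 0x0203.
}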
33865
33866/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33872#[cfg_attr(test, assert_instr(vpmovsdw))]
33873pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33874    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
33875}
33876
33877/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33878///
33879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33880#[inline]
33881#[target_feature(enable = "avx512f,avx512vl")]
33882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33883#[cfg_attr(test, assert_instr(vpmovsdw))]
33884pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33885    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
33886}
33887
33888/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33889///
33890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33891#[inline]
33892#[target_feature(enable = "avx512f,avx512vl")]
33893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33894#[cfg_attr(test, assert_instr(vpmovsdw))]
33895pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33896    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
33897}
33898
33899/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33900///
33901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33902#[inline]
33903#[target_feature(enable = "avx512f")]
33904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33905#[cfg_attr(test, assert_instr(vpmovusdw))]
33906pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33907    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
33908}
33909
33910/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33911///
33912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33913#[inline]
33914#[target_feature(enable = "avx512f,avx512vl")]
33915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33916#[cfg_attr(test, assert_instr(vpmovusdw))]
33917pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33918    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
33919}
33920
33921/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33922///
33923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33924#[inline]
33925#[target_feature(enable = "avx512f,avx512vl")]
33926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33927#[cfg_attr(test, assert_instr(vpmovusdw))]
33928pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33929    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
33930}
33931
33932/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33933///
33934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33935#[inline]
33936#[target_feature(enable = "avx512f")]
33937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33938#[cfg_attr(test, assert_instr(vpmovdb))]
33939pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33940    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33941}
33942
33943/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33944///
33945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33946#[inline]
33947#[target_feature(enable = "avx512f,avx512vl")]
33948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33949#[cfg_attr(test, assert_instr(vpmovdb))]
33950pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33951    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33952}
33953
33954/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33955///
33956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33957#[inline]
33958#[target_feature(enable = "avx512f,avx512vl")]
33959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33960#[cfg_attr(test, assert_instr(vpmovdb))]
33961pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33962    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33963}
33964
33965/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33966///
33967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33968#[inline]
33969#[target_feature(enable = "avx512f")]
33970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33971#[cfg_attr(test, assert_instr(vpmovsdb))]
33972pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33973    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33974}
33975
33976/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33977///
33978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
33979#[inline]
33980#[target_feature(enable = "avx512f,avx512vl")]
33981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33982#[cfg_attr(test, assert_instr(vpmovsdb))]
33983pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33984    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
33985}
33986
33987/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33988///
33989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
33990#[inline]
33991#[target_feature(enable = "avx512f,avx512vl")]
33992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33993#[cfg_attr(test, assert_instr(vpmovsdb))]
33994pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33995    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
33996}
33997
33998/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33999///
34000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34001#[inline]
34002#[target_feature(enable = "avx512f")]
34003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34004#[cfg_attr(test, assert_instr(vpmovusdb))]
34005pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
34006    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34007}
34008
34009/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34010///
34011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34012#[inline]
34013#[target_feature(enable = "avx512f,avx512vl")]
34014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34015#[cfg_attr(test, assert_instr(vpmovusdb))]
34016pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34017    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34018}
34019
34020/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34021///
34022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34023#[inline]
34024#[target_feature(enable = "avx512f,avx512vl")]
34025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34026#[cfg_attr(test, assert_instr(vpmovusdb))]
34027pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34028    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34029}
34030
34031/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34032///
34033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34034#[inline]
34035#[target_feature(enable = "avx512f")]
34036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34037#[cfg_attr(test, assert_instr(vpmovqw))]
34038pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34039    vpmovqwmem(mem_addr, a.as_i64x8(), k);
34040}
34041
34042/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34043///
34044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34045#[inline]
34046#[target_feature(enable = "avx512f,avx512vl")]
34047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34048#[cfg_attr(test, assert_instr(vpmovqw))]
34049pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34050    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
34051}
34052
34053/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34054///
34055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34056#[inline]
34057#[target_feature(enable = "avx512f,avx512vl")]
34058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34059#[cfg_attr(test, assert_instr(vpmovqw))]
34060pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34061    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
34062}
34063
34064/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34065///
34066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34067#[inline]
34068#[target_feature(enable = "avx512f")]
34069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34070#[cfg_attr(test, assert_instr(vpmovsqw))]
34071pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34072    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
34073}
34074
34075/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34076///
34077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34078#[inline]
34079#[target_feature(enable = "avx512f,avx512vl")]
34080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34081#[cfg_attr(test, assert_instr(vpmovsqw))]
34082pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34083    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
34084}
34085
34086/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34087///
34088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34089#[inline]
34090#[target_feature(enable = "avx512f,avx512vl")]
34091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34092#[cfg_attr(test, assert_instr(vpmovsqw))]
34093pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34094    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
34095}
34096
34097/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34098///
34099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34100#[inline]
34101#[target_feature(enable = "avx512f")]
34102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34103#[cfg_attr(test, assert_instr(vpmovusqw))]
34104pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34105    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
34106}
34107
34108/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34109///
34110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34111#[inline]
34112#[target_feature(enable = "avx512f,avx512vl")]
34113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34114#[cfg_attr(test, assert_instr(vpmovusqw))]
34115pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34116    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
34117}
34118
34119/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34120///
34121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34122#[inline]
34123#[target_feature(enable = "avx512f,avx512vl")]
34124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34125#[cfg_attr(test, assert_instr(vpmovusqw))]
34126pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34127    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
34128}
34129
34130/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34131///
34132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34133#[inline]
34134#[target_feature(enable = "avx512f")]
34135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34136#[cfg_attr(test, assert_instr(vpmovqb))]
34137pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34138    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34139}
34140
34141/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34142///
34143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34144#[inline]
34145#[target_feature(enable = "avx512f,avx512vl")]
34146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34147#[cfg_attr(test, assert_instr(vpmovqb))]
34148pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34149    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34150}
34151
34152/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34153///
34154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34155#[inline]
34156#[target_feature(enable = "avx512f,avx512vl")]
34157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34158#[cfg_attr(test, assert_instr(vpmovqb))]
34159pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34160    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34161}
34162
34163/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34166#[inline]
34167#[target_feature(enable = "avx512f")]
34168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34169#[cfg_attr(test, assert_instr(vpmovsqb))]
34170pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34171    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34172}
34173
34174/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34175///
34176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34177#[inline]
34178#[target_feature(enable = "avx512f,avx512vl")]
34179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34180#[cfg_attr(test, assert_instr(vpmovsqb))]
34181pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34182    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34183}
34184
34185/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34186///
34187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34188#[inline]
34189#[target_feature(enable = "avx512f,avx512vl")]
34190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34191#[cfg_attr(test, assert_instr(vpmovsqb))]
34192pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34193    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34194}
34195
34196/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34197///
34198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34199#[inline]
34200#[target_feature(enable = "avx512f")]
34201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34202#[cfg_attr(test, assert_instr(vpmovusqb))]
34203pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34204    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34205}
34206
34207/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34208///
34209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34210#[inline]
34211#[target_feature(enable = "avx512f,avx512vl")]
34212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34213#[cfg_attr(test, assert_instr(vpmovusqb))]
34214pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34215    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34216}
34217
34218/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34219///
34220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34221#[inline]
34222#[target_feature(enable = "avx512f,avx512vl")]
34223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34224#[cfg_attr(test, assert_instr(vpmovusqb))]
34225pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34226    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34227}
34228
34229/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34230///
34231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34232#[inline]
34233#[target_feature(enable = "avx512f")]
34234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34235#[cfg_attr(test, assert_instr(vpmovqd))]
34236pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34237    vpmovqdmem(mem_addr, a.as_i64x8(), k);
34238}
34239
34240/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34241///
34242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34243#[inline]
34244#[target_feature(enable = "avx512f,avx512vl")]
34245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34246#[cfg_attr(test, assert_instr(vpmovqd))]
34247pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34248    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
34249}
34250
34251/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34252///
34253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34254#[inline]
34255#[target_feature(enable = "avx512f,avx512vl")]
34256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34257#[cfg_attr(test, assert_instr(vpmovqd))]
34258pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34259    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
34260}
34261
34262/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34263///
34264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34265#[inline]
34266#[target_feature(enable = "avx512f")]
34267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34268#[cfg_attr(test, assert_instr(vpmovsqd))]
34269pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34270    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
34271}
34272
34273/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34274///
34275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34276#[inline]
34277#[target_feature(enable = "avx512f,avx512vl")]
34278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34279#[cfg_attr(test, assert_instr(vpmovsqd))]
34280pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34281    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
34282}
34283
34284/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34285///
34286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34287#[inline]
34288#[target_feature(enable = "avx512f,avx512vl")]
34289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34290#[cfg_attr(test, assert_instr(vpmovsqd))]
34291pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34292    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
34293}
34294
34295/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34298#[inline]
34299#[target_feature(enable = "avx512f")]
34300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34301#[cfg_attr(test, assert_instr(vpmovusqd))]
34302pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34303    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
34304}
34305
34306/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34307///
34308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34309#[inline]
34310#[target_feature(enable = "avx512f,avx512vl")]
34311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34312#[cfg_attr(test, assert_instr(vpmovusqd))]
34313pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34314    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
34315}
34316
34317/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34318///
34319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34320#[inline]
34321#[target_feature(enable = "avx512f,avx512vl")]
34322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34323#[cfg_attr(test, assert_instr(vpmovusqd))]
34324pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34325    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
34326}
34327
34328/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34329///
34330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
34331#[inline]
34332#[target_feature(enable = "avx512f")]
34333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34334#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34335pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
34336    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34337}
34338
34339/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34340///
34341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34342#[inline]
34343#[target_feature(enable = "avx512f,avx512vl")]
34344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34345#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34346pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
34347    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34348}
34349
34350/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34351///
34352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34353#[inline]
34354#[target_feature(enable = "avx512f,avx512vl")]
34355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34356#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34357pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
34358    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34359}
34360
34361/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34362///
34363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
34364#[inline]
34365#[target_feature(enable = "avx512f")]
34366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34367#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34368pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
34369    ptr::read_unaligned(mem_addr as *const __m512i)
34370}
34371
34372/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34373///
34374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34375#[inline]
34376#[target_feature(enable = "avx512f,avx512vl")]
34377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34378#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34379pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
34380    ptr::read_unaligned(mem_addr as *const __m256i)
34381}
34382
34383/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34384///
34385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34386#[inline]
34387#[target_feature(enable = "avx512f,avx512vl")]
34388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34389#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34390pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
34391    ptr::read_unaligned(mem_addr as *const __m128i)
34392}
34393
34394/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34395///
34396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34397#[inline]
34398#[target_feature(enable = "avx512f")]
34399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34400#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34401pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
34402    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34403}
34404
34405/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34406///
34407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34408#[inline]
34409#[target_feature(enable = "avx512f,avx512vl")]
34410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34411#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34412pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
34413    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34414}
34415
34416/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34417///
34418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34419#[inline]
34420#[target_feature(enable = "avx512f,avx512vl")]
34421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34422#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34423pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
34424    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34425}
34426
34427/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34433#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34434pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
34435    ptr::read_unaligned(mem_addr as *const __m512i)
34436}
34437
34438/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34439///
34440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34441#[inline]
34442#[target_feature(enable = "avx512f")]
34443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34444#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34445pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
34446    ptr::write_unaligned(mem_addr, a);
34447}
34448
34449/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34450/// floating-point elements) from memory into result.
34451/// `mem_addr` does not need to be aligned on any particular boundary.
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34454#[inline]
34455#[target_feature(enable = "avx512f")]
34456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34457#[cfg_attr(test, assert_instr(vmovups))]
34458pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
34459    ptr::read_unaligned(mem_addr as *const __m512d)
34460}
34461
34462/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34463/// floating-point elements) from `a` into memory.
34464/// `mem_addr` does not need to be aligned on any particular boundary.
34465///
34466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34467#[inline]
34468#[target_feature(enable = "avx512f")]
34469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34470#[cfg_attr(test, assert_instr(vmovups))]
34471pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
34472    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34473}
34474
34475/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34476/// floating-point elements) from memory into result.
34477/// `mem_addr` does not need to be aligned on any particular boundary.
34478///
34479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34480#[inline]
34481#[target_feature(enable = "avx512f")]
34482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34483#[cfg_attr(test, assert_instr(vmovups))]
34484pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
34485    ptr::read_unaligned(mem_addr as *const __m512)
34486}
34487
34488/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34489/// floating-point elements) from `a` into memory.
34490/// `mem_addr` does not need to be aligned on any particular boundary.
34491///
34492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34493#[inline]
34494#[target_feature(enable = "avx512f")]
34495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34496#[cfg_attr(test, assert_instr(vmovups))]
34497pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
34498    ptr::write_unaligned(mem_addr as *mut __m512, a);
34499}
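
// Illustrative sketch: the unaligned `loadu`/`storeu` pairs above accept any address, so
// they can move data through arbitrarily placed buffers. Hypothetical helper; assumes
// `src` and `dst` each hold at least 16 f32 values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn copy_16_f32_sketch(src: &[f32], dst: &mut [f32]) {
    assert!(src.len() >= 16 && dst.len() >= 16);
    // Safety: both pointers are valid for 16 f32 elements; no alignment is required.
    unsafe {
        let v = _mm512_loadu_ps(src.as_ptr());
        _mm512_storeu_ps(dst.as_mut_ptr(), v);
    }
}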
34500
34501/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34502///
34503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
34504#[inline]
34505#[target_feature(enable = "avx512f")]
34506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34507#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34508pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
34509    ptr::read(mem_addr as *const __m512i)
34510}
34511
34512/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34513///
34514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34515#[inline]
34516#[target_feature(enable = "avx512f")]
34517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34518#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34519pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
34520    ptr::write(mem_addr, a);
34521}
34522
34523/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34526#[inline]
34527#[target_feature(enable = "avx512f")]
34528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34529#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34530pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
34531    ptr::read(mem_addr as *const __m512i)
34532}
34533
34534/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34535///
34536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34537#[inline]
34538#[target_feature(enable = "avx512f,avx512vl")]
34539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34540#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34541pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
34542    ptr::read(mem_addr as *const __m256i)
34543}
34544
34545/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34546///
34547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34548#[inline]
34549#[target_feature(enable = "avx512f,avx512vl")]
34550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34551#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34552pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
34553    ptr::read(mem_addr as *const __m128i)
34554}
34555
34556/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34557///
34558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34559#[inline]
34560#[target_feature(enable = "avx512f")]
34561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34562#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34563pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
34564    ptr::write(mem_addr as *mut __m512i, a);
34565}
34566
34567/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl")]
34572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34573#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34574pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
34575    ptr::write(mem_addr as *mut __m256i, a);
34576}
34577
34578/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34579///
34580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34581#[inline]
34582#[target_feature(enable = "avx512f,avx512vl")]
34583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34584#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34585pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
34586    ptr::write(mem_addr as *mut __m128i, a);
34587}
34588
34589/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34590///
34591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34592#[inline]
34593#[target_feature(enable = "avx512f")]
34594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34595#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34596pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
34597    ptr::read(mem_addr as *const __m512i)
34598}
34599
34600/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34601///
34602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34603#[inline]
34604#[target_feature(enable = "avx512f,avx512vl")]
34605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34606#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34607pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
34608    ptr::read(mem_addr as *const __m256i)
34609}
34610
34611/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34612///
34613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34614#[inline]
34615#[target_feature(enable = "avx512f,avx512vl")]
34616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34617#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34618pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
34619    ptr::read(mem_addr as *const __m128i)
34620}
34621
34622/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34623///
34624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34625#[inline]
34626#[target_feature(enable = "avx512f")]
34627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34628#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34629pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
34630    ptr::write(mem_addr as *mut __m512i, a);
34631}
34632
34633/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34634///
34635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34636#[inline]
34637#[target_feature(enable = "avx512f,avx512vl")]
34638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34639#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34640pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
34641    ptr::write(mem_addr as *mut __m256i, a);
34642}
34643
34644/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34645///
34646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34647#[inline]
34648#[target_feature(enable = "avx512f,avx512vl")]
34649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34650#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34651pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
34652    ptr::write(mem_addr as *mut __m128i, a);
34653}
34654
34655/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34656///
34657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34658#[inline]
34659#[target_feature(enable = "avx512f")]
34660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34661#[cfg_attr(test, assert_instr(vmovaps))]
34662pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
34663    ptr::read(mem_addr as *const __m512)
34664}
34665
34666/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34667///
34668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
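///
/// A minimal illustrative sketch (not compiled as a doctest); the `Aligned`
/// wrapper type is a local assumption used only to satisfy the 64-byte
/// alignment requirement, and `avx512f` is assumed to have been detected on
/// the running CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// let mut out = Aligned([0.0; 16]);
/// unsafe {
///     let v = _mm512_set1_ps(2.5);
///     _mm512_store_ps(out.0.as_mut_ptr(), v);
/// }
/// assert!(out.0.iter().all(|&x| x == 2.5));
/// ```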
34669#[inline]
34670#[target_feature(enable = "avx512f")]
34671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34672#[cfg_attr(test, assert_instr(vmovaps))]
34673pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
34674    ptr::write(mem_addr as *mut __m512, a);
34675}
34676
34677/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34678///
34679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34680#[inline]
34681#[target_feature(enable = "avx512f")]
34682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34683#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34684pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
34685    ptr::read(mem_addr as *const __m512d)
34686}
34687
34688/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34689///
34690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34691#[inline]
34692#[target_feature(enable = "avx512f")]
34693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34694#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34695pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
34696    ptr::write(mem_addr as *mut __m512d, a);
34697}
34698
34699/// Load packed 32-bit integers from memory into dst using writemask k
34700/// (elements are copied from src when the corresponding mask bit is not set).
34701/// mem_addr does not need to be aligned on any particular boundary.
34702///
34703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
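///
/// A minimal illustrative sketch of the writemask behavior (not compiled as a
/// doctest); the mask value and data are arbitrary, and `avx512f` is assumed
/// to have been detected on the running CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 16] = core::array::from_fn(|i| i as i32);
/// let v = unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // Lanes whose mask bit is set are loaded from `data`; the remaining
///     // lanes keep the corresponding element of `src` (here, -1).
///     _mm512_mask_loadu_epi32(src, 0b0000_0000_1111_1111, data.as_ptr())
/// };
/// ```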
34704#[inline]
34705#[target_feature(enable = "avx512f")]
34706#[cfg_attr(test, assert_instr(vmovdqu32))]
34707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34708pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
34709    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34710}
34711
34712/// Load packed 32-bit integers from memory into dst using zeromask k
34713/// (elements are zeroed out when the corresponding mask bit is not set).
34714/// mem_addr does not need to be aligned on any particular boundary.
34715///
34716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34717#[inline]
34718#[target_feature(enable = "avx512f")]
34719#[cfg_attr(test, assert_instr(vmovdqu32))]
34720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34721pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34722    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34723}
34724
34725/// Load packed 64-bit integers from memory into dst using writemask k
34726/// (elements are copied from src when the corresponding mask bit is not set).
34727/// mem_addr does not need to be aligned on any particular boundary.
34728///
34729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34730#[inline]
34731#[target_feature(enable = "avx512f")]
34732#[cfg_attr(test, assert_instr(vmovdqu64))]
34733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34734pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
34735    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34736}
34737
34738/// Load packed 64-bit integers from memory into dst using zeromask k
34739/// (elements are zeroed out when the corresponding mask bit is not set).
34740/// mem_addr does not need to be aligned on any particular boundary.
34741///
34742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
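///
/// A minimal illustrative sketch of the zeromask behavior (not compiled as a
/// doctest); the mask value is arbitrary, and `avx512f` is assumed to have
/// been detected on the running CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i64; 8] = [10, 20, 30, 40, 50, 60, 70, 80];
/// // Only the low four lanes are loaded from `data`; the upper four lanes
/// // of the result are zeroed.
/// let v = unsafe { _mm512_maskz_loadu_epi64(0b0000_1111, data.as_ptr()) };
/// ```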
34743#[inline]
34744#[target_feature(enable = "avx512f")]
34745#[cfg_attr(test, assert_instr(vmovdqu64))]
34746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34747pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34748    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34749}
34750
34751/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34752/// (elements are copied from src when the corresponding mask bit is not set).
34753/// mem_addr does not need to be aligned on any particular boundary.
34754///
34755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
34756#[inline]
34757#[target_feature(enable = "avx512f")]
34758#[cfg_attr(test, assert_instr(vmovups))]
34759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34760pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
34761    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34762}
34763
34764/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34765/// (elements are zeroed out when the corresponding mask bit is not set).
34766/// mem_addr does not need to be aligned on any particular boundary.
34767///
34768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34769#[inline]
34770#[target_feature(enable = "avx512f")]
34771#[cfg_attr(test, assert_instr(vmovups))]
34772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34773pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34774    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34775}
34776
34777/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34778/// (elements are copied from src when the corresponding mask bit is not set).
34779/// mem_addr does not need to be aligned on any particular boundary.
34780///
34781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34782#[inline]
34783#[target_feature(enable = "avx512f")]
34784#[cfg_attr(test, assert_instr(vmovupd))]
34785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34786pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
34787    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34788}
34789
34790/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34791/// (elements are zeroed out when the corresponding mask bit is not set).
34792/// mem_addr does not need to be aligned on any particular boundary.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[cfg_attr(test, assert_instr(vmovupd))]
34798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34799pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34800    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34801}
34802
34803/// Load packed 32-bit integers from memory into dst using writemask k
34804/// (elements are copied from src when the corresponding mask bit is not set).
34805/// mem_addr does not need to be aligned on any particular boundary.
34806///
34807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
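///
/// A minimal illustrative sketch (not compiled as a doctest); note that the
/// masked 256-bit variants require both `avx512f` and `avx512vl`, so a runtime
/// check such as the one below is assumed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let data: [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     let v = unsafe {
///         let src = _mm256_set1_epi32(-1);
///         // Lanes 0 and 2 are loaded from `data`; the rest keep `src`.
///         _mm256_mask_loadu_epi32(src, 0b0000_0101, data.as_ptr())
///     };
/// }
/// ```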
34808#[inline]
34809#[target_feature(enable = "avx512f,avx512vl")]
34810#[cfg_attr(test, assert_instr(vmovdqu32))]
34811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34812pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
34813    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34814}
34815
34816/// Load packed 32-bit integers from memory into dst using zeromask k
34817/// (elements are zeroed out when the corresponding mask bit is not set).
34818/// mem_addr does not need to be aligned on any particular boundary.
34819///
34820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34821#[inline]
34822#[target_feature(enable = "avx512f,avx512vl")]
34823#[cfg_attr(test, assert_instr(vmovdqu32))]
34824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34825pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34826    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34827}
34828
34829/// Load packed 64-bit integers from memory into dst using writemask k
34830/// (elements are copied from src when the corresponding mask bit is not set).
34831/// mem_addr does not need to be aligned on any particular boundary.
34832///
34833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34834#[inline]
34835#[target_feature(enable = "avx512f,avx512vl")]
34836#[cfg_attr(test, assert_instr(vmovdqu64))]
34837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34838pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
34839    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34840}
34841
34842/// Load packed 64-bit integers from memory into dst using zeromask k
34843/// (elements are zeroed out when the corresponding mask bit is not set).
34844/// mem_addr does not need to be aligned on any particular boundary.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34847#[inline]
34848#[target_feature(enable = "avx512f,avx512vl")]
34849#[cfg_attr(test, assert_instr(vmovdqu64))]
34850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34851pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34852    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34853}
34854
34855/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34856/// (elements are copied from src when the corresponding mask bit is not set).
34857/// mem_addr does not need to be aligned on any particular boundary.
34858///
34859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34860#[inline]
34861#[target_feature(enable = "avx512f,avx512vl")]
34862#[cfg_attr(test, assert_instr(vmovups))]
34863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34864pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34865    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34866}
34867
34868/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34869/// (elements are zeroed out when the corresponding mask bit is not set).
34870/// mem_addr does not need to be aligned on any particular boundary.
34871///
34872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34873#[inline]
34874#[target_feature(enable = "avx512f,avx512vl")]
34875#[cfg_attr(test, assert_instr(vmovups))]
34876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34877pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34878    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34879}
34880
34881/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34882/// (elements are copied from src when the corresponding mask bit is not set).
34883/// mem_addr does not need to be aligned on any particular boundary.
34884///
34885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34886#[inline]
34887#[target_feature(enable = "avx512f,avx512vl")]
34888#[cfg_attr(test, assert_instr(vmovupd))]
34889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34890pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
34891    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34892}
34893
34894/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34895/// (elements are zeroed out when the corresponding mask bit is not set).
34896/// mem_addr does not need to be aligned on any particular boundary.
34897///
34898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34899#[inline]
34900#[target_feature(enable = "avx512f,avx512vl")]
34901#[cfg_attr(test, assert_instr(vmovupd))]
34902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34903pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34904    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34905}
34906
34907/// Load packed 32-bit integers from memory into dst using writemask k
34908/// (elements are copied from src when the corresponding mask bit is not set).
34909/// mem_addr does not need to be aligned on any particular boundary.
34910///
34911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34912#[inline]
34913#[target_feature(enable = "avx512f,avx512vl")]
34914#[cfg_attr(test, assert_instr(vmovdqu32))]
34915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34916pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
34917    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34918}
34919
34920/// Load packed 32-bit integers from memory into dst using zeromask k
34921/// (elements are zeroed out when the corresponding mask bit is not set).
34922/// mem_addr does not need to be aligned on any particular boundary.
34923///
34924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
34925#[inline]
34926#[target_feature(enable = "avx512f,avx512vl")]
34927#[cfg_attr(test, assert_instr(vmovdqu32))]
34928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34929pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
34930    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
34931}
34932
34933/// Load packed 64-bit integers from memory into dst using writemask k
34934/// (elements are copied from src when the corresponding mask bit is not set).
34935/// mem_addr does not need to be aligned on any particular boundary.
34936///
34937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
34938#[inline]
34939#[target_feature(enable = "avx512f,avx512vl")]
34940#[cfg_attr(test, assert_instr(vmovdqu64))]
34941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34942pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
34943    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
34944}
34945
34946/// Load packed 64-bit integers from memory into dst using zeromask k
34947/// (elements are zeroed out when the corresponding mask bit is not set).
34948/// mem_addr does not need to be aligned on any particular boundary.
34949///
34950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
34951#[inline]
34952#[target_feature(enable = "avx512f,avx512vl")]
34953#[cfg_attr(test, assert_instr(vmovdqu64))]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
34956    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
34957}
34958
34959/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34960/// (elements are copied from src when the corresponding mask bit is not set).
34961/// mem_addr does not need to be aligned on any particular boundary.
34962///
34963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
34964#[inline]
34965#[target_feature(enable = "avx512f,avx512vl")]
34966#[cfg_attr(test, assert_instr(vmovups))]
34967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34968pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
34969    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
34970}
34971
34972/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34973/// (elements are zeroed out when the corresponding mask bit is not set).
34974/// mem_addr does not need to be aligned on any particular boundary.
34975///
34976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
34977#[inline]
34978#[target_feature(enable = "avx512f,avx512vl")]
34979#[cfg_attr(test, assert_instr(vmovups))]
34980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34981pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
34982    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
34983}
34984
34985/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34986/// (elements are copied from src when the corresponding mask bit is not set).
34987/// mem_addr does not need to be aligned on any particular boundary.
34988///
34989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
34990#[inline]
34991#[target_feature(enable = "avx512f,avx512vl")]
34992#[cfg_attr(test, assert_instr(vmovupd))]
34993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34994pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
34995    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
34996}
34997
34998/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34999/// (elements are zeroed out when the corresponding mask bit is not set).
35000/// mem_addr does not need to be aligned on any particular boundary.
35001///
35002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35003#[inline]
35004#[target_feature(enable = "avx512f,avx512vl")]
35005#[cfg_attr(test, assert_instr(vmovupd))]
35006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35007pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35008    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35009}
35010
35011/// Load packed 32-bit integers from memory into dst using writemask k
35012/// (elements are copied from src when the corresponding mask bit is not set).
35013/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35014///
35015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
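///
/// A minimal illustrative sketch (not compiled as a doctest); the `Aligned`
/// wrapper type is a local assumption used only to satisfy the 64-byte
/// alignment requirement, and `avx512f` is assumed to have been detected on
/// the running CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned(core::array::from_fn(|i| i as i32));
/// let v = unsafe {
///     let src = _mm512_setzero_si512();
///     // The upper eight lanes are loaded from `data`; the lower eight keep
///     // the corresponding elements of `src` (here, zero).
///     _mm512_mask_load_epi32(src, 0xFF00, data.0.as_ptr())
/// };
/// ```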
35016#[inline]
35017#[target_feature(enable = "avx512f")]
35018#[cfg_attr(test, assert_instr(vmovdqa32))]
35019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35020pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
35021    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35022}
35023
35024/// Load packed 32-bit integers from memory into dst using zeromask k
35025/// (elements are zeroed out when the corresponding mask bit is not set).
35026/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35029#[inline]
35030#[target_feature(enable = "avx512f")]
35031#[cfg_attr(test, assert_instr(vmovdqa32))]
35032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35033pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35034    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35035}
35036
35037/// Load packed 64-bit integers from memory into dst using writemask k
35038/// (elements are copied from src when the corresponding mask bit is not set).
35039/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35040///
35041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35042#[inline]
35043#[target_feature(enable = "avx512f")]
35044#[cfg_attr(test, assert_instr(vmovdqa64))]
35045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35046pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
35047    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35048}
35049
35050/// Load packed 64-bit integers from memory into dst using zeromask k
35051/// (elements are zeroed out when the corresponding mask bit is not set).
35052/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35055#[inline]
35056#[target_feature(enable = "avx512f")]
35057#[cfg_attr(test, assert_instr(vmovdqa64))]
35058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35059pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35060    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35061}
35062
35063/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35064/// (elements are copied from src when the corresponding mask bit is not set).
35065/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35066///
35067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35068#[inline]
35069#[target_feature(enable = "avx512f")]
35070#[cfg_attr(test, assert_instr(vmovaps))]
35071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35072pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
35073    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35074}
35075
35076/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35077/// (elements are zeroed out when the corresponding mask bit is not set).
35078/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35081#[inline]
35082#[target_feature(enable = "avx512f")]
35083#[cfg_attr(test, assert_instr(vmovaps))]
35084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35085pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35086    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35087}
35088
35089/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovapd))]
35097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35098pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
35099    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35100}
35101
35102/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovapd))]
35110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35111pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35112    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35113}
35114
35115/// Load packed 32-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35120#[inline]
35121#[target_feature(enable = "avx512f,avx512vl")]
35122#[cfg_attr(test, assert_instr(vmovdqa32))]
35123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35124pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
35125    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35126}
35127
35128/// Load packed 32-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35133#[inline]
35134#[target_feature(enable = "avx512f,avx512vl")]
35135#[cfg_attr(test, assert_instr(vmovdqa32))]
35136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35137pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35138    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35139}
35140
35141/// Load packed 64-bit integers from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35146#[inline]
35147#[target_feature(enable = "avx512f,avx512vl")]
35148#[cfg_attr(test, assert_instr(vmovdqa64))]
35149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35150pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
35151    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35152}
35153
35154/// Load packed 64-bit integers from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35159#[inline]
35160#[target_feature(enable = "avx512f,avx512vl")]
35161#[cfg_attr(test, assert_instr(vmovdqa64))]
35162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35163pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35164    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35165}
35166
35167/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35172#[inline]
35173#[target_feature(enable = "avx512f,avx512vl")]
35174#[cfg_attr(test, assert_instr(vmovaps))]
35175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35176pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35177    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35178}
35179
35180/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35185#[inline]
35186#[target_feature(enable = "avx512f,avx512vl")]
35187#[cfg_attr(test, assert_instr(vmovaps))]
35188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35189pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35190    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35191}
35192
35193/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovapd))]
35201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35202pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
35203    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35204}
35205
35206/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovapd))]
35214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35215pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
35216    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35217}
35218
35219/// Load packed 32-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa32))]
35227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35228pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
35229    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35230}
35231
35232/// Load packed 32-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa32))]
35240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35241pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35242    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35243}
35244
35245/// Load packed 64-bit integers from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovdqa64))]
35253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35254pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35255    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35256}
35257
35258/// Load packed 64-bit integers from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovdqa64))]
35266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35267pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35268    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35269}
35270
35271/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovaps))]
35279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35280pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35281    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35282}
35283
35284/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovaps))]
35292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35293pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35294    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35295}
35296
35297/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovapd))]
35305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35306pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35307    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35308}
35309
35310/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovapd))]
35318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35319pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35320    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35321}
35322
35323/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35324/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35325/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35326/// exception may be generated.
35327///
35328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
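///
/// A minimal illustrative sketch (not compiled as a doctest); the `Aligned`
/// wrapper type is a local assumption used only to satisfy the 16-byte
/// alignment requirement, and `avx512f` is assumed to have been detected on
/// the running CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let x = Aligned(4.0);
/// let v = unsafe {
///     let src = _mm_set_ss(1.0);
///     // With mask bit 0 set, lane 0 is loaded from memory (4.0); with it
///     // clear, lane 0 would be copied from `src` (1.0). Lanes 1..=3 are zeroed.
///     _mm_mask_load_ss(src, 0b1, &x.0 as *const f32)
/// };
/// ```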
35329#[inline]
35330#[cfg_attr(test, assert_instr(vmovss))]
35331#[target_feature(enable = "avx512f")]
35332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35333pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35334    let mut dst: __m128 = src;
35335    asm!(
35336        vpl!("vmovss {dst}{{{k}}}"),
35337        p = in(reg) mem_addr,
35338        k = in(kreg) k,
35339        dst = inout(xmm_reg) dst,
35340        options(pure, readonly, nostack, preserves_flags),
35341    );
35342    dst
35343}
35344
35345/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35346/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35347/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35348/// exception may be generated.
35349///
35350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35351#[inline]
35352#[cfg_attr(test, assert_instr(vmovss))]
35353#[target_feature(enable = "avx512f")]
35354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35355pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35356    let mut dst: __m128;
35357    asm!(
35358        vpl!("vmovss {dst}{{{k}}} {{z}}"),
35359        p = in(reg) mem_addr,
35360        k = in(kreg) k,
35361        dst = out(xmm_reg) dst,
35362        options(pure, readonly, nostack, preserves_flags),
35363    );
35364    dst
35365}
35366
35367/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35368/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35369/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35370/// exception may be generated.
35371///
35372/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35373#[inline]
35374#[cfg_attr(test, assert_instr(vmovsd))]
35375#[target_feature(enable = "avx512f")]
35376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35377pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35378    let mut dst: __m128d = src;
35379    asm!(
35380        vpl!("vmovsd {dst}{{{k}}}"),
35381        p = in(reg) mem_addr,
35382        k = in(kreg) k,
35383        dst = inout(xmm_reg) dst,
35384        options(pure, readonly, nostack, preserves_flags),
35385    );
35386    dst
35387}
35388
35389/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35390/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35391/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35392/// may be generated.
35393///
35394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35395#[inline]
35396#[cfg_attr(test, assert_instr(vmovsd))]
35397#[target_feature(enable = "avx512f")]
35398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35399pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35400    let mut dst: __m128d;
35401    asm!(
35402        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35403        p = in(reg) mem_addr,
35404        k = in(kreg) k,
35405        dst = out(xmm_reg) dst,
35406        options(pure, readonly, nostack, preserves_flags),
35407    );
35408    dst
35409}
35410
35411/// Store packed 32-bit integers from a into memory using writemask k.
35412/// mem_addr does not need to be aligned on any particular boundary.
35413///
35414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
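///
/// A minimal illustrative sketch (not compiled as a doctest); the mask value
/// is arbitrary, and `avx512f` is assumed to have been detected on the running
/// CPU.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [-1i32; 16];
/// unsafe {
///     let a = _mm512_set1_epi32(7);
///     // Only the low four lanes are written; out[4..] is left untouched.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0b0000_0000_0000_1111, a);
/// }
/// assert_eq!(&out[..4], &[7; 4]);
/// assert_eq!(&out[4..], &[-1; 12]);
/// ```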
35415#[inline]
35416#[target_feature(enable = "avx512f")]
35417#[cfg_attr(test, assert_instr(vmovdqu32))]
35418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35419pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35420    storedqu32_512(mem_addr, a.as_i32x16(), mask)
35421}
35422
35423/// Store packed 64-bit integers from a into memory using writemask k.
35424/// mem_addr does not need to be aligned on any particular boundary.
35425///
35426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35427#[inline]
35428#[target_feature(enable = "avx512f")]
35429#[cfg_attr(test, assert_instr(vmovdqu64))]
35430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35431pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35432    storedqu64_512(mem_addr, a.as_i64x8(), mask)
35433}
35434
35435/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35436/// mem_addr does not need to be aligned on any particular boundary.
35437///
35438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35439#[inline]
35440#[target_feature(enable = "avx512f")]
35441#[cfg_attr(test, assert_instr(vmovups))]
35442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35443pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35444    storeups_512(mem_addr, a.as_f32x16(), mask)
35445}
35446
35447/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35448/// mem_addr does not need to be aligned on any particular boundary.
35449///
35450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35451#[inline]
35452#[target_feature(enable = "avx512f")]
35453#[cfg_attr(test, assert_instr(vmovupd))]
35454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35455pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35456    storeupd_512(mem_addr, a.as_f64x8(), mask)
35457}
35458
35459/// Store packed 32-bit integers from a into memory using writemask k.
35460/// mem_addr does not need to be aligned on any particular boundary.
35461///
35462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35463#[inline]
35464#[target_feature(enable = "avx512f,avx512vl")]
35465#[cfg_attr(test, assert_instr(vmovdqu32))]
35466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35467pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35468    storedqu32_256(mem_addr, a.as_i32x8(), mask)
35469}
35470
35471/// Store packed 64-bit integers from a into memory using writemask k.
35472/// mem_addr does not need to be aligned on any particular boundary.
35473///
35474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35475#[inline]
35476#[target_feature(enable = "avx512f,avx512vl")]
35477#[cfg_attr(test, assert_instr(vmovdqu64))]
35478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35479pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35480    storedqu64_256(mem_addr, a.as_i64x4(), mask)
35481}
35482
35483/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35484/// mem_addr does not need to be aligned on any particular boundary.
35485///
35486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35487#[inline]
35488#[target_feature(enable = "avx512f,avx512vl")]
35489#[cfg_attr(test, assert_instr(vmovups))]
35490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35491pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35492    storeups_256(mem_addr, a.as_f32x8(), mask)
35493}
35494
35495/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35496/// mem_addr does not need to be aligned on any particular boundary.
35497///
35498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35499#[inline]
35500#[target_feature(enable = "avx512f,avx512vl")]
35501#[cfg_attr(test, assert_instr(vmovupd))]
35502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35503pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35504    storeupd_256(mem_addr, a.as_f64x4(), mask)
35505}
35506
35507/// Store packed 32-bit integers from a into memory using writemask k.
35508/// mem_addr does not need to be aligned on any particular boundary.
35509///
35510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35511#[inline]
35512#[target_feature(enable = "avx512f,avx512vl")]
35513#[cfg_attr(test, assert_instr(vmovdqu32))]
35514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35515pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35516    storedqu32_128(mem_addr, a.as_i32x4(), mask)
35517}
35518
35519/// Store packed 64-bit integers from a into memory using writemask k.
35520/// mem_addr does not need to be aligned on any particular boundary.
35521///
35522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35523#[inline]
35524#[target_feature(enable = "avx512f,avx512vl")]
35525#[cfg_attr(test, assert_instr(vmovdqu64))]
35526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35527pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35528    storedqu64_128(mem_addr, a.as_i64x2(), mask)
35529}
35530
35531/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35532/// mem_addr does not need to be aligned on any particular boundary.
35533///
35534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35535#[inline]
35536#[target_feature(enable = "avx512f,avx512vl")]
35537#[cfg_attr(test, assert_instr(vmovups))]
35538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35539pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35540    storeups_128(mem_addr, a.as_f32x4(), mask)
35541}
35542
35543/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35544/// mem_addr does not need to be aligned on any particular boundary.
35545///
35546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35547#[inline]
35548#[target_feature(enable = "avx512f,avx512vl")]
35549#[cfg_attr(test, assert_instr(vmovupd))]
35550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35551pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35552    storeupd_128(mem_addr, a.as_f64x2(), mask)
35553}
35554
35555/// Store packed 32-bit integers from a into memory using writemask k.
35556/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35557///
35558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35559#[inline]
35560#[target_feature(enable = "avx512f")]
35561#[cfg_attr(test, assert_instr(vmovdqa32))]
35562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35563pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35564    storedqa32_512(mem_addr, a.as_i32x16(), mask)
35565}
35566
35567/// Store packed 64-bit integers from a into memory using writemask k.
35568/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35569///
35570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35571#[inline]
35572#[target_feature(enable = "avx512f")]
35573#[cfg_attr(test, assert_instr(vmovdqa64))]
35574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35575pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35576    storedqa64_512(mem_addr, a.as_i64x8(), mask)
35577}
35578
35579/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35580/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35581///
35582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35583#[inline]
35584#[target_feature(enable = "avx512f")]
35585#[cfg_attr(test, assert_instr(vmovaps))]
35586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35587pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35588    storeaps_512(mem_addr, a.as_f32x16(), mask)
35589}
35590
35591/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35592/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35593///
35594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35595#[inline]
35596#[target_feature(enable = "avx512f")]
35597#[cfg_attr(test, assert_instr(vmovapd))]
35598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35599pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35600    storeapd_512(mem_addr, a.as_f64x8(), mask)
35601}
35602
35603/// Store packed 32-bit integers from a into memory using writemask k.
35604/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35605///
35606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35607#[inline]
35608#[target_feature(enable = "avx512f,avx512vl")]
35609#[cfg_attr(test, assert_instr(vmovdqa32))]
35610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35611pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35612    storedqa32_256(mem_addr, a.as_i32x8(), mask)
35613}
35614
35615/// Store packed 64-bit integers from a into memory using writemask k.
35616/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35617///
35618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35619#[inline]
35620#[target_feature(enable = "avx512f,avx512vl")]
35621#[cfg_attr(test, assert_instr(vmovdqa64))]
35622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35623pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35624    storedqa64_256(mem_addr, a.as_i64x4(), mask)
35625}
35626
35627/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35628/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35629///
35630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35631#[inline]
35632#[target_feature(enable = "avx512f,avx512vl")]
35633#[cfg_attr(test, assert_instr(vmovaps))]
35634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35635pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35636    storeaps_256(mem_addr, a.as_f32x8(), mask)
35637}
35638
35639/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35640/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35641///
35642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35643#[inline]
35644#[target_feature(enable = "avx512f,avx512vl")]
35645#[cfg_attr(test, assert_instr(vmovapd))]
35646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35647pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35648    storeapd_256(mem_addr, a.as_f64x4(), mask)
35649}
35650
35651/// Store packed 32-bit integers from a into memory using writemask k.
35652/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35653///
35654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35655#[inline]
35656#[target_feature(enable = "avx512f,avx512vl")]
35657#[cfg_attr(test, assert_instr(vmovdqa32))]
35658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35659pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35660    storedqa32_128(mem_addr, a.as_i32x4(), mask)
35661}
35662
35663/// Store packed 64-bit integers from a into memory using writemask k.
35664/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35665///
35666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35667#[inline]
35668#[target_feature(enable = "avx512f,avx512vl")]
35669#[cfg_attr(test, assert_instr(vmovdqa64))]
35670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35671pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35672    storedqa64_128(mem_addr, a.as_i64x2(), mask)
35673}
35674
35675/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35676/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35677///
35678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35679#[inline]
35680#[target_feature(enable = "avx512f,avx512vl")]
35681#[cfg_attr(test, assert_instr(vmovaps))]
35682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35683pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35684    storeaps_128(mem_addr, a.as_f32x4(), mask)
35685}
35686
35687/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35688/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35689///
35690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35691#[inline]
35692#[target_feature(enable = "avx512f,avx512vl")]
35693#[cfg_attr(test, assert_instr(vmovapd))]
35694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35695pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35696    storeapd_128(mem_addr, a.as_f64x2(), mask)
35697}
35698
35699/// Store the lower single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35700/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35701///
35702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ss)
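///
/// # Examples
///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation). It assumes a
/// nightly toolchain with the unstable `stdarch_x86_avx512` feature and checks `avx512f`
/// support at runtime; the local `Aligned` wrapper exists only to provide the required
/// 16-byte alignment.
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = Aligned(-1.0);
///     unsafe {
///         let a = _mm_set_ss(3.5);
///         // Mask bit 0 is clear, so memory is left untouched ...
///         _mm_mask_store_ss(&mut out.0, 0b0, a);
///         assert_eq!(out.0, -1.0);
///         // ... and with the bit set the lower element of `a` is written.
///         _mm_mask_store_ss(&mut out.0, 0b1, a);
///         assert_eq!(out.0, 3.5);
///     }
/// }
/// ```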
35703#[inline]
35704#[cfg_attr(test, assert_instr(vmovss))]
35705#[target_feature(enable = "avx512f")]
35706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35707pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35708    asm!(
35709        vps!("vmovss", "{{{k}}}, {a}"),
35710        p = in(reg) mem_addr,
35711        k = in(kreg) k,
35712        a = in(xmm_reg) a,
35713        options(nostack, preserves_flags),
35714    );
35715}
35716
35717/// Store the lower double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35718/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_sd)
35721#[inline]
35722#[cfg_attr(test, assert_instr(vmovsd))]
35723#[target_feature(enable = "avx512f")]
35724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35725pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35726    asm!(
35727        vps!("vmovsd", "{{{k}}}, {a}"),
35728        p = in(reg) mem_addr,
35729        k = in(kreg) k,
35730        a = in(xmm_reg) a,
35731        options(nostack, preserves_flags),
35732    );
35733}
35734
35735/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35736///
35737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
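///
/// # Examples
///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation). It assumes a
/// nightly toolchain with the unstable `stdarch_x86_avx512` feature and checks `avx512f`
/// support at runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mem: Vec<i32> = (100..116).collect();
///     let mut out = [0i32; 16];
///     unsafe {
///         let src = _mm512_set1_epi32(-1);
///         // Bits 0 and 2 of the mask are set, so the two lowest values in memory
///         // (100 and 101) are expanded into lanes 0 and 2; every other lane keeps `src`.
///         let r = _mm512_mask_expandloadu_epi32(src, 0b0101, mem.as_ptr());
///         _mm512_storeu_epi32(out.as_mut_ptr(), r);
///     }
///     assert_eq!(&out[..4], &[100, -1, 101, -1]);
///     assert!(out[4..].iter().all(|&x| x == -1));
/// }
/// ```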
35738#[inline]
35739#[target_feature(enable = "avx512f")]
35740#[cfg_attr(test, assert_instr(vpexpandd))]
35741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35742pub unsafe fn _mm512_mask_expandloadu_epi32(
35743    src: __m512i,
35744    k: __mmask16,
35745    mem_addr: *const i32,
35746) -> __m512i {
35747    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35748}
35749
35750/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35751///
35752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35753#[inline]
35754#[target_feature(enable = "avx512f")]
35755#[cfg_attr(test, assert_instr(vpexpandd))]
35756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35757pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35758    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35759}
35760
35761/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35762///
35763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35764#[inline]
35765#[target_feature(enable = "avx512f,avx512vl")]
35766#[cfg_attr(test, assert_instr(vpexpandd))]
35767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35768pub unsafe fn _mm256_mask_expandloadu_epi32(
35769    src: __m256i,
35770    k: __mmask8,
35771    mem_addr: *const i32,
35772) -> __m256i {
35773    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35774}
35775
35776/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35777///
35778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35779#[inline]
35780#[target_feature(enable = "avx512f,avx512vl")]
35781#[cfg_attr(test, assert_instr(vpexpandd))]
35782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35783pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35784    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35785}
35786
35787/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35788///
35789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35790#[inline]
35791#[target_feature(enable = "avx512f,avx512vl")]
35792#[cfg_attr(test, assert_instr(vpexpandd))]
35793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35794pub unsafe fn _mm_mask_expandloadu_epi32(
35795    src: __m128i,
35796    k: __mmask8,
35797    mem_addr: *const i32,
35798) -> __m128i {
35799    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35800}
35801
35802/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35803///
35804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35805#[inline]
35806#[target_feature(enable = "avx512f,avx512vl")]
35807#[cfg_attr(test, assert_instr(vpexpandd))]
35808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35809pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35810    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35811}
35812
35813/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandq))]
35819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35820pub unsafe fn _mm512_mask_expandloadu_epi64(
35821    src: __m512i,
35822    k: __mmask8,
35823    mem_addr: *const i64,
35824) -> __m512i {
35825    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35826}
35827
35828/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandq))]
35834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35836    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandq))]
35845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35846pub unsafe fn _mm256_mask_expandloadu_epi64(
35847    src: __m256i,
35848    k: __mmask8,
35849    mem_addr: *const i64,
35850) -> __m256i {
35851    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35852}
35853
35854/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandq))]
35860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35862    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandq))]
35871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35872pub unsafe fn _mm_mask_expandloadu_epi64(
35873    src: __m128i,
35874    k: __mmask8,
35875    mem_addr: *const i64,
35876) -> __m128i {
35877    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35878}
35879
35880/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandq))]
35886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35887pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35888    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vexpandps))]
35897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35898pub unsafe fn _mm512_mask_expandloadu_ps(
35899    src: __m512,
35900    k: __mmask16,
35901    mem_addr: *const f32,
35902) -> __m512 {
35903    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35904}
35905
35906/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vexpandps))]
35912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35913pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35914    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35915}
35916
35917/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vexpandps))]
35923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35924pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35925    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
35926}
35927
35928/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35929///
35930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
35931#[inline]
35932#[target_feature(enable = "avx512f,avx512vl")]
35933#[cfg_attr(test, assert_instr(vexpandps))]
35934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35935pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35936    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
35937}
35938
35939/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35940///
35941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
35942#[inline]
35943#[target_feature(enable = "avx512f,avx512vl")]
35944#[cfg_attr(test, assert_instr(vexpandps))]
35945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35946pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35947    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
35948}
35949
35950/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35951///
35952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
35953#[inline]
35954#[target_feature(enable = "avx512f,avx512vl")]
35955#[cfg_attr(test, assert_instr(vexpandps))]
35956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35957pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35958    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
35959}
35960
35961/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
35964#[inline]
35965#[target_feature(enable = "avx512f")]
35966#[cfg_attr(test, assert_instr(vexpandpd))]
35967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35968pub unsafe fn _mm512_mask_expandloadu_pd(
35969    src: __m512d,
35970    k: __mmask8,
35971    mem_addr: *const f64,
35972) -> __m512d {
35973    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
35974}
35975
35976/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35977///
35978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
35979#[inline]
35980#[target_feature(enable = "avx512f")]
35981#[cfg_attr(test, assert_instr(vexpandpd))]
35982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35983pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35984    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
35985}
35986
35987/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35988///
35989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
35990#[inline]
35991#[target_feature(enable = "avx512f,avx512vl")]
35992#[cfg_attr(test, assert_instr(vexpandpd))]
35993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35994pub unsafe fn _mm256_mask_expandloadu_pd(
35995    src: __m256d,
35996    k: __mmask8,
35997    mem_addr: *const f64,
35998) -> __m256d {
35999    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36000}
36001
36002/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36003///
36004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36005#[inline]
36006#[target_feature(enable = "avx512f,avx512vl")]
36007#[cfg_attr(test, assert_instr(vexpandpd))]
36008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36009pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36010    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36011}
36012
36013/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36014///
36015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36016#[inline]
36017#[target_feature(enable = "avx512f,avx512vl")]
36018#[cfg_attr(test, assert_instr(vexpandpd))]
36019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36020pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36021    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36022}
36023
36024/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36025///
36026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36027#[inline]
36028#[target_feature(enable = "avx512f,avx512vl")]
36029#[cfg_attr(test, assert_instr(vexpandpd))]
36030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36031pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36032    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36033}
36034
36035/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36036///
36037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36038#[inline]
36039#[target_feature(enable = "avx512f")]
36040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36041pub fn _mm512_setr_pd(
36042    e0: f64,
36043    e1: f64,
36044    e2: f64,
36045    e3: f64,
36046    e4: f64,
36047    e5: f64,
36048    e6: f64,
36049    e7: f64,
36050) -> __m512d {
36051    unsafe {
36052        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36053        transmute(r)
36054    }
36055}
36056
36057/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36058///
36059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
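///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation) showing that the first
/// argument supplies the highest element, i.e. `_mm512_set_pd` is `_mm512_setr_pd` with the
/// arguments reversed. It assumes a nightly toolchain with the unstable `stdarch_x86_avx512`
/// feature and checks `avx512f` support at runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f64; 8];
///     unsafe {
///         let a = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
///         _mm512_storeu_pd(out.as_mut_ptr(), a);
///     }
///     // Element 0 ends up in the lowest lane, exactly like
///     // `_mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.)`.
///     assert_eq!(out, [0., 1., 2., 3., 4., 5., 6., 7.]);
/// }
/// ```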
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36063pub fn _mm512_set_pd(
36064    e0: f64,
36065    e1: f64,
36066    e2: f64,
36067    e3: f64,
36068    e4: f64,
36069    e5: f64,
36070    e6: f64,
36071    e7: f64,
36072) -> __m512d {
36073    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36074}
36075
36076/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36077///
36078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
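///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation). It assumes a nightly
/// toolchain with the unstable `stdarch_x86_avx512` feature and checks `avx512f` support at
/// runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f32; 4];
///     unsafe {
///         let src = _mm_set_ss(10.);          // lane 0 = 10.0
///         let a = _mm_set_ps(4., 3., 2., 1.); // upper lanes come from here
///         let b = _mm_set_ss(99.);            // candidate for lane 0
///         // Mask bit 0 is clear, so lane 0 is taken from `src` instead of `b`,
///         // while lanes 1..=3 are always copied from `a`.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_mask_move_ss(src, 0b0, a, b));
///     }
///     assert_eq!(out, [10., 2., 3., 4.]);
/// }
/// ```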
36079#[inline]
36080#[target_feature(enable = "avx512f")]
36081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36082#[cfg_attr(test, assert_instr(vmovss))]
36083pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36084    unsafe {
36085        let extractsrc: f32 = simd_extract!(src, 0);
36086        let mut mov: f32 = extractsrc;
36087        if (k & 0b00000001) != 0 {
36088            mov = simd_extract!(b, 0);
36089        }
36090        simd_insert!(a, 0, mov)
36091    }
36092}
36093
36094/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36095///
36096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36097#[inline]
36098#[target_feature(enable = "avx512f")]
36099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36100#[cfg_attr(test, assert_instr(vmovss))]
36101pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36102    unsafe {
36103        let mut mov: f32 = 0.;
36104        if (k & 0b00000001) != 0 {
36105            mov = simd_extract!(b, 0);
36106        }
36107        simd_insert!(a, 0, mov)
36108    }
36109}
36110
36111/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36117#[cfg_attr(test, assert_instr(vmovsd))]
36118pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36119    unsafe {
36120        let extractsrc: f64 = simd_extract!(src, 0);
36121        let mut mov: f64 = extractsrc;
36122        if (k & 0b00000001) != 0 {
36123            mov = simd_extract!(b, 0);
36124        }
36125        simd_insert!(a, 0, mov)
36126    }
36127}
36128
36129/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36130///
36131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36132#[inline]
36133#[target_feature(enable = "avx512f")]
36134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36135#[cfg_attr(test, assert_instr(vmovsd))]
36136pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36137    unsafe {
36138        let mut mov: f64 = 0.;
36139        if (k & 0b00000001) != 0 {
36140            mov = simd_extract!(b, 0);
36141        }
36142        simd_insert!(a, 0, mov)
36143    }
36144}
36145
36146/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36147///
36148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
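///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation) showing both mask
/// states. It assumes a nightly toolchain with the unstable `stdarch_x86_avx512` feature and
/// checks `avx512f` support at runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f32; 4];
///     unsafe {
///         let src = _mm_set_ss(100.);
///         let a = _mm_set_ps(40., 30., 20., 2.);
///         let b = _mm_set_ss(3.);
///
///         // Mask bit 0 set: lane 0 becomes 2.0 + 3.0; the upper lanes come from `a`.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_mask_add_ss(src, 0b1, a, b));
///         assert_eq!(out, [5., 20., 30., 40.]);
///
///         // Mask bit 0 clear: lane 0 is copied from `src` instead.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_mask_add_ss(src, 0b0, a, b));
///         assert_eq!(out, [100., 20., 30., 40.]);
///     }
/// }
/// ```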
36149#[inline]
36150#[target_feature(enable = "avx512f")]
36151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36152#[cfg_attr(test, assert_instr(vaddss))]
36153pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36154    unsafe {
36155        let extractsrc: f32 = simd_extract!(src, 0);
36156        let mut add: f32 = extractsrc;
36157        if (k & 0b00000001) != 0 {
36158            let extracta: f32 = simd_extract!(a, 0);
36159            let extractb: f32 = simd_extract!(b, 0);
36160            add = extracta + extractb;
36161        }
36162        simd_insert!(a, 0, add)
36163    }
36164}
36165
36166/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36167///
36168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
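///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation); unlike the writemask
/// form, a clear mask bit zeroes the lower lane instead of copying it from a source vector. It
/// assumes a nightly toolchain with the unstable `stdarch_x86_avx512` feature and checks
/// `avx512f` support at runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f32; 4];
///     unsafe {
///         let a = _mm_set_ps(40., 30., 20., 2.);
///         let b = _mm_set_ss(3.);
///         // Mask bit 0 is clear, so lane 0 is zeroed; lanes 1..=3 still come from `a`.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_maskz_add_ss(0b0, a, b));
///     }
///     assert_eq!(out, [0., 20., 30., 40.]);
/// }
/// ```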
36169#[inline]
36170#[target_feature(enable = "avx512f")]
36171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36172#[cfg_attr(test, assert_instr(vaddss))]
36173pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36174    unsafe {
36175        let mut add: f32 = 0.;
36176        if (k & 0b00000001) != 0 {
36177            let extracta: f32 = simd_extract!(a, 0);
36178            let extractb: f32 = simd_extract!(b, 0);
36179            add = extracta + extractb;
36180        }
36181        simd_insert!(a, 0, add)
36182    }
36183}
36184
36185/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36186///
36187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36188#[inline]
36189#[target_feature(enable = "avx512f")]
36190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36191#[cfg_attr(test, assert_instr(vaddsd))]
36192pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36193    unsafe {
36194        let extractsrc: f64 = simd_extract!(src, 0);
36195        let mut add: f64 = extractsrc;
36196        if (k & 0b00000001) != 0 {
36197            let extracta: f64 = simd_extract!(a, 0);
36198            let extractb: f64 = simd_extract!(b, 0);
36199            add = extracta + extractb;
36200        }
36201        simd_insert!(a, 0, add)
36202    }
36203}
36204
36205/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36206///
36207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36208#[inline]
36209#[target_feature(enable = "avx512f")]
36210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36211#[cfg_attr(test, assert_instr(vaddsd))]
36212pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36213    unsafe {
36214        let mut add: f64 = 0.;
36215        if (k & 0b00000001) != 0 {
36216            let extracta: f64 = simd_extract!(a, 0);
36217            let extractb: f64 = simd_extract!(b, 0);
36218            add = extracta + extractb;
36219        }
36220        simd_insert!(a, 0, add)
36221    }
36222}
36223
36224/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36230#[cfg_attr(test, assert_instr(vsubss))]
36231pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232    unsafe {
36233        let extractsrc: f32 = simd_extract!(src, 0);
36234        let mut sub: f32 = extractsrc;
36235        if (k & 0b00000001) != 0 {
36236            let extracta: f32 = simd_extract!(a, 0);
36237            let extractb: f32 = simd_extract!(b, 0);
36238            sub = extracta - extractb;
36239        }
36240        simd_insert!(a, 0, sub)
36241    }
36242}
36243
36244/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36250#[cfg_attr(test, assert_instr(vsubss))]
36251pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252    unsafe {
36253        let mut sub: f32 = 0.;
36254        if (k & 0b00000001) != 0 {
36255            let extracta: f32 = simd_extract!(a, 0);
36256            let extractb: f32 = simd_extract!(b, 0);
36257            sub = extracta - extractb;
36258        }
36259        simd_insert!(a, 0, sub)
36260    }
36261}
36262
36263/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36269#[cfg_attr(test, assert_instr(vsubsd))]
36270pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271    unsafe {
36272        let extractsrc: f64 = simd_extract!(src, 0);
36273        let mut sub: f64 = extractsrc;
36274        if (k & 0b00000001) != 0 {
36275            let extracta: f64 = simd_extract!(a, 0);
36276            let extractb: f64 = simd_extract!(b, 0);
36277            sub = extracta - extractb;
36278        }
36279        simd_insert!(a, 0, sub)
36280    }
36281}
36282
36283/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36289#[cfg_attr(test, assert_instr(vsubsd))]
36290pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291    unsafe {
36292        let mut sub: f64 = 0.;
36293        if (k & 0b00000001) != 0 {
36294            let extracta: f64 = simd_extract!(a, 0);
36295            let extractb: f64 = simd_extract!(b, 0);
36296            sub = extracta - extractb;
36297        }
36298        simd_insert!(a, 0, sub)
36299    }
36300}
36301
36302/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36308#[cfg_attr(test, assert_instr(vmulss))]
36309pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310    unsafe {
36311        let extractsrc: f32 = simd_extract!(src, 0);
36312        let mut mul: f32 = extractsrc;
36313        if (k & 0b00000001) != 0 {
36314            let extracta: f32 = simd_extract!(a, 0);
36315            let extractb: f32 = simd_extract!(b, 0);
36316            mul = extracta * extractb;
36317        }
36318        simd_insert!(a, 0, mul)
36319    }
36320}
36321
36322/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36328#[cfg_attr(test, assert_instr(vmulss))]
36329pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330    unsafe {
36331        let mut mul: f32 = 0.;
36332        if (k & 0b00000001) != 0 {
36333            let extracta: f32 = simd_extract!(a, 0);
36334            let extractb: f32 = simd_extract!(b, 0);
36335            mul = extracta * extractb;
36336        }
36337        simd_insert!(a, 0, mul)
36338    }
36339}
36340
36341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36347#[cfg_attr(test, assert_instr(vmulsd))]
36348pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349    unsafe {
36350        let extractsrc: f64 = simd_extract!(src, 0);
36351        let mut mul: f64 = extractsrc;
36352        if (k & 0b00000001) != 0 {
36353            let extracta: f64 = simd_extract!(a, 0);
36354            let extractb: f64 = simd_extract!(b, 0);
36355            mul = extracta * extractb;
36356        }
36357        simd_insert!(a, 0, mul)
36358    }
36359}
36360
36361/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36367#[cfg_attr(test, assert_instr(vmulsd))]
36368pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369    unsafe {
36370        let mut mul: f64 = 0.;
36371        if (k & 0b00000001) != 0 {
36372            let extracta: f64 = simd_extract!(a, 0);
36373            let extractb: f64 = simd_extract!(b, 0);
36374            mul = extracta * extractb;
36375        }
36376        simd_insert!(a, 0, mul)
36377    }
36378}
36379
36380/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36386#[cfg_attr(test, assert_instr(vdivss))]
36387pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388    unsafe {
36389        let extractsrc: f32 = simd_extract!(src, 0);
36390        let mut div: f32 = extractsrc;
36391        if (k & 0b00000001) != 0 {
36392            let extracta: f32 = simd_extract!(a, 0);
36393            let extractb: f32 = simd_extract!(b, 0);
36394            div = extracta / extractb;
36395        }
36396        simd_insert!(a, 0, div)
36397    }
36398}
36399
36400/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36406#[cfg_attr(test, assert_instr(vdivss))]
36407pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408    unsafe {
36409        let mut div: f32 = 0.;
36410        if (k & 0b00000001) != 0 {
36411            let extracta: f32 = simd_extract!(a, 0);
36412            let extractb: f32 = simd_extract!(b, 0);
36413            div = extracta / extractb;
36414        }
36415        simd_insert!(a, 0, div)
36416    }
36417}
36418
36419/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36425#[cfg_attr(test, assert_instr(vdivsd))]
36426pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427    unsafe {
36428        let extractsrc: f64 = simd_extract!(src, 0);
36429        let mut div: f64 = extractsrc;
36430        if (k & 0b00000001) != 0 {
36431            let extracta: f64 = simd_extract!(a, 0);
36432            let extractb: f64 = simd_extract!(b, 0);
36433            div = extracta / extractb;
36434        }
36435        simd_insert!(a, 0, div)
36436    }
36437}
36438
36439/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36445#[cfg_attr(test, assert_instr(vdivsd))]
36446pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447    unsafe {
36448        let mut div: f64 = 0.;
36449        if (k & 0b00000001) != 0 {
36450            let extracta: f64 = simd_extract!(a, 0);
36451            let extractb: f64 = simd_extract!(b, 0);
36452            div = extracta / extractb;
36453        }
36454        simd_insert!(a, 0, div)
36455    }
36456}
36457
36458/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
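///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation). It assumes a nightly
/// toolchain with the unstable `stdarch_x86_avx512` feature and checks `avx512f` support at
/// runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f32; 4];
///     unsafe {
///         let src = _mm_set_ss(-1.);
///         let a = _mm_set_ps(8., 7., 6., 2.);
///         let b = _mm_set_ss(5.);
///         // Lane 0 receives max(2.0, 5.0); the upper lanes are copied from `a`.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_mask_max_ss(src, 0b1, a, b));
///     }
///     assert_eq!(out, [5., 6., 7., 8.]);
/// }
/// ```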
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36464#[cfg_attr(test, assert_instr(vmaxss))]
36465pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466    unsafe {
36467        transmute(vmaxss(
36468            a.as_f32x4(),
36469            b.as_f32x4(),
36470            src.as_f32x4(),
36471            k,
36472            _MM_FROUND_CUR_DIRECTION,
36473        ))
36474    }
36475}
36476
36477/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36478///
36479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36480#[inline]
36481#[target_feature(enable = "avx512f")]
36482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36483#[cfg_attr(test, assert_instr(vmaxss))]
36484pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36485    unsafe {
36486        transmute(vmaxss(
36487            a.as_f32x4(),
36488            b.as_f32x4(),
36489            f32x4::ZERO,
36490            k,
36491            _MM_FROUND_CUR_DIRECTION,
36492        ))
36493    }
36494}
36495
36496/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36497///
36498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36499#[inline]
36500#[target_feature(enable = "avx512f")]
36501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36502#[cfg_attr(test, assert_instr(vmaxsd))]
36503pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36504    unsafe {
36505        transmute(vmaxsd(
36506            a.as_f64x2(),
36507            b.as_f64x2(),
36508            src.as_f64x2(),
36509            k,
36510            _MM_FROUND_CUR_DIRECTION,
36511        ))
36512    }
36513}
36514
36515/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36516///
36517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36518#[inline]
36519#[target_feature(enable = "avx512f")]
36520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36521#[cfg_attr(test, assert_instr(vmaxsd))]
36522pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36523    unsafe {
36524        transmute(vmaxsd(
36525            a.as_f64x2(),
36526            b.as_f64x2(),
36527            f64x2::ZERO,
36528            k,
36529            _MM_FROUND_CUR_DIRECTION,
36530        ))
36531    }
36532}
36533
36534/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36535///
36536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36537#[inline]
36538#[target_feature(enable = "avx512f")]
36539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36540#[cfg_attr(test, assert_instr(vminss))]
36541pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36542    unsafe {
36543        transmute(vminss(
36544            a.as_f32x4(),
36545            b.as_f32x4(),
36546            src.as_f32x4(),
36547            k,
36548            _MM_FROUND_CUR_DIRECTION,
36549        ))
36550    }
36551}
36552
36553/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36554///
36555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36556#[inline]
36557#[target_feature(enable = "avx512f")]
36558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36559#[cfg_attr(test, assert_instr(vminss))]
36560pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36561    unsafe {
36562        transmute(vminss(
36563            a.as_f32x4(),
36564            b.as_f32x4(),
36565            f32x4::ZERO,
36566            k,
36567            _MM_FROUND_CUR_DIRECTION,
36568        ))
36569    }
36570}
36571
36572/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36573///
36574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36575#[inline]
36576#[target_feature(enable = "avx512f")]
36577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36578#[cfg_attr(test, assert_instr(vminsd))]
36579pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36580    unsafe {
36581        transmute(vminsd(
36582            a.as_f64x2(),
36583            b.as_f64x2(),
36584            src.as_f64x2(),
36585            k,
36586            _MM_FROUND_CUR_DIRECTION,
36587        ))
36588    }
36589}
36590
36591/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36592///
36593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36594#[inline]
36595#[target_feature(enable = "avx512f")]
36596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36597#[cfg_attr(test, assert_instr(vminsd))]
36598pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36599    unsafe {
36600        transmute(vminsd(
36601            a.as_f64x2(),
36602            b.as_f64x2(),
36603            f64x2::ZERO,
36604            k,
36605            _MM_FROUND_CUR_DIRECTION,
36606        ))
36607    }
36608}
36609
36610/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36611///
36612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
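///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation). It assumes a nightly
/// toolchain with the unstable `stdarch_x86_avx512` feature and checks `avx512f` support at
/// runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let mut out = [0.0f32; 4];
///     unsafe {
///         let src = _mm_set_ss(-1.);
///         let a = _mm_set_ps(4., 3., 2., 1.);
///         let b = _mm_set_ss(9.);
///         // Lane 0 becomes sqrt(9.0) because mask bit 0 is set; lanes 1..=3 come from `a`.
///         _mm_storeu_ps(out.as_mut_ptr(), _mm_mask_sqrt_ss(src, 0b1, a, b));
///     }
///     assert_eq!(out, [3., 2., 3., 4.]);
/// }
/// ```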
36613#[inline]
36614#[target_feature(enable = "avx512f")]
36615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36616#[cfg_attr(test, assert_instr(vsqrtss))]
36617pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36618    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36619}
36620
36621/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36622///
36623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36624#[inline]
36625#[target_feature(enable = "avx512f")]
36626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36627#[cfg_attr(test, assert_instr(vsqrtss))]
36628pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36629    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36630}
36631
36632/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36633///
36634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36635#[inline]
36636#[target_feature(enable = "avx512f")]
36637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36638#[cfg_attr(test, assert_instr(vsqrtsd))]
36639pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36640    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36641}
36642
36643/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36644///
36645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36646#[inline]
36647#[target_feature(enable = "avx512f")]
36648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36649#[cfg_attr(test, assert_instr(vsqrtsd))]
36650pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36651    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36652}
36653
36654/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36655///
36656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
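///
/// # Examples
///
/// A minimal sketch (illustrative only, not part of Intel's documentation) checking the result
/// against the documented 2^-14 relative-error bound. It assumes a nightly toolchain with the
/// unstable `stdarch_x86_avx512` feature and checks `avx512f` support at runtime:
///
/// ```
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm_set_ss(1.);
///         let b = _mm_set_ss(4.);
///         // Lane 0 holds an approximation of 1/sqrt(4.0) = 0.5.
///         let lane0 = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b));
///         assert!((lane0 - 0.5).abs() <= 0.5 * 2f32.powi(-14));
///     }
/// }
/// ```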
36657#[inline]
36658#[target_feature(enable = "avx512f")]
36659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36660#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36661pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36662    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36663}
36664
36665/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36666///
36667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36668#[inline]
36669#[target_feature(enable = "avx512f")]
36670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36671#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36672pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36673    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36674}
36675
36676/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36677///
36678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36679#[inline]
36680#[target_feature(enable = "avx512f")]
36681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36682#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36683pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36684    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36685}
36686
36687/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36688///
36689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36690#[inline]
36691#[target_feature(enable = "avx512f")]
36692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36693#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36694pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36695    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36696}
36697
36698/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36699///
36700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36701#[inline]
36702#[target_feature(enable = "avx512f")]
36703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36704#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36705pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36706    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36707}
36708
36709/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36710///
36711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36712#[inline]
36713#[target_feature(enable = "avx512f")]
36714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36715#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36716pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36717    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36718}
36719
36720/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36721///
36722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
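///
/// A minimal usage sketch (illustrative only; the result is an approximation
/// with relative error below 2^-14):
///
/// ```ignore
/// let a = _mm_set1_ps(1.0);
/// let b = _mm_set_ss(8.0);
/// let r = _mm_rcp14_ss(a, b);
/// // lane 0 ≈ 1.0 / 8.0 = 0.125; lanes 1..=3 are copied from `a` (all 1.0 here)
/// ```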
36723#[inline]
36724#[target_feature(enable = "avx512f")]
36725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36726#[cfg_attr(test, assert_instr(vrcp14ss))]
36727pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36728    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36729}
36730
36731/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36732///
36733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36734#[inline]
36735#[target_feature(enable = "avx512f")]
36736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36737#[cfg_attr(test, assert_instr(vrcp14ss))]
36738pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36739    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36740}
36741
36742/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36743///
36744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36745#[inline]
36746#[target_feature(enable = "avx512f")]
36747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36748#[cfg_attr(test, assert_instr(vrcp14ss))]
36749pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36750    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36751}
36752
36753/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36754///
36755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36756#[inline]
36757#[target_feature(enable = "avx512f")]
36758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36759#[cfg_attr(test, assert_instr(vrcp14sd))]
36760pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36761    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36762}
36763
36764/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36765///
36766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36767#[inline]
36768#[target_feature(enable = "avx512f")]
36769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36770#[cfg_attr(test, assert_instr(vrcp14sd))]
36771pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36772    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36773}
36774
36775/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36776///
36777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36778#[inline]
36779#[target_feature(enable = "avx512f")]
36780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36781#[cfg_attr(test, assert_instr(vrcp14sd))]
36782pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36783    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36784}
36785
36786/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36787///
36788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
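///
/// A minimal usage sketch (illustrative only):
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set_ss(8.5);
/// let r = _mm_getexp_ss(a, b);
/// // lane 0 = 3.0, because floor(log2(8.5)) = 3; lanes 1..=3 are copied from `a`
/// ```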
36789#[inline]
36790#[target_feature(enable = "avx512f")]
36791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36792#[cfg_attr(test, assert_instr(vgetexpss))]
36793pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36794    unsafe {
36795        transmute(vgetexpss(
36796            a.as_f32x4(),
36797            b.as_f32x4(),
36798            f32x4::ZERO,
36799            0b1,
36800            _MM_FROUND_NO_EXC,
36801        ))
36802    }
36803}
36804
36805/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36806///
36807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36808#[inline]
36809#[target_feature(enable = "avx512f")]
36810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36811#[cfg_attr(test, assert_instr(vgetexpss))]
36812pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36813    unsafe {
36814        transmute(vgetexpss(
36815            a.as_f32x4(),
36816            b.as_f32x4(),
36817            src.as_f32x4(),
36818            k,
36819            _MM_FROUND_NO_EXC,
36820        ))
36821    }
36822}
36823
36824/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36825///
36826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36827#[inline]
36828#[target_feature(enable = "avx512f")]
36829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36830#[cfg_attr(test, assert_instr(vgetexpss))]
36831pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36832    unsafe {
36833        transmute(vgetexpss(
36834            a.as_f32x4(),
36835            b.as_f32x4(),
36836            f32x4::ZERO,
36837            k,
36838            _MM_FROUND_NO_EXC,
36839        ))
36840    }
36841}
36842
36843/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36844///
36845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36846#[inline]
36847#[target_feature(enable = "avx512f")]
36848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36849#[cfg_attr(test, assert_instr(vgetexpsd))]
36850pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36851    unsafe {
36852        transmute(vgetexpsd(
36853            a.as_f64x2(),
36854            b.as_f64x2(),
36855            f64x2::ZERO,
36856            0b1,
36857            _MM_FROUND_NO_EXC,
36858        ))
36859    }
36860}
36861
36862/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36863///
36864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36865#[inline]
36866#[target_feature(enable = "avx512f")]
36867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36868#[cfg_attr(test, assert_instr(vgetexpsd))]
36869pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36870    unsafe {
36871        transmute(vgetexpsd(
36872            a.as_f64x2(),
36873            b.as_f64x2(),
36874            src.as_f64x2(),
36875            k,
36876            _MM_FROUND_NO_EXC,
36877        ))
36878    }
36879}
36880
36881/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36882///
36883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36884#[inline]
36885#[target_feature(enable = "avx512f")]
36886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36887#[cfg_attr(test, assert_instr(vgetexpsd))]
36888pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36889    unsafe {
36890        transmute(vgetexpsd(
36891            a.as_f64x2(),
36892            b.as_f64x2(),
36893            f64x2::ZERO,
36894            k,
36895            _MM_FROUND_NO_EXC,
36896        ))
36897    }
36898}
36899
36900/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36901/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36902///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36903///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36904///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36905///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36906/// The sign is determined by sc which can take the following values:\
36907///    _MM_MANT_SIGN_src     // sign = sign(src)\
36908///    _MM_MANT_SIGN_zero    // sign = 0\
36909///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36910/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36911///
36912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
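///
/// A minimal usage sketch (illustrative only; the constant names are assumed to
/// be the ones exported by this module):
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set_ss(12.0);
/// let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
/// // lane 0 = 1.5, because 12.0 = 1.5 * 2^3 and the mantissa is normalized to [1, 2)
/// ```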
36913#[inline]
36914#[target_feature(enable = "avx512f")]
36915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36916#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36917#[rustc_legacy_const_generics(2, 3)]
36918pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36919    a: __m128,
36920    b: __m128,
36921) -> __m128 {
36922    unsafe {
36923        static_assert_uimm_bits!(NORM, 4);
36924        static_assert_uimm_bits!(SIGN, 2);
36925        let a = a.as_f32x4();
36926        let b = b.as_f32x4();
36927        let r = vgetmantss(
36928            a,
36929            b,
36930            SIGN << 2 | NORM,
36931            f32x4::ZERO,
36932            0b1,
36933            _MM_FROUND_CUR_DIRECTION,
36934        );
36935        transmute(r)
36936    }
36937}
36938
36939/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36940/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36941///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36942///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36943///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36944///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36945/// The sign is determined by sc which can take the following values:\
36946///    _MM_MANT_SIGN_src     // sign = sign(src)\
36947///    _MM_MANT_SIGN_zero    // sign = 0\
36948///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36950///
36951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
36952#[inline]
36953#[target_feature(enable = "avx512f")]
36954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36955#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36956#[rustc_legacy_const_generics(4, 5)]
36957pub fn _mm_mask_getmant_ss<
36958    const NORM: _MM_MANTISSA_NORM_ENUM,
36959    const SIGN: _MM_MANTISSA_SIGN_ENUM,
36960>(
36961    src: __m128,
36962    k: __mmask8,
36963    a: __m128,
36964    b: __m128,
36965) -> __m128 {
36966    unsafe {
36967        static_assert_uimm_bits!(NORM, 4);
36968        static_assert_uimm_bits!(SIGN, 2);
36969        let a = a.as_f32x4();
36970        let b = b.as_f32x4();
36971        let src = src.as_f32x4();
36972        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
36973        transmute(r)
36974    }
36975}
36976
36977/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36978/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36979///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36980///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36981///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36982///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36983/// The sign is determined by sc which can take the following values:\
36984///    _MM_MANT_SIGN_src     // sign = sign(src)\
36985///    _MM_MANT_SIGN_zero    // sign = 0\
36986///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36987/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36988///
36989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
36990#[inline]
36991#[target_feature(enable = "avx512f")]
36992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36993#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36994#[rustc_legacy_const_generics(3, 4)]
36995pub fn _mm_maskz_getmant_ss<
36996    const NORM: _MM_MANTISSA_NORM_ENUM,
36997    const SIGN: _MM_MANTISSA_SIGN_ENUM,
36998>(
36999    k: __mmask8,
37000    a: __m128,
37001    b: __m128,
37002) -> __m128 {
37003    unsafe {
37004        static_assert_uimm_bits!(NORM, 4);
37005        static_assert_uimm_bits!(SIGN, 2);
37006        let a = a.as_f32x4();
37007        let b = b.as_f32x4();
37008        let r = vgetmantss(
37009            a,
37010            b,
37011            SIGN << 2 | NORM,
37012            f32x4::ZERO,
37013            k,
37014            _MM_FROUND_CUR_DIRECTION,
37015        );
37016        transmute(r)
37017    }
37018}
37019
37020/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37021/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37022///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37023///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37024///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37025///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37026/// The sign is determined by sc which can take the following values:\
37027///    _MM_MANT_SIGN_src     // sign = sign(src)\
37028///    _MM_MANT_SIGN_zero    // sign = 0\
37029///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37030/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37031///
37032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37033#[inline]
37034#[target_feature(enable = "avx512f")]
37035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37036#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37037#[rustc_legacy_const_generics(2, 3)]
37038pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37039    a: __m128d,
37040    b: __m128d,
37041) -> __m128d {
37042    unsafe {
37043        static_assert_uimm_bits!(NORM, 4);
37044        static_assert_uimm_bits!(SIGN, 2);
37045        let a = a.as_f64x2();
37046        let b = b.as_f64x2();
37047        let r = vgetmantsd(
37048            a,
37049            b,
37050            SIGN << 2 | NORM,
37051            f64x2::ZERO,
37052            0b1,
37053            _MM_FROUND_CUR_DIRECTION,
37054        );
37055        transmute(r)
37056    }
37057}
37058
37059/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37060/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37061///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37062///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37063///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37064///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37065/// The sign is determined by sc which can take the following values:\
37066///    _MM_MANT_SIGN_src     // sign = sign(src)\
37067///    _MM_MANT_SIGN_zero    // sign = 0\
37068///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37070///
37071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37072#[inline]
37073#[target_feature(enable = "avx512f")]
37074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37075#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37076#[rustc_legacy_const_generics(4, 5)]
37077pub fn _mm_mask_getmant_sd<
37078    const NORM: _MM_MANTISSA_NORM_ENUM,
37079    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37080>(
37081    src: __m128d,
37082    k: __mmask8,
37083    a: __m128d,
37084    b: __m128d,
37085) -> __m128d {
37086    unsafe {
37087        static_assert_uimm_bits!(NORM, 4);
37088        static_assert_uimm_bits!(SIGN, 2);
37089        let a = a.as_f64x2();
37090        let b = b.as_f64x2();
37091        let src = src.as_f64x2();
37092        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37093        transmute(r)
37094    }
37095}
37096
37097/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37098/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37099///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37100///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37101///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37102///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37103/// The sign is determined by sc which can take the following values:\
37104///    _MM_MANT_SIGN_src     // sign = sign(src)\
37105///    _MM_MANT_SIGN_zero    // sign = 0\
37106///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37107/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37108///
37109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37110#[inline]
37111#[target_feature(enable = "avx512f")]
37112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37113#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37114#[rustc_legacy_const_generics(3, 4)]
37115pub fn _mm_maskz_getmant_sd<
37116    const NORM: _MM_MANTISSA_NORM_ENUM,
37117    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37118>(
37119    k: __mmask8,
37120    a: __m128d,
37121    b: __m128d,
37122) -> __m128d {
37123    unsafe {
37124        static_assert_uimm_bits!(NORM, 4);
37125        static_assert_uimm_bits!(SIGN, 2);
37126        let a = a.as_f64x2();
37127        let b = b.as_f64x2();
37128        let r = vgetmantsd(
37129            a,
37130            b,
37131            SIGN << 2 | NORM,
37132            f64x2::ZERO,
37133            k,
37134            _MM_FROUND_CUR_DIRECTION,
37135        );
37136        transmute(r)
37137    }
37138}
37139
37140/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37141/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37142/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37143/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37144/// * [`_MM_FROUND_TO_POS_INF`] : round up
37145/// * [`_MM_FROUND_TO_ZERO`] : truncate
37146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37147///
37148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
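///
/// A minimal usage sketch (illustrative only): `IMM8[7:4]` selects how many
/// fraction bits to keep and `IMM8[2:0]` selects the rounding mode.
///
/// ```ignore
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set_ss(2.7);
/// // 0 fraction bits, round to nearest: lane 0 = 3.0
/// let r = _mm_roundscale_ss::<0>(a, b);
/// // 1 fraction bit, truncate: lane 0 = 2.5
/// let t = _mm_roundscale_ss::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a, b);
/// ```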
37149#[inline]
37150#[target_feature(enable = "avx512f")]
37151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37152#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37153#[rustc_legacy_const_generics(2)]
37154pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37155    unsafe {
37156        static_assert_uimm_bits!(IMM8, 8);
37157        let a = a.as_f32x4();
37158        let b = b.as_f32x4();
37159        let r = vrndscaless(
37160            a,
37161            b,
37162            f32x4::ZERO,
37163            0b11111111,
37164            IMM8,
37165            _MM_FROUND_CUR_DIRECTION,
37166        );
37167        transmute(r)
37168    }
37169}
37170
37171/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37172/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37173/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37174/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37175/// * [`_MM_FROUND_TO_POS_INF`] : round up
37176/// * [`_MM_FROUND_TO_ZERO`] : truncate
37177/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37178///
37179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37180#[inline]
37181#[target_feature(enable = "avx512f")]
37182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37183#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37184#[rustc_legacy_const_generics(4)]
37185pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37186    src: __m128,
37187    k: __mmask8,
37188    a: __m128,
37189    b: __m128,
37190) -> __m128 {
37191    unsafe {
37192        static_assert_uimm_bits!(IMM8, 8);
37193        let a = a.as_f32x4();
37194        let b = b.as_f32x4();
37195        let src = src.as_f32x4();
37196        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37197        transmute(r)
37198    }
37199}
37200
37201/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37202/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37203/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37204/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37205/// * [`_MM_FROUND_TO_POS_INF`] : round up
37206/// * [`_MM_FROUND_TO_ZERO`] : truncate
37207/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37208///
37209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37210#[inline]
37211#[target_feature(enable = "avx512f")]
37212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37213#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37214#[rustc_legacy_const_generics(3)]
37215pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37216    unsafe {
37217        static_assert_uimm_bits!(IMM8, 8);
37218        let a = a.as_f32x4();
37219        let b = b.as_f32x4();
37220        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37221        transmute(r)
37222    }
37223}
37224
37225/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37226/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37229/// * [`_MM_FROUND_TO_POS_INF`] : round up
37230/// * [`_MM_FROUND_TO_ZERO`] : truncate
37231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37232///
37233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37234#[inline]
37235#[target_feature(enable = "avx512f")]
37236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37237#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37238#[rustc_legacy_const_generics(2)]
37239pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37240    unsafe {
37241        static_assert_uimm_bits!(IMM8, 8);
37242        let a = a.as_f64x2();
37243        let b = b.as_f64x2();
37244        let r = vrndscalesd(
37245            a,
37246            b,
37247            f64x2::ZERO,
37248            0b11111111,
37249            IMM8,
37250            _MM_FROUND_CUR_DIRECTION,
37251        );
37252        transmute(r)
37253    }
37254}
37255
37256/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37257/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37260/// * [`_MM_FROUND_TO_POS_INF`] : round up
37261/// * [`_MM_FROUND_TO_ZERO`] : truncate
37262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37263///
37264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37265#[inline]
37266#[target_feature(enable = "avx512f")]
37267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37268#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37269#[rustc_legacy_const_generics(4)]
37270pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37271    src: __m128d,
37272    k: __mmask8,
37273    a: __m128d,
37274    b: __m128d,
37275) -> __m128d {
37276    unsafe {
37277        static_assert_uimm_bits!(IMM8, 8);
37278        let a = a.as_f64x2();
37279        let b = b.as_f64x2();
37280        let src = src.as_f64x2();
37281        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37282        transmute(r)
37283    }
37284}
37285
37286/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37287/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37288/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37289/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37290/// * [`_MM_FROUND_TO_POS_INF`] : round up
37291/// * [`_MM_FROUND_TO_ZERO`] : truncate
37292/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37293///
37294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37295#[inline]
37296#[target_feature(enable = "avx512f")]
37297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37298#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37299#[rustc_legacy_const_generics(3)]
37300pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37301    unsafe {
37302        static_assert_uimm_bits!(IMM8, 8);
37303        let a = a.as_f64x2();
37304        let b = b.as_f64x2();
37305        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37306        transmute(r)
37307    }
37308}
37309
37310/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37311///
37312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
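///
/// A minimal usage sketch (illustrative only): the lower result is
/// `a[0] * 2^floor(b[0])`.
///
/// ```ignore
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// let r = _mm_scalef_ss(a, b);
/// // lane 0 = 3.0 * 2^2 = 12.0; lanes 1..=3 are copied from `a` (all 0.0 here)
/// ```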
37313#[inline]
37314#[target_feature(enable = "avx512f")]
37315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37316#[cfg_attr(test, assert_instr(vscalefss))]
37317pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37318    unsafe {
37319        let a = a.as_f32x4();
37320        let b = b.as_f32x4();
37321        transmute(vscalefss(
37322            a,
37323            b,
37324            f32x4::ZERO,
37325            0b11111111,
37326            _MM_FROUND_CUR_DIRECTION,
37327        ))
37328    }
37329}
37330
37331/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37332///
37333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37334#[inline]
37335#[target_feature(enable = "avx512f")]
37336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37337#[cfg_attr(test, assert_instr(vscalefss))]
37338pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37339    unsafe {
37340        let a = a.as_f32x4();
37341        let b = b.as_f32x4();
37342        let src = src.as_f32x4();
37343        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37344    }
37345}
37346
37347/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37348///
37349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37350#[inline]
37351#[target_feature(enable = "avx512f")]
37352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37353#[cfg_attr(test, assert_instr(vscalefss))]
37354pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37355    unsafe {
37356        transmute(vscalefss(
37357            a.as_f32x4(),
37358            b.as_f32x4(),
37359            f32x4::ZERO,
37360            k,
37361            _MM_FROUND_CUR_DIRECTION,
37362        ))
37363    }
37364}
37365
37366/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37367///
37368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37369#[inline]
37370#[target_feature(enable = "avx512f")]
37371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37372#[cfg_attr(test, assert_instr(vscalefsd))]
37373pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37374    unsafe {
37375        transmute(vscalefsd(
37376            a.as_f64x2(),
37377            b.as_f64x2(),
37378            f64x2::ZERO,
37379            0b11111111,
37380            _MM_FROUND_CUR_DIRECTION,
37381        ))
37382    }
37383}
37384
37385/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37386///
37387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37388#[inline]
37389#[target_feature(enable = "avx512f")]
37390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37391#[cfg_attr(test, assert_instr(vscalefsd))]
37392pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37393    unsafe {
37394        transmute(vscalefsd(
37395            a.as_f64x2(),
37396            b.as_f64x2(),
37397            src.as_f64x2(),
37398            k,
37399            _MM_FROUND_CUR_DIRECTION,
37400        ))
37401    }
37402}
37403
37404/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37405///
37406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37407#[inline]
37408#[target_feature(enable = "avx512f")]
37409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37410#[cfg_attr(test, assert_instr(vscalefsd))]
37411pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37412    unsafe {
37413        transmute(vscalefsd(
37414            a.as_f64x2(),
37415            b.as_f64x2(),
37416            f64x2::ZERO,
37417            k,
37418            _MM_FROUND_CUR_DIRECTION,
37419        ))
37420    }
37421}
37422
37423/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37424///
37425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
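///
/// A minimal usage sketch (illustrative only) showing how the writemask selects
/// between the fused result and the lower element of `a`:
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// let r = _mm_mask_fmadd_ss(a, 0b1, b, c); // lane 0 = 2.0 * 3.0 + 10.0 = 16.0
/// let s = _mm_mask_fmadd_ss(a, 0b0, b, c); // lane 0 = 2.0 (copied from `a`)
/// ```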
37426#[inline]
37427#[target_feature(enable = "avx512f")]
37428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37429#[cfg_attr(test, assert_instr(vfmadd))]
37430pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37431    unsafe {
37432        let mut fmadd: f32 = simd_extract!(a, 0);
37433        if (k & 0b00000001) != 0 {
37434            let extractb: f32 = simd_extract!(b, 0);
37435            let extractc: f32 = simd_extract!(c, 0);
37436            fmadd = fmaf32(fmadd, extractb, extractc);
37437        }
37438        simd_insert!(a, 0, fmadd)
37439    }
37440}
37441
37442/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37443///
37444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37445#[inline]
37446#[target_feature(enable = "avx512f")]
37447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37448#[cfg_attr(test, assert_instr(vfmadd))]
37449pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37450    unsafe {
37451        let mut fmadd: f32 = 0.;
37452        if (k & 0b00000001) != 0 {
37453            let extracta: f32 = simd_extract!(a, 0);
37454            let extractb: f32 = simd_extract!(b, 0);
37455            let extractc: f32 = simd_extract!(c, 0);
37456            fmadd = fmaf32(extracta, extractb, extractc);
37457        }
37458        simd_insert!(a, 0, fmadd)
37459    }
37460}
37461
37462/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37463///
37464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37465#[inline]
37466#[target_feature(enable = "avx512f")]
37467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37468#[cfg_attr(test, assert_instr(vfmadd))]
37469pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37470    unsafe {
37471        let mut fmadd: f32 = simd_extract!(c, 0);
37472        if (k & 0b00000001) != 0 {
37473            let extracta: f32 = simd_extract!(a, 0);
37474            let extractb: f32 = simd_extract!(b, 0);
37475            fmadd = fmaf32(extracta, extractb, fmadd);
37476        }
37477        simd_insert!(c, 0, fmadd)
37478    }
37479}
37480
37481/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37482///
37483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37484#[inline]
37485#[target_feature(enable = "avx512f")]
37486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37487#[cfg_attr(test, assert_instr(vfmadd))]
37488pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37489    unsafe {
37490        let mut fmadd: f64 = simd_extract!(a, 0);
37491        if (k & 0b00000001) != 0 {
37492            let extractb: f64 = simd_extract!(b, 0);
37493            let extractc: f64 = simd_extract!(c, 0);
37494            fmadd = fmaf64(fmadd, extractb, extractc);
37495        }
37496        simd_insert!(a, 0, fmadd)
37497    }
37498}
37499
37500/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37501///
37502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37503#[inline]
37504#[target_feature(enable = "avx512f")]
37505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37506#[cfg_attr(test, assert_instr(vfmadd))]
37507pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37508    unsafe {
37509        let mut fmadd: f64 = 0.;
37510        if (k & 0b00000001) != 0 {
37511            let extracta: f64 = simd_extract!(a, 0);
37512            let extractb: f64 = simd_extract!(b, 0);
37513            let extractc: f64 = simd_extract!(c, 0);
37514            fmadd = fmaf64(extracta, extractb, extractc);
37515        }
37516        simd_insert!(a, 0, fmadd)
37517    }
37518}
37519
37520/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37528    unsafe {
37529        let mut fmadd: f64 = simd_extract!(c, 0);
37530        if (k & 0b00000001) != 0 {
37531            let extracta: f64 = simd_extract!(a, 0);
37532            let extractb: f64 = simd_extract!(b, 0);
37533            fmadd = fmaf64(extracta, extractb, fmadd);
37534        }
37535        simd_insert!(c, 0, fmadd)
37536    }
37537}
37538
37539/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37540///
37541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
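///
/// A minimal usage sketch (illustrative only):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// let r = _mm_mask_fmsub_ss(a, 0b1, b, c); // lane 0 = 2.0 * 3.0 - 10.0 = -4.0
/// let s = _mm_mask_fmsub_ss(a, 0b0, b, c); // lane 0 = 2.0 (copied from `a`)
/// ```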
37542#[inline]
37543#[target_feature(enable = "avx512f")]
37544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37545#[cfg_attr(test, assert_instr(vfmsub))]
37546pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37547    unsafe {
37548        let mut fmsub: f32 = simd_extract!(a, 0);
37549        if (k & 0b00000001) != 0 {
37550            let extractb: f32 = simd_extract!(b, 0);
37551            let extractc: f32 = simd_extract!(c, 0);
37552            let extractc = -extractc;
37553            fmsub = fmaf32(fmsub, extractb, extractc);
37554        }
37555        simd_insert!(a, 0, fmsub)
37556    }
37557}
37558
37559/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37565#[cfg_attr(test, assert_instr(vfmsub))]
37566pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37567    unsafe {
37568        let mut fmsub: f32 = 0.;
37569        if (k & 0b00000001) != 0 {
37570            let extracta: f32 = simd_extract!(a, 0);
37571            let extractb: f32 = simd_extract!(b, 0);
37572            let extractc: f32 = simd_extract!(c, 0);
37573            let extractc = -extractc;
37574            fmsub = fmaf32(extracta, extractb, extractc);
37575        }
37576        simd_insert!(a, 0, fmsub)
37577    }
37578}
37579
37580/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37581///
37582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37583#[inline]
37584#[target_feature(enable = "avx512f")]
37585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37586#[cfg_attr(test, assert_instr(vfmsub))]
37587pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37588    unsafe {
37589        let mut fmsub: f32 = simd_extract!(c, 0);
37590        if (k & 0b00000001) != 0 {
37591            let extracta: f32 = simd_extract!(a, 0);
37592            let extractb: f32 = simd_extract!(b, 0);
37593            let extractc = -fmsub;
37594            fmsub = fmaf32(extracta, extractb, extractc);
37595        }
37596        simd_insert!(c, 0, fmsub)
37597    }
37598}
37599
37600/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37601///
37602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37603#[inline]
37604#[target_feature(enable = "avx512f")]
37605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37606#[cfg_attr(test, assert_instr(vfmsub))]
37607pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37608    unsafe {
37609        let mut fmsub: f64 = simd_extract!(a, 0);
37610        if (k & 0b00000001) != 0 {
37611            let extractb: f64 = simd_extract!(b, 0);
37612            let extractc: f64 = simd_extract!(c, 0);
37613            let extractc = -extractc;
37614            fmsub = fmaf64(fmsub, extractb, extractc);
37615        }
37616        simd_insert!(a, 0, fmsub)
37617    }
37618}
37619
37620/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37621///
37622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37623#[inline]
37624#[target_feature(enable = "avx512f")]
37625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37626#[cfg_attr(test, assert_instr(vfmsub))]
37627pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37628    unsafe {
37629        let mut fmsub: f64 = 0.;
37630        if (k & 0b00000001) != 0 {
37631            let extracta: f64 = simd_extract!(a, 0);
37632            let extractb: f64 = simd_extract!(b, 0);
37633            let extractc: f64 = simd_extract!(c, 0);
37634            let extractc = -extractc;
37635            fmsub = fmaf64(extracta, extractb, extractc);
37636        }
37637        simd_insert!(a, 0, fmsub)
37638    }
37639}
37640
37641/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37642///
37643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37644#[inline]
37645#[target_feature(enable = "avx512f")]
37646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37647#[cfg_attr(test, assert_instr(vfmsub))]
37648pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37649    unsafe {
37650        let mut fmsub: f64 = simd_extract!(c, 0);
37651        if (k & 0b00000001) != 0 {
37652            let extracta: f64 = simd_extract!(a, 0);
37653            let extractb: f64 = simd_extract!(b, 0);
37654            let extractc = -fmsub;
37655            fmsub = fmaf64(extracta, extractb, extractc);
37656        }
37657        simd_insert!(c, 0, fmsub)
37658    }
37659}
37660
37661/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37662///
37663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
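///
/// A minimal usage sketch (illustrative only):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// let r = _mm_mask_fnmadd_ss(a, 0b1, b, c); // lane 0 = -(2.0 * 3.0) + 10.0 = 4.0
/// let s = _mm_mask_fnmadd_ss(a, 0b0, b, c); // lane 0 = 2.0 (copied from `a`)
/// ```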
37664#[inline]
37665#[target_feature(enable = "avx512f")]
37666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37667#[cfg_attr(test, assert_instr(vfnmadd))]
37668pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37669    unsafe {
37670        let mut fnmadd: f32 = simd_extract!(a, 0);
37671        if (k & 0b00000001) != 0 {
37672            let extracta = -fnmadd;
37673            let extractb: f32 = simd_extract!(b, 0);
37674            let extractc: f32 = simd_extract!(c, 0);
37675            fnmadd = fmaf32(extracta, extractb, extractc);
37676        }
37677        simd_insert!(a, 0, fnmadd)
37678    }
37679}
37680
37681/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37682///
37683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37684#[inline]
37685#[target_feature(enable = "avx512f")]
37686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37687#[cfg_attr(test, assert_instr(vfnmadd))]
37688pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37689    unsafe {
37690        let mut fnmadd: f32 = 0.;
37691        if (k & 0b00000001) != 0 {
37692            let extracta: f32 = simd_extract!(a, 0);
37693            let extracta = -extracta;
37694            let extractb: f32 = simd_extract!(b, 0);
37695            let extractc: f32 = simd_extract!(c, 0);
37696            fnmadd = fmaf32(extracta, extractb, extractc);
37697        }
37698        simd_insert!(a, 0, fnmadd)
37699    }
37700}
37701
37702/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37703///
37704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37705#[inline]
37706#[target_feature(enable = "avx512f")]
37707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37708#[cfg_attr(test, assert_instr(vfnmadd))]
37709pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37710    unsafe {
37711        let mut fnmadd: f32 = simd_extract!(c, 0);
37712        if (k & 0b00000001) != 0 {
37713            let extracta: f32 = simd_extract!(a, 0);
37714            let extracta = -extracta;
37715            let extractb: f32 = simd_extract!(b, 0);
37716            fnmadd = fmaf32(extracta, extractb, fnmadd);
37717        }
37718        simd_insert!(c, 0, fnmadd)
37719    }
37720}
37721
37722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37723///
37724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37725#[inline]
37726#[target_feature(enable = "avx512f")]
37727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37728#[cfg_attr(test, assert_instr(vfnmadd))]
37729pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37730    unsafe {
37731        let mut fnmadd: f64 = simd_extract!(a, 0);
37732        if (k & 0b00000001) != 0 {
37733            let extracta = -fnmadd;
37734            let extractb: f64 = simd_extract!(b, 0);
37735            let extractc: f64 = simd_extract!(c, 0);
37736            fnmadd = fmaf64(extracta, extractb, extractc);
37737        }
37738        simd_insert!(a, 0, fnmadd)
37739    }
37740}
37741
37742/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37743///
37744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37745#[inline]
37746#[target_feature(enable = "avx512f")]
37747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37748#[cfg_attr(test, assert_instr(vfnmadd))]
37749pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37750    unsafe {
37751        let mut fnmadd: f64 = 0.;
37752        if (k & 0b00000001) != 0 {
37753            let extracta: f64 = simd_extract!(a, 0);
37754            let extracta = -extracta;
37755            let extractb: f64 = simd_extract!(b, 0);
37756            let extractc: f64 = simd_extract!(c, 0);
37757            fnmadd = fmaf64(extracta, extractb, extractc);
37758        }
37759        simd_insert!(a, 0, fnmadd)
37760    }
37761}
37762
37763/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37764///
37765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37766#[inline]
37767#[target_feature(enable = "avx512f")]
37768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37769#[cfg_attr(test, assert_instr(vfnmadd))]
37770pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37771    unsafe {
37772        let mut fnmadd: f64 = simd_extract!(c, 0);
37773        if (k & 0b00000001) != 0 {
37774            let extracta: f64 = simd_extract!(a, 0);
37775            let extracta = -extracta;
37776            let extractb: f64 = simd_extract!(b, 0);
37777            fnmadd = fmaf64(extracta, extractb, fnmadd);
37778        }
37779        simd_insert!(c, 0, fnmadd)
37780    }
37781}
37782
37783/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37784///
37785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
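///
/// # Example
///
/// A minimal illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; the inputs are arbitrary values chosen only to show the
/// writemask behaviour, built and read with the SSE helpers `_mm_set_ss`/`_mm_cvtss_f32`:
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Mask bit 0 set: dst[0] = -(2.0 * 3.0) - 1.0 = -7.0
/// assert_eq!(_mm_cvtss_f32(_mm_mask_fnmsub_ss(a, 0b1, b, c)), -7.0);
/// // Mask bit 0 clear: dst[0] is copied from a.
/// assert_eq!(_mm_cvtss_f32(_mm_mask_fnmsub_ss(a, 0b0, b, c)), 2.0);
/// ```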
37786#[inline]
37787#[target_feature(enable = "avx512f")]
37788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37789#[cfg_attr(test, assert_instr(vfnmsub))]
37790pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37791    unsafe {
37792        let mut fnmsub: f32 = simd_extract!(a, 0);
37793        if (k & 0b00000001) != 0 {
37794            let extracta = -fnmsub;
37795            let extractb: f32 = simd_extract!(b, 0);
37796            let extractc: f32 = simd_extract!(c, 0);
37797            let extractc = -extractc;
37798            fnmsub = fmaf32(extracta, extractb, extractc);
37799        }
37800        simd_insert!(a, 0, fnmsub)
37801    }
37802}
37803
37804/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37805///
37806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37807#[inline]
37808#[target_feature(enable = "avx512f")]
37809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37810#[cfg_attr(test, assert_instr(vfnmsub))]
37811pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37812    unsafe {
37813        let mut fnmsub: f32 = 0.;
37814        if (k & 0b00000001) != 0 {
37815            let extracta: f32 = simd_extract!(a, 0);
37816            let extracta = -extracta;
37817            let extractb: f32 = simd_extract!(b, 0);
37818            let extractc: f32 = simd_extract!(c, 0);
37819            let extractc = -extractc;
37820            fnmsub = fmaf32(extracta, extractb, extractc);
37821        }
37822        simd_insert!(a, 0, fnmsub)
37823    }
37824}
37825
37826/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37827///
37828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37829#[inline]
37830#[target_feature(enable = "avx512f")]
37831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37832#[cfg_attr(test, assert_instr(vfnmsub))]
37833pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37834    unsafe {
37835        let mut fnmsub: f32 = simd_extract!(c, 0);
37836        if (k & 0b00000001) != 0 {
37837            let extracta: f32 = simd_extract!(a, 0);
37838            let extracta = -extracta;
37839            let extractb: f32 = simd_extract!(b, 0);
37840            let extractc = -fnmsub;
37841            fnmsub = fmaf32(extracta, extractb, extractc);
37842        }
37843        simd_insert!(c, 0, fnmsub)
37844    }
37845}
37846
37847/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37848///
37849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37850#[inline]
37851#[target_feature(enable = "avx512f")]
37852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37853#[cfg_attr(test, assert_instr(vfnmsub))]
37854pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37855    unsafe {
37856        let mut fnmsub: f64 = simd_extract!(a, 0);
37857        if (k & 0b00000001) != 0 {
37858            let extracta = -fnmsub;
37859            let extractb: f64 = simd_extract!(b, 0);
37860            let extractc: f64 = simd_extract!(c, 0);
37861            let extractc = -extractc;
37862            fnmsub = fmaf64(extracta, extractb, extractc);
37863        }
37864        simd_insert!(a, 0, fnmsub)
37865    }
37866}
37867
37868/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37869///
37870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37871#[inline]
37872#[target_feature(enable = "avx512f")]
37873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37874#[cfg_attr(test, assert_instr(vfnmsub))]
37875pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37876    unsafe {
37877        let mut fnmsub: f64 = 0.;
37878        if (k & 0b00000001) != 0 {
37879            let extracta: f64 = simd_extract!(a, 0);
37880            let extracta = -extracta;
37881            let extractb: f64 = simd_extract!(b, 0);
37882            let extractc: f64 = simd_extract!(c, 0);
37883            let extractc = -extractc;
37884            fnmsub = fmaf64(extracta, extractb, extractc);
37885        }
37886        simd_insert!(a, 0, fnmsub)
37887    }
37888}
37889
37890/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37891///
37892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
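///
/// # Example
///
/// A minimal illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; note that for the `mask3` form both the unselected lower
/// element and the upper element come from `c`:
///
/// ```ignore
/// let a = _mm_set_sd(2.0);
/// let b = _mm_set_sd(3.0);
/// let c = _mm_set_sd(1.0);
/// // Mask bit 0 set: dst[0] = -(2.0 * 3.0) - 1.0 = -7.0
/// assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmsub_sd(a, b, c, 0b1)), -7.0);
/// // Mask bit 0 clear: dst[0] is copied from c.
/// assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmsub_sd(a, b, c, 0b0)), 1.0);
/// ```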
37893#[inline]
37894#[target_feature(enable = "avx512f")]
37895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37896#[cfg_attr(test, assert_instr(vfnmsub))]
37897pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37898    unsafe {
37899        let mut fnmsub: f64 = simd_extract!(c, 0);
37900        if (k & 0b00000001) != 0 {
37901            let extracta: f64 = simd_extract!(a, 0);
37902            let extracta = -extracta;
37903            let extractb: f64 = simd_extract!(b, 0);
37904            let extractc = -fnmsub;
37905            fnmsub = fmaf64(extracta, extractb, extractc);
37906        }
37907        simd_insert!(c, 0, fnmsub)
37908    }
37909}
37910
37911/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37912///
37913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37919///
37920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
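///
/// # Example
///
/// An illustrative sketch (not part of the upstream documentation) of selecting a
/// rounding mode through the const generic parameter, assuming an AVX-512F capable CPU:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// // Round toward negative infinity and suppress exceptions.
/// let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 3.0);
/// ```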
37921#[inline]
37922#[target_feature(enable = "avx512f")]
37923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37924#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37925#[rustc_legacy_const_generics(2)]
37926pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37927    unsafe {
37928        static_assert_rounding!(ROUNDING);
37929        let a = a.as_f32x4();
37930        let b = b.as_f32x4();
37931        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
37932        transmute(r)
37933    }
37934}
37935
37936/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37937///
37938/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37939/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37940/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37941/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37942/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37943/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37944///
37945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
37946#[inline]
37947#[target_feature(enable = "avx512f")]
37948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37949#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37950#[rustc_legacy_const_generics(4)]
37951pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
37952    src: __m128,
37953    k: __mmask8,
37954    a: __m128,
37955    b: __m128,
37956) -> __m128 {
37957    unsafe {
37958        static_assert_rounding!(ROUNDING);
37959        let a = a.as_f32x4();
37960        let b = b.as_f32x4();
37961        let src = src.as_f32x4();
37962        let r = vaddss(a, b, src, k, ROUNDING);
37963        transmute(r)
37964    }
37965}
37966
37967/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37968///
37969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37980#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37981#[rustc_legacy_const_generics(3)]
37982pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983    unsafe {
37984        static_assert_rounding!(ROUNDING);
37985        let a = a.as_f32x4();
37986        let b = b.as_f32x4();
37987        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
37988        transmute(r)
37989    }
37990}
37991
37992/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37993///
37994/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37995/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37996/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37997/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37998/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38000///
38001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38002#[inline]
38003#[target_feature(enable = "avx512f")]
38004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38005#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38006#[rustc_legacy_const_generics(2)]
38007pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38008    unsafe {
38009        static_assert_rounding!(ROUNDING);
38010        let a = a.as_f64x2();
38011        let b = b.as_f64x2();
38012        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38013        transmute(r)
38014    }
38015}
38016
38017/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38018///
38019/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38020/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38021/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38022/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38023/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38024/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38025///
38026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
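///
/// # Example
///
/// An illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; it shows the lower element being taken from `src` when the
/// mask bit is clear:
///
/// ```ignore
/// let src = _mm_set_sd(9.0);
/// let a = _mm_set_sd(1.5);
/// let b = _mm_set_sd(2.5);
/// let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b0, a, b);
/// // Mask bit 0 is clear, so the lower element comes from src rather than a + b.
/// assert_eq!(_mm_cvtsd_f64(r), 9.0);
/// ```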
38027#[inline]
38028#[target_feature(enable = "avx512f")]
38029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38030#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38031#[rustc_legacy_const_generics(4)]
38032pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38033    src: __m128d,
38034    k: __mmask8,
38035    a: __m128d,
38036    b: __m128d,
38037) -> __m128d {
38038    unsafe {
38039        static_assert_rounding!(ROUNDING);
38040        let a = a.as_f64x2();
38041        let b = b.as_f64x2();
38042        let src = src.as_f64x2();
38043        let r = vaddsd(a, b, src, k, ROUNDING);
38044        transmute(r)
38045    }
38046}
38047
38048/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38049///
38050/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38051/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38052/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38053/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38054/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38055/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38056///
38057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38058#[inline]
38059#[target_feature(enable = "avx512f")]
38060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38061#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38062#[rustc_legacy_const_generics(3)]
38063pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38064    unsafe {
38065        static_assert_rounding!(ROUNDING);
38066        let a = a.as_f64x2();
38067        let b = b.as_f64x2();
38068        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
38069        transmute(r)
38070    }
38071}
38072
38073/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38074///
38075/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38076/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38077/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38078/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38079/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38080/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38081///
38082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38083#[inline]
38084#[target_feature(enable = "avx512f")]
38085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38086#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38087#[rustc_legacy_const_generics(2)]
38088pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38089    unsafe {
38090        static_assert_rounding!(ROUNDING);
38091        let a = a.as_f32x4();
38092        let b = b.as_f32x4();
38093        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38094        transmute(r)
38095    }
38096}
38097
38098/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38099///
38100/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38101/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38102/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38103/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38104/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38105/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38106///
38107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38108#[inline]
38109#[target_feature(enable = "avx512f")]
38110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38111#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38112#[rustc_legacy_const_generics(4)]
38113pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38114    src: __m128,
38115    k: __mmask8,
38116    a: __m128,
38117    b: __m128,
38118) -> __m128 {
38119    unsafe {
38120        static_assert_rounding!(ROUNDING);
38121        let a = a.as_f32x4();
38122        let b = b.as_f32x4();
38123        let src = src.as_f32x4();
38124        let r = vsubss(a, b, src, k, ROUNDING);
38125        transmute(r)
38126    }
38127}
38128
38129/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38130///
38131/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38132/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38133/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38134/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38135/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38136/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38137///
38138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38139#[inline]
38140#[target_feature(enable = "avx512f")]
38141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38142#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38143#[rustc_legacy_const_generics(3)]
38144pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38145    unsafe {
38146        static_assert_rounding!(ROUNDING);
38147        let a = a.as_f32x4();
38148        let b = b.as_f32x4();
38149        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
38150        transmute(r)
38151    }
38152}
38153
38154/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38155///
38156/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38157/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38158/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38159/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38160/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38161/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38162///
38163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38164#[inline]
38165#[target_feature(enable = "avx512f")]
38166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38167#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38168#[rustc_legacy_const_generics(2)]
38169pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38170    unsafe {
38171        static_assert_rounding!(ROUNDING);
38172        let a = a.as_f64x2();
38173        let b = b.as_f64x2();
38174        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38175        transmute(r)
38176    }
38177}
38178
38179/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38180///
38181/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38182/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38183/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38184/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38185/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38186/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38187///
38188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38189#[inline]
38190#[target_feature(enable = "avx512f")]
38191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38192#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38193#[rustc_legacy_const_generics(4)]
38194pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38195    src: __m128d,
38196    k: __mmask8,
38197    a: __m128d,
38198    b: __m128d,
38199) -> __m128d {
38200    unsafe {
38201        static_assert_rounding!(ROUNDING);
38202        let a = a.as_f64x2();
38203        let b = b.as_f64x2();
38204        let src = src.as_f64x2();
38205        let r = vsubsd(a, b, src, k, ROUNDING);
38206        transmute(r)
38207    }
38208}
38209
38210/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38211///
38212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38218///
38219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38220#[inline]
38221#[target_feature(enable = "avx512f")]
38222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38223#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38224#[rustc_legacy_const_generics(3)]
38225pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38226    unsafe {
38227        static_assert_rounding!(ROUNDING);
38228        let a = a.as_f64x2();
38229        let b = b.as_f64x2();
38230        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
38231        transmute(r)
38232    }
38233}
38234
38235/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38236///
38237/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38238/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38239/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38240/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38241/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38243///
38244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38245#[inline]
38246#[target_feature(enable = "avx512f")]
38247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38248#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38249#[rustc_legacy_const_generics(2)]
38250pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38251    unsafe {
38252        static_assert_rounding!(ROUNDING);
38253        let a = a.as_f32x4();
38254        let b = b.as_f32x4();
38255        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38256        transmute(r)
38257    }
38258}
38259
38260/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38261///
38262/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38263/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38264/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38265/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38266/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38267/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38268///
38269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38270#[inline]
38271#[target_feature(enable = "avx512f")]
38272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38273#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38274#[rustc_legacy_const_generics(4)]
38275pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38276    src: __m128,
38277    k: __mmask8,
38278    a: __m128,
38279    b: __m128,
38280) -> __m128 {
38281    unsafe {
38282        static_assert_rounding!(ROUNDING);
38283        let a = a.as_f32x4();
38284        let b = b.as_f32x4();
38285        let src = src.as_f32x4();
38286        let r = vmulss(a, b, src, k, ROUNDING);
38287        transmute(r)
38288    }
38289}
38290
38291/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38292///
38293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38299///
38300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
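///
/// # Example
///
/// An illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; `_MM_FROUND_CUR_DIRECTION` uses the rounding mode currently
/// set in `MXCSR`:
///
/// ```ignore
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(4.0);
/// // Mask bit 0 clear: the lower element of the result is zeroed out.
/// let r = _mm_maskz_mul_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b0, a, b);
/// assert_eq!(_mm_cvtss_f32(r), 0.0);
/// ```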
38301#[inline]
38302#[target_feature(enable = "avx512f")]
38303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38304#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38305#[rustc_legacy_const_generics(3)]
38306pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38307    unsafe {
38308        static_assert_rounding!(ROUNDING);
38309        let a = a.as_f32x4();
38310        let b = b.as_f32x4();
38311        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
38312        transmute(r)
38313    }
38314}
38315
38316/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38317///
38318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38324///
38325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38326#[inline]
38327#[target_feature(enable = "avx512f")]
38328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38329#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38330#[rustc_legacy_const_generics(2)]
38331pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38332    unsafe {
38333        static_assert_rounding!(ROUNDING);
38334        let a = a.as_f64x2();
38335        let b = b.as_f64x2();
38336        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38337        transmute(r)
38338    }
38339}
38340
38341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38342///
38343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38349///
38350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38351#[inline]
38352#[target_feature(enable = "avx512f")]
38353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38354#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38355#[rustc_legacy_const_generics(4)]
38356pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38357    src: __m128d,
38358    k: __mmask8,
38359    a: __m128d,
38360    b: __m128d,
38361) -> __m128d {
38362    unsafe {
38363        static_assert_rounding!(ROUNDING);
38364        let a = a.as_f64x2();
38365        let b = b.as_f64x2();
38366        let src = src.as_f64x2();
38367        let r = vmulsd(a, b, src, k, ROUNDING);
38368        transmute(r)
38369    }
38370}
38371
38372/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38373///
38374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38380///
38381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38382#[inline]
38383#[target_feature(enable = "avx512f")]
38384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38385#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38386#[rustc_legacy_const_generics(3)]
38387pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38388    unsafe {
38389        static_assert_rounding!(ROUNDING);
38390        let a = a.as_f64x2();
38391        let b = b.as_f64x2();
38392        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
38393        transmute(r)
38394    }
38395}
38396
38397/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38398///
38399/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38400/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38401/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38402/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38403/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38404/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38405///
38406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
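///
/// # Example
///
/// An illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; because 1.0 / 3.0 is inexact, the chosen rounding direction
/// is visible in the result:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(3.0);
/// let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// // Rounding down and rounding up differ by one ULP for an inexact quotient.
/// assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
/// ```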
38407#[inline]
38408#[target_feature(enable = "avx512f")]
38409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38410#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38411#[rustc_legacy_const_generics(2)]
38412pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38413    unsafe {
38414        static_assert_rounding!(ROUNDING);
38415        let a = a.as_f32x4();
38416        let b = b.as_f32x4();
38417        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38418        transmute(r)
38419    }
38420}
38421
38422/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38423///
38424/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38425/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38426/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38427/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38428/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38429/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38430///
38431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38432#[inline]
38433#[target_feature(enable = "avx512f")]
38434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38435#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38436#[rustc_legacy_const_generics(4)]
38437pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38438    src: __m128,
38439    k: __mmask8,
38440    a: __m128,
38441    b: __m128,
38442) -> __m128 {
38443    unsafe {
38444        static_assert_rounding!(ROUNDING);
38445        let a = a.as_f32x4();
38446        let b = b.as_f32x4();
38447        let src = src.as_f32x4();
38448        let r = vdivss(a, b, src, k, ROUNDING);
38449        transmute(r)
38450    }
38451}
38452
38453/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38454///
38455/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38456/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38457/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38458/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38459/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38460/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38461///
38462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38463#[inline]
38464#[target_feature(enable = "avx512f")]
38465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38466#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38467#[rustc_legacy_const_generics(3)]
38468pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38469    unsafe {
38470        static_assert_rounding!(ROUNDING);
38471        let a = a.as_f32x4();
38472        let b = b.as_f32x4();
38473        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
38474        transmute(r)
38475    }
38476}
38477
38478/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38479///
38480/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38481/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38482/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38483/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38484/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38486///
38487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38488#[inline]
38489#[target_feature(enable = "avx512f")]
38490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38491#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38492#[rustc_legacy_const_generics(2)]
38493pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38494    unsafe {
38495        static_assert_rounding!(ROUNDING);
38496        let a = a.as_f64x2();
38497        let b = b.as_f64x2();
38498        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38499        transmute(r)
38500    }
38501}
38502
38503/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38504///
38505/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38506/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38507/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38508/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38509/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38510/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38511///
38512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38513#[inline]
38514#[target_feature(enable = "avx512f")]
38515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38516#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38517#[rustc_legacy_const_generics(4)]
38518pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38519    src: __m128d,
38520    k: __mmask8,
38521    a: __m128d,
38522    b: __m128d,
38523) -> __m128d {
38524    unsafe {
38525        static_assert_rounding!(ROUNDING);
38526        let a = a.as_f64x2();
38527        let b = b.as_f64x2();
38528        let src = src.as_f64x2();
38529        let r = vdivsd(a, b, src, k, ROUNDING);
38530        transmute(r)
38531    }
38532}
38533
38534/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38535///
38536/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38537/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38538/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38539/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38540/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38541/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38542///
38543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38544#[inline]
38545#[target_feature(enable = "avx512f")]
38546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38547#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38548#[rustc_legacy_const_generics(3)]
38549pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38550    unsafe {
38551        static_assert_rounding!(ROUNDING);
38552        let a = a.as_f64x2();
38553        let b = b.as_f64x2();
38554        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
38555        transmute(r)
38556    }
38557}
38558
38559/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38560/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38561///
38562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
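///
/// # Example
///
/// An illustrative sketch (not part of the upstream documentation), assuming an
/// AVX-512F capable CPU; passing `_MM_FROUND_NO_EXC` suppresses exception reporting,
/// while `_MM_FROUND_CUR_DIRECTION` would leave it unchanged:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(4.0);
/// let r = _mm_max_round_ss::<_MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 4.0);
/// ```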
38563#[inline]
38564#[target_feature(enable = "avx512f")]
38565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38566#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38567#[rustc_legacy_const_generics(2)]
38568pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38569    unsafe {
38570        static_assert_sae!(SAE);
38571        let a = a.as_f32x4();
38572        let b = b.as_f32x4();
38573        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
38574        transmute(r)
38575    }
38576}
38577
38578/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38579/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38580///
38581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38582#[inline]
38583#[target_feature(enable = "avx512f")]
38584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38585#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38586#[rustc_legacy_const_generics(4)]
38587pub fn _mm_mask_max_round_ss<const SAE: i32>(
38588    src: __m128,
38589    k: __mmask8,
38590    a: __m128,
38591    b: __m128,
38592) -> __m128 {
38593    unsafe {
38594        static_assert_sae!(SAE);
38595        let a = a.as_f32x4();
38596        let b = b.as_f32x4();
38597        let src = src.as_f32x4();
38598        let r = vmaxss(a, b, src, k, SAE);
38599        transmute(r)
38600    }
38601}
38602
38603/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38604/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38605///
38606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38607#[inline]
38608#[target_feature(enable = "avx512f")]
38609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38610#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38611#[rustc_legacy_const_generics(3)]
38612pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38613    unsafe {
38614        static_assert_sae!(SAE);
38615        let a = a.as_f32x4();
38616        let b = b.as_f32x4();
38617        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
38618        transmute(r)
38619    }
38620}
38621
38622/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38623/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38624///
38625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38626#[inline]
38627#[target_feature(enable = "avx512f")]
38628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38629#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38630#[rustc_legacy_const_generics(2)]
38631pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38632    unsafe {
38633        static_assert_sae!(SAE);
38634        let a = a.as_f64x2();
38635        let b = b.as_f64x2();
38636        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38637        transmute(r)
38638    }
38639}
38640
38641/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38642/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38643///
38644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_sd&expand=3663)
38645#[inline]
38646#[target_feature(enable = "avx512f")]
38647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38648#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38649#[rustc_legacy_const_generics(4)]
38650pub fn _mm_mask_max_round_sd<const SAE: i32>(
38651    src: __m128d,
38652    k: __mmask8,
38653    a: __m128d,
38654    b: __m128d,
38655) -> __m128d {
38656    unsafe {
38657        static_assert_sae!(SAE);
38658        let a = a.as_f64x2();
38659        let b = b.as_f64x2();
38660        let src = src.as_f64x2();
38661        let r = vmaxsd(a, b, src, k, SAE);
38662        transmute(r)
38663    }
38664}
38665
38666/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38668///
38669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38670#[inline]
38671#[target_feature(enable = "avx512f")]
38672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38673#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38674#[rustc_legacy_const_generics(3)]
38675pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38676    unsafe {
38677        static_assert_sae!(SAE);
38678        let a = a.as_f64x2();
38679        let b = b.as_f64x2();
38680        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
38681        transmute(r)
38682    }
38683}
38684
38685/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38686/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38687///
38688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_round_ss&expand=3782)
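///
/// A minimal usage sketch (same assumptions as the earlier examples: nightly
/// `feature(stdarch_x86_avx512)` and an AVX-512F CPU, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ps(8.0, 7.0, 6.0, 3.0);
///     let b = _mm_set_ss(-1.0);
///     // Lane 0 holds min(a[0], b[0]) = -1.0; lanes 1..=3 are copied from a.
///     let r = _mm_min_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), -1.0);
/// }
/// ```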
38689#[inline]
38690#[target_feature(enable = "avx512f")]
38691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38692#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38693#[rustc_legacy_const_generics(2)]
38694pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38695    unsafe {
38696        static_assert_sae!(SAE);
38697        let a = a.as_f32x4();
38698        let b = b.as_f32x4();
38699        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
38700        transmute(r)
38701    }
38702}
38703
38704/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38705/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38706///
38707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_round_ss&expand=3780)
38708#[inline]
38709#[target_feature(enable = "avx512f")]
38710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38711#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38712#[rustc_legacy_const_generics(4)]
38713pub fn _mm_mask_min_round_ss<const SAE: i32>(
38714    src: __m128,
38715    k: __mmask8,
38716    a: __m128,
38717    b: __m128,
38718) -> __m128 {
38719    unsafe {
38720        static_assert_sae!(SAE);
38721        let a = a.as_f32x4();
38722        let b = b.as_f32x4();
38723        let src = src.as_f32x4();
38724        let r = vminss(a, b, src, k, SAE);
38725        transmute(r)
38726    }
38727}
38728
38729/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38730/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38731///
38732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_round_ss&expand=3781)
38733#[inline]
38734#[target_feature(enable = "avx512f")]
38735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38736#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38737#[rustc_legacy_const_generics(3)]
38738pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38739    unsafe {
38740        static_assert_sae!(SAE);
38741        let a = a.as_f32x4();
38742        let b = b.as_f32x4();
38743        let r = vminss(a, b, f32x4::ZERO, k, SAE);
38744        transmute(r)
38745    }
38746}
38747
38748/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38749/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38750///
38751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_round_sd&expand=3779)
38752#[inline]
38753#[target_feature(enable = "avx512f")]
38754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38755#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38756#[rustc_legacy_const_generics(2)]
38757pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38758    unsafe {
38759        static_assert_sae!(SAE);
38760        let a = a.as_f64x2();
38761        let b = b.as_f64x2();
38762        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
38763        transmute(r)
38764    }
38765}
38766
38767/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38769///
38770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_round_sd&expand=3777)
38771#[inline]
38772#[target_feature(enable = "avx512f")]
38773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38774#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38775#[rustc_legacy_const_generics(4)]
38776pub fn _mm_mask_min_round_sd<const SAE: i32>(
38777    src: __m128d,
38778    k: __mmask8,
38779    a: __m128d,
38780    b: __m128d,
38781) -> __m128d {
38782    unsafe {
38783        static_assert_sae!(SAE);
38784        let a = a.as_f64x2();
38785        let b = b.as_f64x2();
38786        let src = src.as_f64x2();
38787        let r = vminsd(a, b, src, k, SAE);
38788        transmute(r)
38789    }
38790}
38791
38792/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38793/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38794///
38795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_round_sd&expand=3778)
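///
/// A minimal usage sketch (same assumptions as the earlier examples; `ignore`d):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_pd(5.0, 2.0);
///     let b = _mm_set_sd(1.0);
///     // Mask bit 0 set: lane 0 holds min(a[0], b[0]) = 1.0; lane 1 comes from a.
///     let r = _mm_maskz_min_round_sd::<_MM_FROUND_NO_EXC>(0b1, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 1.0);
///     // Mask bit 0 clear: lane 0 is zeroed.
///     let r = _mm_maskz_min_round_sd::<_MM_FROUND_NO_EXC>(0b0, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 0.0);
/// }
/// ```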
38796#[inline]
38797#[target_feature(enable = "avx512f")]
38798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38799#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38800#[rustc_legacy_const_generics(3)]
38801pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38802    unsafe {
38803        static_assert_sae!(SAE);
38804        let a = a.as_f64x2();
38805        let b = b.as_f64x2();
38806        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
38807        transmute(r)
38808    }
38809}
38810
38811/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38812///
38813/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38814/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38815/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38816/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38817/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38818/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38819///
38820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_round_ss&expand=5383)
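///
/// A minimal usage sketch showing an explicit rounding mode (same assumptions as
/// the earlier examples; `ignore`d):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ss(0.0);
///     let b = _mm_set_ss(9.0);
///     // Lane 0 becomes sqrt(b[0]) = 3.0, computed with round-to-nearest and
///     // suppressed exceptions; the upper lanes are copied from a.
///     let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// ```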
38821#[inline]
38822#[target_feature(enable = "avx512f")]
38823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38824#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38825#[rustc_legacy_const_generics(2)]
38826pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38827    unsafe {
38828        static_assert_rounding!(ROUNDING);
38829        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38830    }
38831}
38832
38833/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38834///
38835/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38836/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38837/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38838/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38839/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38841///
38842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_round_ss&expand=5381)
38843#[inline]
38844#[target_feature(enable = "avx512f")]
38845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38846#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38847#[rustc_legacy_const_generics(4)]
38848pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38849    src: __m128,
38850    k: __mmask8,
38851    a: __m128,
38852    b: __m128,
38853) -> __m128 {
38854    unsafe {
38855        static_assert_rounding!(ROUNDING);
38856        vsqrtss(a, b, src, k, ROUNDING)
38857    }
38858}
38859
38860/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38861///
38862/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38863/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38864/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38865/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38866/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38867/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38868///
38869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_round_ss&expand=5382)
38870#[inline]
38871#[target_feature(enable = "avx512f")]
38872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38873#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38874#[rustc_legacy_const_generics(3)]
38875pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38876    unsafe {
38877        static_assert_rounding!(ROUNDING);
38878        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38879    }
38880}
38881
38882/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38883///
38884/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38885/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38886/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38887/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38888/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38889/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38890///
38891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_round_sd&expand=5380)
38892#[inline]
38893#[target_feature(enable = "avx512f")]
38894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38895#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38896#[rustc_legacy_const_generics(2)]
38897pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38898    unsafe {
38899        static_assert_rounding!(ROUNDING);
38900        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38901    }
38902}
38903
38904/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38905///
38906/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38907/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38908/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38909/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38910/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38911/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38912///
38913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_round_sd&expand=5378)
38914#[inline]
38915#[target_feature(enable = "avx512f")]
38916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38917#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38918#[rustc_legacy_const_generics(4)]
38919pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38920    src: __m128d,
38921    k: __mmask8,
38922    a: __m128d,
38923    b: __m128d,
38924) -> __m128d {
38925    unsafe {
38926        static_assert_rounding!(ROUNDING);
38927        vsqrtsd(a, b, src, k, ROUNDING)
38928    }
38929}
38930
38931/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38932///
38933/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38934/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38935/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38936/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38937/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38938/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38939///
38940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_round_sd&expand=5379)
38941#[inline]
38942#[target_feature(enable = "avx512f")]
38943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38944#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38945#[rustc_legacy_const_generics(3)]
38946pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
38947    k: __mmask8,
38948    a: __m128d,
38949    b: __m128d,
38950) -> __m128d {
38951    unsafe {
38952        static_assert_rounding!(ROUNDING);
38953        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
38954    }
38955}
38956
38957/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38958/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38959///
38960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_round_ss&expand=2856)
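///
/// A minimal usage sketch (same assumptions as the earlier examples; `ignore`d):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ss(0.0);
///     let b = _mm_set_ss(8.0);
///     // floor(log2(8.0)) = 3.0 lands in lane 0; the upper lanes come from a.
///     let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// ```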
38961#[inline]
38962#[target_feature(enable = "avx512f")]
38963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38964#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38965#[rustc_legacy_const_generics(2)]
38966pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38967    unsafe {
38968        static_assert_sae!(SAE);
38969        let a = a.as_f32x4();
38970        let b = b.as_f32x4();
38971        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
38972        transmute(r)
38973    }
38974}
38975
38976/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38977/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38978///
38979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_round_ss&expand=2857)
38980#[inline]
38981#[target_feature(enable = "avx512f")]
38982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38983#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38984#[rustc_legacy_const_generics(4)]
38985pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
38986    src: __m128,
38987    k: __mmask8,
38988    a: __m128,
38989    b: __m128,
38990) -> __m128 {
38991    unsafe {
38992        static_assert_sae!(SAE);
38993        let a = a.as_f32x4();
38994        let b = b.as_f32x4();
38995        let src = src.as_f32x4();
38996        let r = vgetexpss(a, b, src, k, SAE);
38997        transmute(r)
38998    }
38999}
39000
39001/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39003///
39004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_round_ss&expand=2858)
39005#[inline]
39006#[target_feature(enable = "avx512f")]
39007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39008#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39009#[rustc_legacy_const_generics(3)]
39010pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39011    unsafe {
39012        static_assert_sae!(SAE);
39013        let a = a.as_f32x4();
39014        let b = b.as_f32x4();
39015        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
39016        transmute(r)
39017    }
39018}
39019
39020/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39021/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39022///
39023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_round_sd&expand=2853)
39024#[inline]
39025#[target_feature(enable = "avx512f")]
39026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39027#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39028#[rustc_legacy_const_generics(2)]
39029pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39030    unsafe {
39031        static_assert_sae!(SAE);
39032        let a = a.as_f64x2();
39033        let b = b.as_f64x2();
39034        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
39035        transmute(r)
39036    }
39037}
39038
39039/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39041///
39042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_round_sd&expand=2854)
39043#[inline]
39044#[target_feature(enable = "avx512f")]
39045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39046#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39047#[rustc_legacy_const_generics(4)]
39048pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39049    src: __m128d,
39050    k: __mmask8,
39051    a: __m128d,
39052    b: __m128d,
39053) -> __m128d {
39054    unsafe {
39055        static_assert_sae!(SAE);
39056        let a = a.as_f64x2();
39057        let b = b.as_f64x2();
39058        let src = src.as_f64x2();
39059        let r = vgetexpsd(a, b, src, k, SAE);
39060        transmute(r)
39061    }
39062}
39063
39064/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39066///
39067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_round_sd&expand=2855)
39068#[inline]
39069#[target_feature(enable = "avx512f")]
39070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39071#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39072#[rustc_legacy_const_generics(3)]
39073pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39074    unsafe {
39075        static_assert_sae!(SAE);
39076        let a = a.as_f64x2();
39077        let b = b.as_f64x2();
39078        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
39079        transmute(r)
39080    }
39081}
39082
39083/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39084/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39085///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39086///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39087///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39088///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39089/// The sign is determined by sc which can take the following values:\
39090///    _MM_MANT_SIGN_src     // sign = sign(src)\
39091///    _MM_MANT_SIGN_zero    // sign = 0\
39092///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39093/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39094///
39095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_round_ss&expand=2892)
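///
/// A minimal usage sketch (same assumptions as the earlier examples; `ignore`d).
/// The `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_SRC` constants are assumed to be the
/// Rust spellings of the interv and sc values listed above:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ss(0.0);
///     let b = _mm_set_ss(10.0);
///     // 10.0 = 1.25 * 2^3, so normalizing to [1, 2) with the source sign yields 1.25.
///     let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 1.25);
/// }
/// ```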
39096#[inline]
39097#[target_feature(enable = "avx512f")]
39098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39099#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39100#[rustc_legacy_const_generics(2, 3, 4)]
39101pub fn _mm_getmant_round_ss<
39102    const NORM: _MM_MANTISSA_NORM_ENUM,
39103    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39104    const SAE: i32,
39105>(
39106    a: __m128,
39107    b: __m128,
39108) -> __m128 {
39109    unsafe {
39110        static_assert_uimm_bits!(NORM, 4);
39111        static_assert_uimm_bits!(SIGN, 2);
39112        static_assert_mantissas_sae!(SAE);
39113        let a = a.as_f32x4();
39114        let b = b.as_f32x4();
39115        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
39116        transmute(r)
39117    }
39118}
39119
39120/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39121/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39122///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39123///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39124///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39125///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39126/// The sign is determined by sc which can take the following values:\
39127///    _MM_MANT_SIGN_src     // sign = sign(src)\
39128///    _MM_MANT_SIGN_zero    // sign = 0\
39129///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39130/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39131///
39132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_round_ss&expand=2893)
39133#[inline]
39134#[target_feature(enable = "avx512f")]
39135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39136#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39137#[rustc_legacy_const_generics(4, 5, 6)]
39138pub fn _mm_mask_getmant_round_ss<
39139    const NORM: _MM_MANTISSA_NORM_ENUM,
39140    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39141    const SAE: i32,
39142>(
39143    src: __m128,
39144    k: __mmask8,
39145    a: __m128,
39146    b: __m128,
39147) -> __m128 {
39148    unsafe {
39149        static_assert_uimm_bits!(NORM, 4);
39150        static_assert_uimm_bits!(SIGN, 2);
39151        static_assert_mantissas_sae!(SAE);
39152        let a = a.as_f32x4();
39153        let b = b.as_f32x4();
39154        let src = src.as_f32x4();
39155        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
39156        transmute(r)
39157    }
39158}
39159
39160/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39161/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39162///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39163///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39164///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39165///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39166/// The sign is determined by sc which can take the following values:\
39167///    _MM_MANT_SIGN_src     // sign = sign(src)\
39168///    _MM_MANT_SIGN_zero    // sign = 0\
39169///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39170/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39171///
39172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_round_ss&expand=2894)
39173#[inline]
39174#[target_feature(enable = "avx512f")]
39175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39176#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39177#[rustc_legacy_const_generics(3, 4, 5)]
39178pub fn _mm_maskz_getmant_round_ss<
39179    const NORM: _MM_MANTISSA_NORM_ENUM,
39180    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39181    const SAE: i32,
39182>(
39183    k: __mmask8,
39184    a: __m128,
39185    b: __m128,
39186) -> __m128 {
39187    unsafe {
39188        static_assert_uimm_bits!(NORM, 4);
39189        static_assert_uimm_bits!(SIGN, 2);
39190        static_assert_mantissas_sae!(SAE);
39191        let a = a.as_f32x4();
39192        let b = b.as_f32x4();
39193        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
39194        transmute(r)
39195    }
39196}
39197
39198/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39201///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39202///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39203///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205///    _MM_MANT_SIGN_src     // sign = sign(src)\
39206///    _MM_MANT_SIGN_zero    // sign = 0\
39207///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_round_sd&expand=2889)
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39214#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(2, 3, 4)]
39216pub fn _mm_getmant_round_sd<
39217    const NORM: _MM_MANTISSA_NORM_ENUM,
39218    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219    const SAE: i32,
39220>(
39221    a: __m128d,
39222    b: __m128d,
39223) -> __m128d {
39224    unsafe {
39225        static_assert_uimm_bits!(NORM, 4);
39226        static_assert_uimm_bits!(SIGN, 2);
39227        static_assert_mantissas_sae!(SAE);
39228        let a = a.as_f64x2();
39229        let b = b.as_f64x2();
39230        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
39231        transmute(r)
39232    }
39233}
39234
39235/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39236/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39237///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39238///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39239///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39240///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39241/// The sign is determined by sc which can take the following values:\
39242///    _MM_MANT_SIGN_src     // sign = sign(src)\
39243///    _MM_MANT_SIGN_zero    // sign = 0\
39244///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39245/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39246///
39247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_round_sd&expand=2890)
39248#[inline]
39249#[target_feature(enable = "avx512f")]
39250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39251#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39252#[rustc_legacy_const_generics(4, 5, 6)]
39253pub fn _mm_mask_getmant_round_sd<
39254    const NORM: _MM_MANTISSA_NORM_ENUM,
39255    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39256    const SAE: i32,
39257>(
39258    src: __m128d,
39259    k: __mmask8,
39260    a: __m128d,
39261    b: __m128d,
39262) -> __m128d {
39263    unsafe {
39264        static_assert_uimm_bits!(NORM, 4);
39265        static_assert_uimm_bits!(SIGN, 2);
39266        static_assert_mantissas_sae!(SAE);
39267        let a = a.as_f64x2();
39268        let b = b.as_f64x2();
39269        let src = src.as_f64x2();
39270        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
39271        transmute(r)
39272    }
39273}
39274
39275/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39276/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39277///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39278///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39279///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39280///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39281/// The sign is determined by sc which can take the following values:\
39282///    _MM_MANT_SIGN_src     // sign = sign(src)\
39283///    _MM_MANT_SIGN_zero    // sign = 0\
39284///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39285/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39286///
39287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_round_sd&expand=2891)
39288#[inline]
39289#[target_feature(enable = "avx512f")]
39290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39291#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39292#[rustc_legacy_const_generics(3, 4, 5)]
39293pub fn _mm_maskz_getmant_round_sd<
39294    const NORM: _MM_MANTISSA_NORM_ENUM,
39295    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39296    const SAE: i32,
39297>(
39298    k: __mmask8,
39299    a: __m128d,
39300    b: __m128d,
39301) -> __m128d {
39302    unsafe {
39303        static_assert_uimm_bits!(NORM, 4);
39304        static_assert_uimm_bits!(SIGN, 2);
39305        static_assert_mantissas_sae!(SAE);
39306        let a = a.as_f64x2();
39307        let b = b.as_f64x2();
39308        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
39309        transmute(r)
39310    }
39311}
39312
39313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39317/// * [`_MM_FROUND_TO_POS_INF`] : round up
39318/// * [`_MM_FROUND_TO_ZERO`] : truncate
39319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39320///
39321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_round_ss&expand=4796)
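///
/// A minimal usage sketch (same assumptions as the earlier examples; `ignore`d):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ss(0.0);
///     let b = _mm_set_ss(2.7);
///     // IMM8 = 0 keeps zero fraction bits, i.e. rounds b[0] to the nearest integer.
///     let r = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// ```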
39323#[inline]
39324#[target_feature(enable = "avx512f")]
39325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39327#[rustc_legacy_const_generics(2, 3)]
39328pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39329    unsafe {
39330        static_assert_uimm_bits!(IMM8, 8);
39331        static_assert_mantissas_sae!(SAE);
39332        let a = a.as_f32x4();
39333        let b = b.as_f32x4();
39334        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
39335        transmute(r)
39336    }
39337}
39338
39339/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39340/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39341/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39342/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39343/// * [`_MM_FROUND_TO_POS_INF`] : round up
39344/// * [`_MM_FROUND_TO_ZERO`] : truncate
39345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39346///
39347/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_round_ss&expand=4794)
39349#[inline]
39350#[target_feature(enable = "avx512f")]
39351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39352#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39353#[rustc_legacy_const_generics(4, 5)]
39354pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39355    src: __m128,
39356    k: __mmask8,
39357    a: __m128,
39358    b: __m128,
39359) -> __m128 {
39360    unsafe {
39361        static_assert_uimm_bits!(IMM8, 8);
39362        static_assert_mantissas_sae!(SAE);
39363        let a = a.as_f32x4();
39364        let b = b.as_f32x4();
39365        let src = src.as_f32x4();
39366        let r = vrndscaless(a, b, src, k, IMM8, SAE);
39367        transmute(r)
39368    }
39369}
39370
39371/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39372/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39373/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39374/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39375/// * [`_MM_FROUND_TO_POS_INF`] : round up
39376/// * [`_MM_FROUND_TO_ZERO`] : truncate
39377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39378///
39379/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_round_ss&expand=4795)
39381#[inline]
39382#[target_feature(enable = "avx512f")]
39383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39384#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39385#[rustc_legacy_const_generics(3, 4)]
39386pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39387    k: __mmask8,
39388    a: __m128,
39389    b: __m128,
39390) -> __m128 {
39391    unsafe {
39392        static_assert_uimm_bits!(IMM8, 8);
39393        static_assert_mantissas_sae!(SAE);
39394        let a = a.as_f32x4();
39395        let b = b.as_f32x4();
39396        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
39397        transmute(r)
39398    }
39399}
39400
39401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39405/// * [`_MM_FROUND_TO_POS_INF`] : round up
39406/// * [`_MM_FROUND_TO_ZERO`] : truncate
39407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39408///
39409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_round_sd&expand=4793)
39411#[inline]
39412#[target_feature(enable = "avx512f")]
39413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39415#[rustc_legacy_const_generics(2, 3)]
39416pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39417    unsafe {
39418        static_assert_uimm_bits!(IMM8, 8);
39419        static_assert_mantissas_sae!(SAE);
39420        let a = a.as_f64x2();
39421        let b = b.as_f64x2();
39422        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
39423        transmute(r)
39424    }
39425}
39426
39427/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39428/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39429/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39430/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39431/// * [`_MM_FROUND_TO_POS_INF`] : round up
39432/// * [`_MM_FROUND_TO_ZERO`] : truncate
39433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39434///
39435/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_round_sd&expand=4791)
39437#[inline]
39438#[target_feature(enable = "avx512f")]
39439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39440#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39441#[rustc_legacy_const_generics(4, 5)]
39442pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39443    src: __m128d,
39444    k: __mmask8,
39445    a: __m128d,
39446    b: __m128d,
39447) -> __m128d {
39448    unsafe {
39449        static_assert_uimm_bits!(IMM8, 8);
39450        static_assert_mantissas_sae!(SAE);
39451        let a = a.as_f64x2();
39452        let b = b.as_f64x2();
39453        let src = src.as_f64x2();
39454        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
39455        transmute(r)
39456    }
39457}
39458
39459/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39460/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39461/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39462/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39463/// * [`_MM_FROUND_TO_POS_INF`] : round up
39464/// * [`_MM_FROUND_TO_ZERO`] : truncate
39465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39466///
39467/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_round_sd&expand=4792)
39469#[inline]
39470#[target_feature(enable = "avx512f")]
39471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39472#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39473#[rustc_legacy_const_generics(3, 4)]
39474pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39475    k: __mmask8,
39476    a: __m128d,
39477    b: __m128d,
39478) -> __m128d {
39479    unsafe {
39480        static_assert_uimm_bits!(IMM8, 8);
39481        static_assert_mantissas_sae!(SAE);
39482        let a = a.as_f64x2();
39483        let b = b.as_f64x2();
39484        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
39485        transmute(r)
39486    }
39487}
39488
39489/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39490///
39491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39497///
39498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_round_ss&expand=4895)
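///
/// A minimal usage sketch (same assumptions as the earlier examples; `ignore`d):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// // SAFETY: the caller has verified AVX-512F support.
/// unsafe {
///     let a = _mm_set_ss(1.5);
///     let b = _mm_set_ss(3.0);
///     // Lane 0 becomes a[0] * 2^floor(b[0]) = 1.5 * 8.0 = 12.0.
///     let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 12.0);
/// }
/// ```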
39499#[inline]
39500#[target_feature(enable = "avx512f")]
39501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39502#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39503#[rustc_legacy_const_generics(2)]
39504pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39505    unsafe {
39506        static_assert_rounding!(ROUNDING);
39507        let a = a.as_f32x4();
39508        let b = b.as_f32x4();
39509        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39510        transmute(r)
39511    }
39512}
39513
39514/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39515///
39516/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39517/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39518/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39519/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39520/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39521/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39522///
39523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_round_ss&expand=4893)
39524#[inline]
39525#[target_feature(enable = "avx512f")]
39526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39527#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39528#[rustc_legacy_const_generics(4)]
39529pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39530    src: __m128,
39531    k: __mmask8,
39532    a: __m128,
39533    b: __m128,
39534) -> __m128 {
39535    unsafe {
39536        static_assert_rounding!(ROUNDING);
39537        let a = a.as_f32x4();
39538        let b = b.as_f32x4();
39539        let src = src.as_f32x4();
39540        let r = vscalefss(a, b, src, k, ROUNDING);
39541        transmute(r)
39542    }
39543}
39544
39545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39546///
39547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39553///
39554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_round_ss&expand=4894)
39555#[inline]
39556#[target_feature(enable = "avx512f")]
39557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39558#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39559#[rustc_legacy_const_generics(3)]
39560pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39561    unsafe {
39562        static_assert_rounding!(ROUNDING);
39563        let a = a.as_f32x4();
39564        let b = b.as_f32x4();
39565        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39566        transmute(r)
39567    }
39568}
39569
39570/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39571///
39572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39578///
39579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_round_sd&expand=4892)
39580#[inline]
39581#[target_feature(enable = "avx512f")]
39582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39583#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39584#[rustc_legacy_const_generics(2)]
39585pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39586    unsafe {
39587        static_assert_rounding!(ROUNDING);
39588        let a = a.as_f64x2();
39589        let b = b.as_f64x2();
39590        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39591        transmute(r)
39592    }
39593}
39594
39595/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39596///
39597/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39598/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39599/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39600/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39601/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39602/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39603///
39604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_round_sd&expand=4890)
39605#[inline]
39606#[target_feature(enable = "avx512f")]
39607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39608#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39609#[rustc_legacy_const_generics(4)]
39610pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39611    src: __m128d,
39612    k: __mmask8,
39613    a: __m128d,
39614    b: __m128d,
39615) -> __m128d {
39616    unsafe {
        static_assert_rounding!(ROUNDING);
39617        let a = a.as_f64x2();
39618        let b = b.as_f64x2();
39619        let src = src.as_f64x2();
39620        let r = vscalefsd(a, b, src, k, ROUNDING);
39621        transmute(r)
39622    }
39623}
39624
39625/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39626///
39627/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39628/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39629/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39630/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39631/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39632/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39633///
39634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_round_sd&expand=4891)
39635#[inline]
39636#[target_feature(enable = "avx512f")]
39637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39638#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39639#[rustc_legacy_const_generics(3)]
39640pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39641    k: __mmask8,
39642    a: __m128d,
39643    b: __m128d,
39644) -> __m128d {
39645    unsafe {
39646        static_assert_rounding!(ROUNDING);
39647        let a = a.as_f64x2();
39648        let b = b.as_f64x2();
39649        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
39650        transmute(r)
39651    }
39652}
39653
39654/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39655///
39656/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39657/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39658/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39659/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39660/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39661/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39662///
39663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
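///
/// # Example
///
/// A minimal usage sketch (illustrative only, not part of the original documentation),
/// assuming a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and an
/// AVX-512F-capable CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: fma(2.0, 3.0, 1.0) = 7.0, rounded to nearest with exceptions
/// // suppressed; the upper three lanes are copied from `a`.
/// let r = unsafe {
///     _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// assert_eq!(_mm_cvtss_f32(r), 7.0);
/// ```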
39664#[inline]
39665#[target_feature(enable = "avx512f")]
39666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39667#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39668#[rustc_legacy_const_generics(3)]
39669pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39670    unsafe {
39671        static_assert_rounding!(ROUNDING);
39672        let extracta: f32 = simd_extract!(a, 0);
39673        let extractb: f32 = simd_extract!(b, 0);
39674        let extractc: f32 = simd_extract!(c, 0);
39675        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39676        simd_insert!(a, 0, r)
39677    }
39678}
39679
39680/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39681///
39682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39688///
39689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
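///
/// # Example
///
/// An illustrative sketch of the writemask behaviour (not from the original
/// documentation); assumes nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// let masked_off = unsafe { _mm_mask_fmadd_round_ss::<{ R }>(a, 0, b, c) };
/// assert_eq!(_mm_cvtss_f32(masked_off), 2.0); // mask bit 0 clear: lower lane copied from `a`
/// let fused = unsafe { _mm_mask_fmadd_round_ss::<{ R }>(a, 1, b, c) };
/// assert_eq!(_mm_cvtss_f32(fused), 7.0); // mask bit 0 set: fma(2.0, 3.0, 1.0)
/// ```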
39690#[inline]
39691#[target_feature(enable = "avx512f")]
39692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39693#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39694#[rustc_legacy_const_generics(4)]
39695pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39696    a: __m128,
39697    k: __mmask8,
39698    b: __m128,
39699    c: __m128,
39700) -> __m128 {
39701    unsafe {
39702        static_assert_rounding!(ROUNDING);
39703        let mut fmadd: f32 = simd_extract!(a, 0);
39704        if (k & 0b00000001) != 0 {
39705            let extractb: f32 = simd_extract!(b, 0);
39706            let extractc: f32 = simd_extract!(c, 0);
39707            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39708        }
39709        simd_insert!(a, 0, fmadd)
39710    }
39711}
39712
39713/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39714///
39715/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39716/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39717/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39718/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39719/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39720/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39721///
39722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
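///
/// # Example
///
/// An illustrative sketch of the zeromask behaviour (not from the original
/// documentation); assumes nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// let zeroed = unsafe { _mm_maskz_fmadd_round_ss::<{ R }>(0, a, b, c) };
/// assert_eq!(_mm_cvtss_f32(zeroed), 0.0); // mask bit 0 clear: lower lane zeroed
/// let fused = unsafe { _mm_maskz_fmadd_round_ss::<{ R }>(1, a, b, c) };
/// assert_eq!(_mm_cvtss_f32(fused), 7.0); // mask bit 0 set: fma(2.0, 3.0, 1.0)
/// ```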
39723#[inline]
39724#[target_feature(enable = "avx512f")]
39725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39726#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39727#[rustc_legacy_const_generics(4)]
39728pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39729    k: __mmask8,
39730    a: __m128,
39731    b: __m128,
39732    c: __m128,
39733) -> __m128 {
39734    unsafe {
39735        static_assert_rounding!(ROUNDING);
39736        let mut fmadd: f32 = 0.;
39737        if (k & 0b00000001) != 0 {
39738            let extracta: f32 = simd_extract!(a, 0);
39739            let extractb: f32 = simd_extract!(b, 0);
39740            let extractc: f32 = simd_extract!(c, 0);
39741            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39742        }
39743        simd_insert!(a, 0, fmadd)
39744    }
39745}
39746
39747/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39748///
39749/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39750/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39751/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39752/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39753/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39754/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39755///
39756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
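///
/// # Example
///
/// An illustrative sketch of the `mask3` variant, in which the result is merged into
/// `c` (not from the original documentation); assumes nightly with
/// `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set1_ps(1.0);
/// let kept = unsafe { _mm_mask3_fmadd_round_ss::<{ R }>(a, b, c, 0) };
/// assert_eq!(_mm_cvtss_f32(kept), 1.0); // mask bit 0 clear: lower lane kept from `c`
/// let fused = unsafe { _mm_mask3_fmadd_round_ss::<{ R }>(a, b, c, 1) };
/// assert_eq!(_mm_cvtss_f32(fused), 7.0); // fma(2.0, 3.0, 1.0); upper lanes come from `c`
/// ```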
39757#[inline]
39758#[target_feature(enable = "avx512f")]
39759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39760#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39761#[rustc_legacy_const_generics(4)]
39762pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39763    a: __m128,
39764    b: __m128,
39765    c: __m128,
39766    k: __mmask8,
39767) -> __m128 {
39768    unsafe {
39769        static_assert_rounding!(ROUNDING);
39770        let mut fmadd: f32 = simd_extract!(c, 0);
39771        if (k & 0b00000001) != 0 {
39772            let extracta: f32 = simd_extract!(a, 0);
39773            let extractb: f32 = simd_extract!(b, 0);
39774            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39775        }
39776        simd_insert!(c, 0, fmadd)
39777    }
39778}
39779
39780/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39781///
39782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39788///
39789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
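///
/// # Example
///
/// A minimal usage sketch (illustrative only, not from the original documentation),
/// assuming nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(10.0, 1.5); // upper = 10.0, lower = 1.5
/// let b = _mm_set_sd(2.0);
/// let c = _mm_set_sd(0.25);
/// let r = unsafe {
///     _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// // Lower lane: fma(1.5, 2.0, 0.25) = 3.25; the upper lane is copied from `a`.
/// assert_eq!(_mm_cvtsd_f64(r), 3.25);
/// assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(r, r)), 10.0);
/// ```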
39790#[inline]
39791#[target_feature(enable = "avx512f")]
39792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39793#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39794#[rustc_legacy_const_generics(3)]
39795pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39796    unsafe {
39797        static_assert_rounding!(ROUNDING);
39798        let extracta: f64 = simd_extract!(a, 0);
39799        let extractb: f64 = simd_extract!(b, 0);
39800        let extractc: f64 = simd_extract!(c, 0);
39801        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39802        simd_insert!(a, 0, fmadd)
39803    }
39804}
39805
39806/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39807///
39808/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39809/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39810/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39811/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39812/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39813/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39814///
39815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39816#[inline]
39817#[target_feature(enable = "avx512f")]
39818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39819#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39820#[rustc_legacy_const_generics(4)]
39821pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39822    a: __m128d,
39823    k: __mmask8,
39824    b: __m128d,
39825    c: __m128d,
39826) -> __m128d {
39827    unsafe {
39828        static_assert_rounding!(ROUNDING);
39829        let mut fmadd: f64 = simd_extract!(a, 0);
39830        if (k & 0b00000001) != 0 {
39831            let extractb: f64 = simd_extract!(b, 0);
39832            let extractc: f64 = simd_extract!(c, 0);
39833            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39834        }
39835        simd_insert!(a, 0, fmadd)
39836    }
39837}
39838
39839/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39840///
39841/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39847///
39848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39849#[inline]
39850#[target_feature(enable = "avx512f")]
39851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39852#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39853#[rustc_legacy_const_generics(4)]
39854pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39855    k: __mmask8,
39856    a: __m128d,
39857    b: __m128d,
39858    c: __m128d,
39859) -> __m128d {
39860    unsafe {
39861        static_assert_rounding!(ROUNDING);
39862        let mut fmadd: f64 = 0.;
39863        if (k & 0b00000001) != 0 {
39864            let extracta: f64 = simd_extract!(a, 0);
39865            let extractb: f64 = simd_extract!(b, 0);
39866            let extractc: f64 = simd_extract!(c, 0);
39867            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39868        }
39869        simd_insert!(a, 0, fmadd)
39870    }
39871}
39872
39873/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39874///
39875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39881///
39882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39883#[inline]
39884#[target_feature(enable = "avx512f")]
39885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39886#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39887#[rustc_legacy_const_generics(4)]
39888pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39889    a: __m128d,
39890    b: __m128d,
39891    c: __m128d,
39892    k: __mmask8,
39893) -> __m128d {
39894    unsafe {
39895        static_assert_rounding!(ROUNDING);
39896        let mut fmadd: f64 = simd_extract!(c, 0);
39897        if (k & 0b00000001) != 0 {
39898            let extracta: f64 = simd_extract!(a, 0);
39899            let extractb: f64 = simd_extract!(b, 0);
39900            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39901        }
39902        simd_insert!(c, 0, fmadd)
39903    }
39904}
39905
39906/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39907///
39908/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39909/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39910/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39911/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39912/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39913/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39914///
39915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
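///
/// # Example
///
/// A minimal usage sketch (illustrative only, not from the original documentation),
/// assuming nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper three lanes are copied from `a`.
/// let r = unsafe {
///     _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// assert_eq!(_mm_cvtss_f32(r), 5.0);
/// ```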
39916#[inline]
39917#[target_feature(enable = "avx512f")]
39918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39919#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39920#[rustc_legacy_const_generics(3)]
39921pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39922    unsafe {
39923        static_assert_rounding!(ROUNDING);
39924        let extracta: f32 = simd_extract!(a, 0);
39925        let extractb: f32 = simd_extract!(b, 0);
39926        let extractc: f32 = simd_extract!(c, 0);
39927        let extractc = -extractc;
39928        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39929        simd_insert!(a, 0, fmsub)
39930    }
39931}
39932
39933/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39934///
39935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39941///
39942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
39943#[inline]
39944#[target_feature(enable = "avx512f")]
39945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39946#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39947#[rustc_legacy_const_generics(4)]
39948pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
39949    a: __m128,
39950    k: __mmask8,
39951    b: __m128,
39952    c: __m128,
39953) -> __m128 {
39954    unsafe {
39955        static_assert_rounding!(ROUNDING);
39956        let mut fmsub: f32 = simd_extract!(a, 0);
39957        if (k & 0b00000001) != 0 {
39958            let extractb: f32 = simd_extract!(b, 0);
39959            let extractc: f32 = simd_extract!(c, 0);
39960            let extractc = -extractc;
39961            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
39962        }
39963        simd_insert!(a, 0, fmsub)
39964    }
39965}
39966
39967/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39968///
39969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39975///
39976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
39977#[inline]
39978#[target_feature(enable = "avx512f")]
39979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39980#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39981#[rustc_legacy_const_generics(4)]
39982pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
39983    k: __mmask8,
39984    a: __m128,
39985    b: __m128,
39986    c: __m128,
39987) -> __m128 {
39988    unsafe {
39989        static_assert_rounding!(ROUNDING);
39990        let mut fmsub: f32 = 0.;
39991        if (k & 0b00000001) != 0 {
39992            let extracta: f32 = simd_extract!(a, 0);
39993            let extractb: f32 = simd_extract!(b, 0);
39994            let extractc: f32 = simd_extract!(c, 0);
39995            let extractc = -extractc;
39996            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39997        }
39998        simd_insert!(a, 0, fmsub)
39999    }
40000}
40001
40002/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40003///
40004/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40005/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40006/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40007/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40008/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40010///
40011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40012#[inline]
40013#[target_feature(enable = "avx512f")]
40014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40015#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40016#[rustc_legacy_const_generics(4)]
40017pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40018    a: __m128,
40019    b: __m128,
40020    c: __m128,
40021    k: __mmask8,
40022) -> __m128 {
40023    unsafe {
40024        static_assert_rounding!(ROUNDING);
40025        let mut fmsub: f32 = simd_extract!(c, 0);
40026        if (k & 0b00000001) != 0 {
40027            let extracta: f32 = simd_extract!(a, 0);
40028            let extractb: f32 = simd_extract!(b, 0);
40029            let extractc = -fmsub;
40030            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40031        }
40032        simd_insert!(c, 0, fmsub)
40033    }
40034}
40035
40036/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40037///
40038/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40039/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40040/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40041/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40042/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40043/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40044///
40045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40046#[inline]
40047#[target_feature(enable = "avx512f")]
40048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40049#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40050#[rustc_legacy_const_generics(3)]
40051pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40052    unsafe {
40053        static_assert_rounding!(ROUNDING);
40054        let extracta: f64 = simd_extract!(a, 0);
40055        let extractb: f64 = simd_extract!(b, 0);
40056        let extractc: f64 = simd_extract!(c, 0);
40057        let extractc = -extractc;
40058        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40059        simd_insert!(a, 0, fmsub)
40060    }
40061}
40062
40063/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40064///
40065/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40066/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40067/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40068/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40069/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40070/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40071///
40072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40073#[inline]
40074#[target_feature(enable = "avx512f")]
40075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40076#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40077#[rustc_legacy_const_generics(4)]
40078pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40079    a: __m128d,
40080    k: __mmask8,
40081    b: __m128d,
40082    c: __m128d,
40083) -> __m128d {
40084    unsafe {
40085        static_assert_rounding!(ROUNDING);
40086        let mut fmsub: f64 = simd_extract!(a, 0);
40087        if (k & 0b00000001) != 0 {
40088            let extractb: f64 = simd_extract!(b, 0);
40089            let extractc: f64 = simd_extract!(c, 0);
40090            let extractc = -extractc;
40091            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40092        }
40093        simd_insert!(a, 0, fmsub)
40094    }
40095}
40096
40097/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40098///
40099/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40100/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40101/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40102/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40103/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40105///
40106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40107#[inline]
40108#[target_feature(enable = "avx512f")]
40109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40110#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40111#[rustc_legacy_const_generics(4)]
40112pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40113    k: __mmask8,
40114    a: __m128d,
40115    b: __m128d,
40116    c: __m128d,
40117) -> __m128d {
40118    unsafe {
40119        static_assert_rounding!(ROUNDING);
40120        let mut fmsub: f64 = 0.;
40121        if (k & 0b00000001) != 0 {
40122            let extracta: f64 = simd_extract!(a, 0);
40123            let extractb: f64 = simd_extract!(b, 0);
40124            let extractc: f64 = simd_extract!(c, 0);
40125            let extractc = -extractc;
40126            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40127        }
40128        simd_insert!(a, 0, fmsub)
40129    }
40130}
40131
40132/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40133///
40134/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40135/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40136/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40137/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40138/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40139/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40140///
40141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40142#[inline]
40143#[target_feature(enable = "avx512f")]
40144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40145#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40146#[rustc_legacy_const_generics(4)]
40147pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40148    a: __m128d,
40149    b: __m128d,
40150    c: __m128d,
40151    k: __mmask8,
40152) -> __m128d {
40153    unsafe {
40154        static_assert_rounding!(ROUNDING);
40155        let mut fmsub: f64 = simd_extract!(c, 0);
40156        if (k & 0b00000001) != 0 {
40157            let extracta: f64 = simd_extract!(a, 0);
40158            let extractb: f64 = simd_extract!(b, 0);
40159            let extractc = -fmsub;
40160            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40161        }
40162        simd_insert!(c, 0, fmsub)
40163    }
40164}
40165
40166/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40167///
40168/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40169/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40170/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40171/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40172/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40173/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40174///
40175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
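///
/// # Example
///
/// A minimal usage sketch (illustrative only, not from the original documentation),
/// assuming nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: -(2.0 * 3.0) + 1.0 = -5.0; the upper three lanes are copied from `a`.
/// let r = unsafe {
///     _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// assert_eq!(_mm_cvtss_f32(r), -5.0);
/// ```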
40176#[inline]
40177#[target_feature(enable = "avx512f")]
40178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40179#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40180#[rustc_legacy_const_generics(3)]
40181pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40182    unsafe {
40183        static_assert_rounding!(ROUNDING);
40184        let extracta: f32 = simd_extract!(a, 0);
40185        let extracta = -extracta;
40186        let extractb: f32 = simd_extract!(b, 0);
40187        let extractc: f32 = simd_extract!(c, 0);
40188        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40189        simd_insert!(a, 0, fnmadd)
40190    }
40191}
40192
40193/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40194///
40195/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40196/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40197/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40198/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40199/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40201///
40202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40203#[inline]
40204#[target_feature(enable = "avx512f")]
40205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40206#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40207#[rustc_legacy_const_generics(4)]
40208pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40209    a: __m128,
40210    k: __mmask8,
40211    b: __m128,
40212    c: __m128,
40213) -> __m128 {
40214    unsafe {
40215        static_assert_rounding!(ROUNDING);
40216        let mut fnmadd: f32 = simd_extract!(a, 0);
40217        if (k & 0b00000001) != 0 {
40218            let extracta = -fnmadd;
40219            let extractb: f32 = simd_extract!(b, 0);
40220            let extractc: f32 = simd_extract!(c, 0);
40221            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40222        }
40223        simd_insert!(a, 0, fnmadd)
40224    }
40225}
40226
40227/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40228///
40229/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40230/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40231/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40232/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40233/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40235///
40236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40237#[inline]
40238#[target_feature(enable = "avx512f")]
40239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40240#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40241#[rustc_legacy_const_generics(4)]
40242pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40243    k: __mmask8,
40244    a: __m128,
40245    b: __m128,
40246    c: __m128,
40247) -> __m128 {
40248    unsafe {
40249        static_assert_rounding!(ROUNDING);
40250        let mut fnmadd: f32 = 0.;
40251        if (k & 0b00000001) != 0 {
40252            let extracta: f32 = simd_extract!(a, 0);
40253            let extracta = -extracta;
40254            let extractb: f32 = simd_extract!(b, 0);
40255            let extractc: f32 = simd_extract!(c, 0);
40256            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40257        }
40258        simd_insert!(a, 0, fnmadd)
40259    }
40260}
40261
40262/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40263///
40264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40265/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40266/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40267/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40268/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40270///
40271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40272#[inline]
40273#[target_feature(enable = "avx512f")]
40274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40275#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40276#[rustc_legacy_const_generics(4)]
40277pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40278    a: __m128,
40279    b: __m128,
40280    c: __m128,
40281    k: __mmask8,
40282) -> __m128 {
40283    unsafe {
40284        static_assert_rounding!(ROUNDING);
40285        let mut fnmadd: f32 = simd_extract!(c, 0);
40286        if (k & 0b00000001) != 0 {
40287            let extracta: f32 = simd_extract!(a, 0);
40288            let extracta = -extracta;
40289            let extractb: f32 = simd_extract!(b, 0);
40290            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40291        }
40292        simd_insert!(c, 0, fnmadd)
40293    }
40294}
40295
40296/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40297///
40298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40304///
40305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40306#[inline]
40307#[target_feature(enable = "avx512f")]
40308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40309#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40310#[rustc_legacy_const_generics(3)]
40311pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40312    unsafe {
40313        static_assert_rounding!(ROUNDING);
40314        let extracta: f64 = simd_extract!(a, 0);
40315        let extracta = -extracta;
40316        let extractb: f64 = simd_extract!(b, 0);
40317        let extractc: f64 = simd_extract!(c, 0);
40318        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40319        simd_insert!(a, 0, fnmadd)
40320    }
40321}
40322
40323/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40324///
40325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40331///
40332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40333#[inline]
40334#[target_feature(enable = "avx512f")]
40335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40336#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40337#[rustc_legacy_const_generics(4)]
40338pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40339    a: __m128d,
40340    k: __mmask8,
40341    b: __m128d,
40342    c: __m128d,
40343) -> __m128d {
40344    unsafe {
40345        static_assert_rounding!(ROUNDING);
40346        let mut fnmadd: f64 = simd_extract!(a, 0);
40347        if (k & 0b00000001) != 0 {
40348            let extracta = -fnmadd;
40349            let extractb: f64 = simd_extract!(b, 0);
40350            let extractc: f64 = simd_extract!(c, 0);
40351            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40352        }
40353        simd_insert!(a, 0, fnmadd)
40354    }
40355}
40356
40357/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40358///
40359/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40360/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40361/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40362/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40363/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40364/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40365///
40366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40367#[inline]
40368#[target_feature(enable = "avx512f")]
40369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40370#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40371#[rustc_legacy_const_generics(4)]
40372pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40373    k: __mmask8,
40374    a: __m128d,
40375    b: __m128d,
40376    c: __m128d,
40377) -> __m128d {
40378    unsafe {
40379        static_assert_rounding!(ROUNDING);
40380        let mut fnmadd: f64 = 0.;
40381        if (k & 0b00000001) != 0 {
40382            let extracta: f64 = simd_extract!(a, 0);
40383            let extracta = -extracta;
40384            let extractb: f64 = simd_extract!(b, 0);
40385            let extractc: f64 = simd_extract!(c, 0);
40386            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40387        }
40388        simd_insert!(a, 0, fnmadd)
40389    }
40390}
40391
40392/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40393///
40394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40400///
40401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40402#[inline]
40403#[target_feature(enable = "avx512f")]
40404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40405#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40406#[rustc_legacy_const_generics(4)]
40407pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40408    a: __m128d,
40409    b: __m128d,
40410    c: __m128d,
40411    k: __mmask8,
40412) -> __m128d {
40413    unsafe {
40414        static_assert_rounding!(ROUNDING);
40415        let mut fnmadd: f64 = simd_extract!(c, 0);
40416        if (k & 0b00000001) != 0 {
40417            let extracta: f64 = simd_extract!(a, 0);
40418            let extracta = -extracta;
40419            let extractb: f64 = simd_extract!(b, 0);
40420            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40421        }
40422        simd_insert!(c, 0, fnmadd)
40423    }
40424}
40425
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40427///
40428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40429/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40430/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40431/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40432/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40434///
40435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
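///
/// # Example
///
/// A minimal usage sketch (illustrative only, not from the original documentation),
/// assuming nightly with `#![feature(stdarch_x86_avx512)]` and AVX-512F:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper three lanes are copied from `a`.
/// let r = unsafe {
///     _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// assert_eq!(_mm_cvtss_f32(r), -7.0);
/// ```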
40436#[inline]
40437#[target_feature(enable = "avx512f")]
40438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40439#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40440#[rustc_legacy_const_generics(3)]
40441pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40442    unsafe {
40443        static_assert_rounding!(ROUNDING);
40444        let extracta: f32 = simd_extract!(a, 0);
40445        let extracta = -extracta;
40446        let extractb: f32 = simd_extract!(b, 0);
40447        let extractc: f32 = simd_extract!(c, 0);
40448        let extractc = -extractc;
40449        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40450        simd_insert!(a, 0, fnmsub)
40451    }
40452}
40453
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40455///
40456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40462///
40463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40464#[inline]
40465#[target_feature(enable = "avx512f")]
40466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40467#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40468#[rustc_legacy_const_generics(4)]
40469pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40470    a: __m128,
40471    k: __mmask8,
40472    b: __m128,
40473    c: __m128,
40474) -> __m128 {
40475    unsafe {
40476        static_assert_rounding!(ROUNDING);
40477        let mut fnmsub: f32 = simd_extract!(a, 0);
40478        if (k & 0b00000001) != 0 {
40479            let extracta = -fnmsub;
40480            let extractb: f32 = simd_extract!(b, 0);
40481            let extractc: f32 = simd_extract!(c, 0);
40482            let extractc = -extractc;
40483            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40484        }
40485        simd_insert!(a, 0, fnmsub)
40486    }
40487}
40488
40489/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40490///
40491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40497///
40498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40499#[inline]
40500#[target_feature(enable = "avx512f")]
40501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40502#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40503#[rustc_legacy_const_generics(4)]
40504pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40505    k: __mmask8,
40506    a: __m128,
40507    b: __m128,
40508    c: __m128,
40509) -> __m128 {
40510    unsafe {
40511        static_assert_rounding!(ROUNDING);
40512        let mut fnmsub: f32 = 0.;
40513        if (k & 0b00000001) != 0 {
40514            let extracta: f32 = simd_extract!(a, 0);
40515            let extracta = -extracta;
40516            let extractb: f32 = simd_extract!(b, 0);
40517            let extractc: f32 = simd_extract!(c, 0);
40518            let extractc = -extractc;
40519            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40520        }
40521        simd_insert!(a, 0, fnmsub)
40522    }
40523}
40524
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40526///
40527/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40528/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40529/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40530/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40531/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40532/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40533///
40534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40535#[inline]
40536#[target_feature(enable = "avx512f")]
40537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40538#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40539#[rustc_legacy_const_generics(4)]
40540pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40541    a: __m128,
40542    b: __m128,
40543    c: __m128,
40544    k: __mmask8,
40545) -> __m128 {
40546    unsafe {
40547        static_assert_rounding!(ROUNDING);
40548        let mut fnmsub: f32 = simd_extract!(c, 0);
40549        if (k & 0b00000001) != 0 {
40550            let extracta: f32 = simd_extract!(a, 0);
40551            let extracta = -extracta;
40552            let extractb: f32 = simd_extract!(b, 0);
40553            let extractc = -fnmsub;
40554            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40555        }
40556        simd_insert!(c, 0, fnmsub)
40557    }
40558}
40559
40560/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40561///
40562/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40568///
40569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40570#[inline]
40571#[target_feature(enable = "avx512f")]
40572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40573#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40574#[rustc_legacy_const_generics(3)]
40575pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40576    unsafe {
40577        static_assert_rounding!(ROUNDING);
40578        let extracta: f64 = simd_extract!(a, 0);
40579        let extracta = -extracta;
40580        let extractb: f64 = simd_extract!(b, 0);
40581        let extractc: f64 = simd_extract!(c, 0);
40582        let extractc = -extractc;
40583        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40584        simd_insert!(a, 0, fnmsub)
40585    }
40586}
40587
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40589///
40590/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40591/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40592/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40593/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40594/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40595/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40596///
40597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40598#[inline]
40599#[target_feature(enable = "avx512f")]
40600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40601#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40602#[rustc_legacy_const_generics(4)]
40603pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40604    a: __m128d,
40605    k: __mmask8,
40606    b: __m128d,
40607    c: __m128d,
40608) -> __m128d {
40609    unsafe {
40610        static_assert_rounding!(ROUNDING);
40611        let mut fnmsub: f64 = simd_extract!(a, 0);
40612        if (k & 0b00000001) != 0 {
40613            let extracta = -fnmsub;
40614            let extractb: f64 = simd_extract!(b, 0);
40615            let extractc: f64 = simd_extract!(c, 0);
40616            let extractc = -extractc;
40617            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40618        }
40619        simd_insert!(a, 0, fnmsub)
40620    }
40621}
40622
40623/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40624///
40625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40631///
40632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40633#[inline]
40634#[target_feature(enable = "avx512f")]
40635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40636#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40637#[rustc_legacy_const_generics(4)]
40638pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40639    k: __mmask8,
40640    a: __m128d,
40641    b: __m128d,
40642    c: __m128d,
40643) -> __m128d {
40644    unsafe {
40645        static_assert_rounding!(ROUNDING);
40646        let mut fnmsub: f64 = 0.;
40647        if (k & 0b00000001) != 0 {
40648            let extracta: f64 = simd_extract!(a, 0);
40649            let extracta = -extracta;
40650            let extractb: f64 = simd_extract!(b, 0);
40651            let extractc: f64 = simd_extract!(c, 0);
40652            let extractc = -extractc;
40653            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40654        }
40655        simd_insert!(a, 0, fnmsub)
40656    }
40657}
40658
40659/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40660///
40661/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40662/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40663/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40664/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40665/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40666/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40667///
40668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40669#[inline]
40670#[target_feature(enable = "avx512f")]
40671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40672#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40673#[rustc_legacy_const_generics(4)]
40674pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40675    a: __m128d,
40676    b: __m128d,
40677    c: __m128d,
40678    k: __mmask8,
40679) -> __m128d {
40680    unsafe {
40681        static_assert_rounding!(ROUNDING);
40682        let mut fnmsub: f64 = simd_extract!(c, 0);
40683        if (k & 0b00000001) != 0 {
40684            let extracta: f64 = simd_extract!(a, 0);
40685            let extracta = -extracta;
40686            let extractb: f64 = simd_extract!(b, 0);
40687            let extractc = -fnmsub;
40688            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40689        }
40690        simd_insert!(c, 0, fnmsub)
40691    }
40692}
40693
40694/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40695///
40696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
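///
/// A rough sketch of the calling shape (not a doctest; assumes nightly with
/// `stdarch_x86_avx512` and an `avx512f` context). The per-class response
/// table in `c` and the `IMM8` fault-reporting bits are described in Intel's
/// VFIXUPIMMSS documentation:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(4.0);
/// let b = _mm_set_ss(4.0);
/// // `c` packs eight 4-bit response codes, one per input class of `b`.
/// // An all-zero table preserves the lower element of `a`; IMM8 = 0 enables
/// // no additional fault reporting.
/// let table = _mm_setzero_si128();
/// let r = _mm_fixupimm_ss::<0>(a, b, table);
/// assert_eq!(_mm_cvtss_f32(r), 4.0);
/// ```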
40697#[inline]
40698#[target_feature(enable = "avx512f")]
40699#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40700#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40701#[rustc_legacy_const_generics(3)]
40702pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40703    unsafe {
40704        static_assert_uimm_bits!(IMM8, 8);
40705        let a = a.as_f32x4();
40706        let b = b.as_f32x4();
40707        let c = c.as_i32x4();
40708        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40709        let fixupimm: f32 = simd_extract!(r, 0);
40710        let r = simd_insert!(a, 0, fixupimm);
40711        transmute(r)
40712    }
40713}
40714
40715/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40716///
40717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40718#[inline]
40719#[target_feature(enable = "avx512f")]
40720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40721#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40722#[rustc_legacy_const_generics(4)]
40723pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40724    a: __m128,
40725    k: __mmask8,
40726    b: __m128,
40727    c: __m128i,
40728) -> __m128 {
40729    unsafe {
40730        static_assert_uimm_bits!(IMM8, 8);
40731        let a = a.as_f32x4();
40732        let b = b.as_f32x4();
40733        let c = c.as_i32x4();
40734        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40735        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40736        let r = simd_insert!(a, 0, fixupimm);
40737        transmute(r)
40738    }
40739}
40740
40741/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40742///
40743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40744#[inline]
40745#[target_feature(enable = "avx512f")]
40746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40747#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40748#[rustc_legacy_const_generics(4)]
40749pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40750    k: __mmask8,
40751    a: __m128,
40752    b: __m128,
40753    c: __m128i,
40754) -> __m128 {
40755    unsafe {
40756        static_assert_uimm_bits!(IMM8, 8);
40757        let a = a.as_f32x4();
40758        let b = b.as_f32x4();
40759        let c = c.as_i32x4();
40760        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40761        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40762        let r = simd_insert!(a, 0, fixupimm);
40763        transmute(r)
40764    }
40765}
40766
40767/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40768///
40769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40770#[inline]
40771#[target_feature(enable = "avx512f")]
40772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40773#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40774#[rustc_legacy_const_generics(3)]
40775pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40776    unsafe {
40777        static_assert_uimm_bits!(IMM8, 8);
40778        let a = a.as_f64x2();
40779        let b = b.as_f64x2();
40780        let c = c.as_i64x2();
40781        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40782        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40783        let r = simd_insert!(a, 0, fixupimm);
40784        transmute(r)
40785    }
40786}
40787
40788/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40789///
40790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40791#[inline]
40792#[target_feature(enable = "avx512f")]
40793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40794#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40795#[rustc_legacy_const_generics(4)]
40796pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40797    a: __m128d,
40798    k: __mmask8,
40799    b: __m128d,
40800    c: __m128i,
40801) -> __m128d {
40802    unsafe {
40803        static_assert_uimm_bits!(IMM8, 8);
40804        let a = a.as_f64x2();
40805        let b = b.as_f64x2();
40806        let c = c.as_i64x2();
40807        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40808        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40809        let r = simd_insert!(a, 0, fixupimm);
40810        transmute(r)
40811    }
40812}
40813
40814/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40815///
40816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40817#[inline]
40818#[target_feature(enable = "avx512f")]
40819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40820#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40821#[rustc_legacy_const_generics(4)]
40822pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40823    k: __mmask8,
40824    a: __m128d,
40825    b: __m128d,
40826    c: __m128i,
40827) -> __m128d {
40828    unsafe {
40829        static_assert_uimm_bits!(IMM8, 8);
40830        let a = a.as_f64x2();
40831        let b = b.as_f64x2();
40832        let c = c.as_i64x2();
40833        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40834        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40835        let r = simd_insert!(a, 0, fixupimm);
40836        transmute(r)
40837    }
40838}
40839
40840/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40841/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40842///
40843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40844#[inline]
40845#[target_feature(enable = "avx512f")]
40846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40847#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40848#[rustc_legacy_const_generics(3, 4)]
40849pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40850    a: __m128,
40851    b: __m128,
40852    c: __m128i,
40853) -> __m128 {
40854    unsafe {
40855        static_assert_uimm_bits!(IMM8, 8);
40856        static_assert_mantissas_sae!(SAE);
40857        let a = a.as_f32x4();
40858        let b = b.as_f32x4();
40859        let c = c.as_i32x4();
40860        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40861        let fixupimm: f32 = simd_extract!(r, 0);
40862        let r = simd_insert!(a, 0, fixupimm);
40863        transmute(r)
40864    }
40865}
40866
40867/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40868/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40869///
40870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40871#[inline]
40872#[target_feature(enable = "avx512f")]
40873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40874#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40875#[rustc_legacy_const_generics(4, 5)]
40876pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40877    a: __m128,
40878    k: __mmask8,
40879    b: __m128,
40880    c: __m128i,
40881) -> __m128 {
40882    unsafe {
40883        static_assert_uimm_bits!(IMM8, 8);
40884        static_assert_mantissas_sae!(SAE);
40885        let a = a.as_f32x4();
40886        let b = b.as_f32x4();
40887        let c = c.as_i32x4();
40888        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
40889        let fixupimm: f32 = simd_extract!(r, 0);
40890        let r = simd_insert!(a, 0, fixupimm);
40891        transmute(r)
40892    }
40893}
40894
40895/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40896/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40897///
40898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40899#[inline]
40900#[target_feature(enable = "avx512f")]
40901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40902#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40903#[rustc_legacy_const_generics(4, 5)]
40904pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40905    k: __mmask8,
40906    a: __m128,
40907    b: __m128,
40908    c: __m128i,
40909) -> __m128 {
40910    unsafe {
40911        static_assert_uimm_bits!(IMM8, 8);
40912        static_assert_mantissas_sae!(SAE);
40913        let a = a.as_f32x4();
40914        let b = b.as_f32x4();
40915        let c = c.as_i32x4();
40916        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
40917        let fixupimm: f32 = simd_extract!(r, 0);
40918        let r = simd_insert!(a, 0, fixupimm);
40919        transmute(r)
40920    }
40921}
40922
40923/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40924/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40925///
40926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
40927#[inline]
40928#[target_feature(enable = "avx512f")]
40929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40930#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40931#[rustc_legacy_const_generics(3, 4)]
40932pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40933    a: __m128d,
40934    b: __m128d,
40935    c: __m128i,
40936) -> __m128d {
40937    unsafe {
40938        static_assert_uimm_bits!(IMM8, 8);
40939        static_assert_mantissas_sae!(SAE);
40940        let a = a.as_f64x2();
40941        let b = b.as_f64x2();
40942        let c = c.as_i64x2();
40943        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
40944        let fixupimm: f64 = simd_extract!(r, 0);
40945        let r = simd_insert!(a, 0, fixupimm);
40946        transmute(r)
40947    }
40948}
40949
40950/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40951/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40952///
40953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
40954#[inline]
40955#[target_feature(enable = "avx512f")]
40956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40957#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40958#[rustc_legacy_const_generics(4, 5)]
40959pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40960    a: __m128d,
40961    k: __mmask8,
40962    b: __m128d,
40963    c: __m128i,
40964) -> __m128d {
40965    unsafe {
40966        static_assert_uimm_bits!(IMM8, 8);
40967        static_assert_mantissas_sae!(SAE);
40968        let a = a.as_f64x2();
40969        let b = b.as_f64x2();
40970        let c = c.as_i64x2();
40971        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
40972        let fixupimm: f64 = simd_extract!(r, 0);
40973        let r = simd_insert!(a, 0, fixupimm);
40974        transmute(r)
40975    }
40976}
40977
40978/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40979/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40980///
40981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
40982#[inline]
40983#[target_feature(enable = "avx512f")]
40984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40985#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40986#[rustc_legacy_const_generics(4, 5)]
40987pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40988    k: __mmask8,
40989    a: __m128d,
40990    b: __m128d,
40991    c: __m128i,
40992) -> __m128d {
40993    unsafe {
40994        static_assert_uimm_bits!(IMM8, 8);
40995        static_assert_mantissas_sae!(SAE);
40996        let a = a.as_f64x2();
40997        let b = b.as_f64x2();
40998        let c = c.as_i64x2();
40999        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41000        let fixupimm: f64 = simd_extract!(r, 0);
41001        let r = simd_insert!(a, 0, fixupimm);
41002        transmute(r)
41003    }
41004}
41005
41006/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41007///
41008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
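///
/// A small sketch of the masked conversion (not a doctest; assumes nightly
/// with `stdarch_x86_avx512` and an `avx512f` context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm_set_sd(9.0);
/// let a = _mm_set_sd(1.0);
/// let b = _mm_set_ss(0.5);
/// // Mask bit 0 set: the lower f32 of `b` is widened to f64.
/// assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b1, a, b)), 0.5);
/// // Mask bit 0 clear: the lower lane is taken from `src` instead.
/// assert_eq!(_mm_cvtsd_f64(_mm_mask_cvtss_sd(src, 0b0, a, b)), 9.0);
/// ```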
41009#[inline]
41010#[target_feature(enable = "avx512f")]
41011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41012#[cfg_attr(test, assert_instr(vcvtss2sd))]
41013pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41014    unsafe {
41015        transmute(vcvtss2sd(
41016            a.as_f64x2(),
41017            b.as_f32x4(),
41018            src.as_f64x2(),
41019            k,
41020            _MM_FROUND_CUR_DIRECTION,
41021        ))
41022    }
41023}
41024
41025/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41026///
41027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41028#[inline]
41029#[target_feature(enable = "avx512f")]
41030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41031#[cfg_attr(test, assert_instr(vcvtss2sd))]
41032pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41033    unsafe {
41034        transmute(vcvtss2sd(
41035            a.as_f64x2(),
41036            b.as_f32x4(),
41037            f64x2::ZERO,
41038            k,
41039            _MM_FROUND_CUR_DIRECTION,
41040        ))
41041    }
41042}
41043
41044/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41045///
41046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41047#[inline]
41048#[target_feature(enable = "avx512f")]
41049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41050#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41051pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41052    unsafe {
41053        transmute(vcvtsd2ss(
41054            a.as_f32x4(),
41055            b.as_f64x2(),
41056            src.as_f32x4(),
41057            k,
41058            _MM_FROUND_CUR_DIRECTION,
41059        ))
41060    }
41061}
41062
41063/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41064///
41065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41066#[inline]
41067#[target_feature(enable = "avx512f")]
41068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41069#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41070pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41071    unsafe {
41072        transmute(vcvtsd2ss(
41073            a.as_f32x4(),
41074            b.as_f64x2(),
41075            f32x4::ZERO,
41076            k,
41077            _MM_FROUND_CUR_DIRECTION,
41078        ))
41079    }
41080}
41081
41082/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41083/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41084///
41085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
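///
/// A minimal sketch (not a doctest; assumes nightly with `stdarch_x86_avx512`
/// and an `avx512f` context). Widening f32 to f64 is exact, so `SAE` only
/// affects exception reporting here:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(0.0);
/// let b = _mm_set_ss(1.5);
/// let r = _mm_cvt_roundss_sd::<_MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(_mm_cvtsd_f64(r), 1.5);
/// ```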
41086#[inline]
41087#[target_feature(enable = "avx512f")]
41088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41089#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41090#[rustc_legacy_const_generics(2)]
41091pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41092    unsafe {
41093        static_assert_sae!(SAE);
41094        let a = a.as_f64x2();
41095        let b = b.as_f32x4();
41096        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41097        transmute(r)
41098    }
41099}
41100
41101/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41103///
41104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41105#[inline]
41106#[target_feature(enable = "avx512f")]
41107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41108#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41109#[rustc_legacy_const_generics(4)]
41110pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41111    src: __m128d,
41112    k: __mmask8,
41113    a: __m128d,
41114    b: __m128,
41115) -> __m128d {
41116    unsafe {
41117        static_assert_sae!(SAE);
41118        let a = a.as_f64x2();
41119        let b = b.as_f32x4();
41120        let src = src.as_f64x2();
41121        let r = vcvtss2sd(a, b, src, k, SAE);
41122        transmute(r)
41123    }
41124}
41125
41126/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41127/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41128///
41129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41130#[inline]
41131#[target_feature(enable = "avx512f")]
41132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41133#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41134#[rustc_legacy_const_generics(3)]
41135pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41136    unsafe {
41137        static_assert_sae!(SAE);
41138        let a = a.as_f64x2();
41139        let b = b.as_f32x4();
41140        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41141        transmute(r)
41142    }
41143}
41144
41145/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41146/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41147/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41148/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41149/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41150/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41151/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41152///
41153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
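///
/// A sketch showing how the rounding mode affects the narrowing conversion
/// (not a doctest; assumes nightly with `stdarch_x86_avx512` and an `avx512f`
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let b = _mm_set_sd(1.000000001); // falls between two adjacent f32 values
/// let down = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// let up = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// assert_eq!(_mm_cvtss_f32(down), 1.0);
/// assert!(_mm_cvtss_f32(up) > 1.0);
/// ```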
41154#[inline]
41155#[target_feature(enable = "avx512f")]
41156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41157#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41158#[rustc_legacy_const_generics(2)]
41159pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41160    unsafe {
41161        static_assert_rounding!(ROUNDING);
41162        let a = a.as_f32x4();
41163        let b = b.as_f64x2();
41164        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41165        transmute(r)
41166    }
41167}
41168
41169/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41170/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41171/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41172/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41173/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41174/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41176///
41177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41178#[inline]
41179#[target_feature(enable = "avx512f")]
41180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41181#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41182#[rustc_legacy_const_generics(4)]
41183pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41184    src: __m128,
41185    k: __mmask8,
41186    a: __m128,
41187    b: __m128d,
41188) -> __m128 {
41189    unsafe {
41190        static_assert_rounding!(ROUNDING);
41191        let a = a.as_f32x4();
41192        let b = b.as_f64x2();
41193        let src = src.as_f32x4();
41194        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
41195        transmute(r)
41196    }
41197}
41198
41199/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41200/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41201/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41202/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41203/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41204/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41205/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41211#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41214    unsafe {
41215        static_assert_rounding!(ROUNDING);
41216        let a = a.as_f32x4();
41217        let b = b.as_f64x2();
41218        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41219        transmute(r)
41220    }
41221}
41222
41223/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
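///
/// A sketch of how the rounding mode changes the integer result (not a
/// doctest; assumes nightly with `stdarch_x86_avx512` and an `avx512f`
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.5);
/// // Round to nearest, ties to even: 2.5 -> 2.
/// assert_eq!(_mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a), 2);
/// // Round toward +infinity: 2.5 -> 3.
/// assert_eq!(_mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a), 3);
/// // Round toward zero: 2.5 -> 2.
/// assert_eq!(_mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a), 2);
/// ```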
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41235#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(1)]
41237pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41238    unsafe {
41239        static_assert_rounding!(ROUNDING);
41240        let a = a.as_f32x4();
41241        vcvtss2si(a, ROUNDING)
41242    }
41243}
41244
41245/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41252///
41253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41254#[inline]
41255#[target_feature(enable = "avx512f")]
41256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41257#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41258#[rustc_legacy_const_generics(1)]
41259pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41260    unsafe {
41261        static_assert_rounding!(ROUNDING);
41262        let a = a.as_f32x4();
41263        vcvtss2si(a, ROUNDING)
41264    }
41265}
41266
41267/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41268/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41269/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41270/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41271/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41272/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41273/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41274///
41275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41276#[inline]
41277#[target_feature(enable = "avx512f")]
41278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41279#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41280#[rustc_legacy_const_generics(1)]
41281pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41282    unsafe {
41283        static_assert_rounding!(ROUNDING);
41284        let a = a.as_f32x4();
41285        vcvtss2usi(a, ROUNDING)
41286    }
41287}
41288
41289/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41290///
41291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41292#[inline]
41293#[target_feature(enable = "avx512f")]
41294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41295#[cfg_attr(test, assert_instr(vcvtss2si))]
41296pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41297    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41298}
41299
41300/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41301///
41302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41303#[inline]
41304#[target_feature(enable = "avx512f")]
41305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41306#[cfg_attr(test, assert_instr(vcvtss2usi))]
41307pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41308    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41309}
41310
41311/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41318///
41319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
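///
/// A sketch with a negative input (not a doctest; assumes nightly with
/// `stdarch_x86_avx512` and an `avx512f` context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(-2.5);
/// // Round to nearest, ties to even: -2.5 -> -2.
/// assert_eq!(_mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a), -2);
/// // Round toward -infinity: -2.5 -> -3.
/// assert_eq!(_mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a), -3);
/// ```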
41320#[inline]
41321#[target_feature(enable = "avx512f")]
41322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41323#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41324#[rustc_legacy_const_generics(1)]
41325pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41326    unsafe {
41327        static_assert_rounding!(ROUNDING);
41328        let a = a.as_f64x2();
41329        vcvtsd2si(a, ROUNDING)
41330    }
41331}
41332
41333/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41334/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41335/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41336/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41337/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41338/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41339/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41340///
41341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41342#[inline]
41343#[target_feature(enable = "avx512f")]
41344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41345#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41346#[rustc_legacy_const_generics(1)]
41347pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41348    unsafe {
41349        static_assert_rounding!(ROUNDING);
41350        let a = a.as_f64x2();
41351        vcvtsd2si(a, ROUNDING)
41352    }
41353}
41354
41355/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41356/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41357/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41358/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41359/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41360/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41361/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41362///
41363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41364#[inline]
41365#[target_feature(enable = "avx512f")]
41366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41367#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41368#[rustc_legacy_const_generics(1)]
41369pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41370    unsafe {
41371        static_assert_rounding!(ROUNDING);
41372        let a = a.as_f64x2();
41373        vcvtsd2usi(a, ROUNDING)
41374    }
41375}
41376
41377/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41378///
41379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41380#[inline]
41381#[target_feature(enable = "avx512f")]
41382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41383#[cfg_attr(test, assert_instr(vcvtsd2si))]
41384pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41385    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41386}
41387
41388/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41389///
41390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41391#[inline]
41392#[target_feature(enable = "avx512f")]
41393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41394#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41395pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41396    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41397}
41398
41399/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41400///
41401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41407///
41408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
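///
/// A sketch using an integer that is not exactly representable as f32 (not a
/// doctest; assumes nightly with `stdarch_x86_avx512` and an `avx512f`
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let x: i32 = 16_777_217; // 2^24 + 1, halfway between two adjacent f32 values
/// let near = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, x);
/// let up = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, x);
/// assert_eq!(_mm_cvtss_f32(near), 16_777_216.0);
/// assert_eq!(_mm_cvtss_f32(up), 16_777_218.0);
/// ```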
41409#[inline]
41410#[target_feature(enable = "avx512f")]
41411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41412#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41413#[rustc_legacy_const_generics(2)]
41414pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41415    unsafe {
41416        static_assert_rounding!(ROUNDING);
41417        let a = a.as_f32x4();
41418        let r = vcvtsi2ss(a, b, ROUNDING);
41419        transmute(r)
41420    }
41421}
41422
41423/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41424///
41425/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41426/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41427/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41428/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41429/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41430/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41431///
41432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41433#[inline]
41434#[target_feature(enable = "avx512f")]
41435#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41436#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41437#[rustc_legacy_const_generics(2)]
41438pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41439    unsafe {
41440        static_assert_rounding!(ROUNDING);
41441        let a = a.as_f32x4();
41442        let r = vcvtsi2ss(a, b, ROUNDING);
41443        transmute(r)
41444    }
41445}
41446
41447/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41448/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41449/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41450/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41451/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41452/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41453/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41454///
41455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41456#[inline]
41457#[target_feature(enable = "avx512f")]
41458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41459#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41460#[rustc_legacy_const_generics(2)]
41461pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41462    unsafe {
41463        static_assert_rounding!(ROUNDING);
41464        let a = a.as_f32x4();
41465        let r = vcvtusi2ss(a, b, ROUNDING);
41466        transmute(r)
41467    }
41468}
41469
41470/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41471///
41472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41473#[inline]
41474#[target_feature(enable = "avx512f")]
41475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41476#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41477pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41478    unsafe {
41479        let b = b as f32;
41480        simd_insert!(a, 0, b)
41481    }
41482}
41483
41484/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41490#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41491pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41492    unsafe {
41493        let b = b as f64;
41494        simd_insert!(a, 0, b)
41495    }
41496}
41497
41498/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41500///
41501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
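///
/// A sketch contrasting truncation with the rounding conversion (not a
/// doctest; assumes nightly with `stdarch_x86_avx512` and an `avx512f`
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(-1.7);
/// // Truncation always chops toward zero: -1.7 -> -1.
/// assert_eq!(_mm_cvtt_roundss_si32::<_MM_FROUND_CUR_DIRECTION>(a), -1);
/// // The non-truncating conversion rounds to nearest here: -1.7 -> -2.
/// assert_eq!(_mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a), -2);
/// ```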
41502#[inline]
41503#[target_feature(enable = "avx512f")]
41504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41505#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41506#[rustc_legacy_const_generics(1)]
41507pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41508    unsafe {
41509        static_assert_sae!(SAE);
41510        let a = a.as_f32x4();
41511        vcvttss2si(a, SAE)
41512    }
41513}
41514
41515/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41516/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41517///
41518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41519#[inline]
41520#[target_feature(enable = "avx512f")]
41521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41522#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41523#[rustc_legacy_const_generics(1)]
41524pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41525    unsafe {
41526        static_assert_sae!(SAE);
41527        let a = a.as_f32x4();
41528        vcvttss2si(a, SAE)
41529    }
41530}
41531
41532/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41534///
41535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41536#[inline]
41537#[target_feature(enable = "avx512f")]
41538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41539#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41540#[rustc_legacy_const_generics(1)]
41541pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41542    unsafe {
41543        static_assert_sae!(SAE);
41544        let a = a.as_f32x4();
41545        vcvttss2usi(a, SAE)
41546    }
41547}
41548
41549/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41550///
41551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41552#[inline]
41553#[target_feature(enable = "avx512f")]
41554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41555#[cfg_attr(test, assert_instr(vcvttss2si))]
41556pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41557    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41558}
41559
41560/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41561///
41562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41563#[inline]
41564#[target_feature(enable = "avx512f")]
41565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41566#[cfg_attr(test, assert_instr(vcvttss2usi))]
41567pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41568    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41569}
41570
41571/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41573///
41574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41575#[inline]
41576#[target_feature(enable = "avx512f")]
41577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41578#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41579#[rustc_legacy_const_generics(1)]
41580pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41581    unsafe {
41582        static_assert_sae!(SAE);
41583        let a = a.as_f64x2();
41584        vcvttsd2si(a, SAE)
41585    }
41586}
41587
41588/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41589/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41590///
41591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41592#[inline]
41593#[target_feature(enable = "avx512f")]
41594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41595#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41596#[rustc_legacy_const_generics(1)]
41597pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41598    unsafe {
41599        static_assert_sae!(SAE);
41600        let a = a.as_f64x2();
41601        vcvttsd2si(a, SAE)
41602    }
41603}
41604
41605/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41606/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41607///
41608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41609#[inline]
41610#[target_feature(enable = "avx512f")]
41611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41612#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41613#[rustc_legacy_const_generics(1)]
41614pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41615    unsafe {
41616        static_assert_sae!(SAE);
41617        let a = a.as_f64x2();
41618        vcvttsd2usi(a, SAE)
41619    }
41620}
41621
41622/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41623///
41624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41625#[inline]
41626#[target_feature(enable = "avx512f")]
41627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41628#[cfg_attr(test, assert_instr(vcvttsd2si))]
41629pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41630    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41631}
41632
41633/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41634///
41635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41636#[inline]
41637#[target_feature(enable = "avx512f")]
41638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41639#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41640pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41641    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41642}
41643
41644/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41645///
41646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
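///
/// A short sketch (not a doctest; assumes nightly with `stdarch_x86_avx512`
/// and an `avx512f` context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// // Unlike a signed conversion, values above i32::MAX keep their magnitude.
/// let r = _mm_cvtu32_ss(a, 4_000_000_000);
/// assert_eq!(_mm_cvtss_f32(r), 4_000_000_000u32 as f32);
/// ```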
41647#[inline]
41648#[target_feature(enable = "avx512f")]
41649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41650#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41651pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41652    unsafe {
41653        let b = b as f32;
41654        simd_insert!(a, 0, b)
41655    }
41656}
41657
41658/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41659///
41660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
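///
/// A minimal usage sketch (illustrative only, not a doctest; assumes nightly
/// `stdarch_x86_avx512` and runtime AVX512F detection):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // The lower lane becomes 4_000_000_000.0; the upper lane is copied from `a`.
///     let r = unsafe { _mm_cvtu32_sd(_mm_set1_pd(1.0), 4_000_000_000) };
///     assert_eq!(_mm_cvtsd_f64(r), 4_000_000_000.0);
/// }
/// ```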
41661#[inline]
41662#[target_feature(enable = "avx512f")]
41663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41664#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41665pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41666    unsafe {
41667        let b = b as f64;
41668        simd_insert!(a, 0, b)
41669    }
41670}
41671
41672/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41674///
41675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
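///
/// A minimal usage sketch (illustrative only, not a doctest; assumes nightly
/// `stdarch_x86_avx512`, runtime AVX512F detection, and uses one of the ordinary
/// `_CMP_*` predicate constants for `IMM5`):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // 1.0 < 2.0, so the ordered less-than comparison returns 1.
///     let r = unsafe {
///         _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(_mm_set_ss(1.0), _mm_set_ss(2.0))
///     };
///     assert_eq!(r, 1);
/// }
/// ```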
41676#[inline]
41677#[target_feature(enable = "avx512f")]
41678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41679#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
41680#[rustc_legacy_const_generics(2, 3)]
41681pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41682    unsafe {
41683        static_assert_uimm_bits!(IMM5, 5);
41684        static_assert_mantissas_sae!(SAE);
41685        let a = a.as_f32x4();
41686        let b = b.as_f32x4();
41687        vcomiss(a, b, IMM5, SAE)
41688    }
41689}
41690
41691/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41692/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41693///
41694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
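///
/// A minimal usage sketch (illustrative only, not a doctest; same assumptions as the
/// example for `_mm_comi_round_ss` above):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     let r = unsafe {
///         _mm_comi_round_sd::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(_mm_set_sd(1.0), _mm_set_sd(1.0))
///     };
///     assert_eq!(r, 1);
/// }
/// ```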
41695#[inline]
41696#[target_feature(enable = "avx512f")]
41697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41698#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
41699#[rustc_legacy_const_generics(2, 3)]
41700pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41701    unsafe {
41702        static_assert_uimm_bits!(IMM5, 5);
41703        static_assert_mantissas_sae!(SAE);
41704        let a = a.as_f64x2();
41705        let b = b.as_f64x2();
41706        vcomisd(a, b, IMM5, SAE)
41707    }
41708}
41709
41710/// Equal
41711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41712pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41713/// Less-than
41714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41715pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41716/// Less-than-or-equal
41717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41718pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41719/// False
41720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41721pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41722/// Not-equal
41723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41724pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41725/// Not less-than
41726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41727pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41728/// Not less-than-or-equal
41729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41730pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41731/// True
41732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41733pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
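// The `_MM_CMPINT_*` predicates above are consumed as const generics by the integer
// compare intrinsics. Illustrative sketch only (assumes an intrinsic such as
// `_mm512_cmp_epi32_mask` and code running inside a `#[target_feature(enable = "avx512f")]`
// context or an `unsafe` block after runtime detection):
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     // Every lane of `a` is less than the corresponding lane of `b`.
//     let k: __mmask16 = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
//     assert_eq!(k, 0xFFFF);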
41734
41735/// interval [1, 2)
41736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41737pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41738/// interval [0.5, 2)
41739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41740pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41741/// interval [0.5, 1)
41742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41743pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41744/// interval [0.75, 1.5)
41745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41746pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41747
41748/// sign = sign(SRC)
41749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41750pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41751/// sign = 0
41752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41753pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41754/// DEST = NaN if sign(SRC) = 1
41755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41756pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
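// The `_MM_MANT_NORM_*` / `_MM_MANT_SIGN_*` pairs above parameterize the getmant
// intrinsics. Illustrative sketch only (assumes a `_mm512_getmant_ps`-style intrinsic
// that takes both enums as const generics, and AVX512F being enabled):
//
//     // Extract mantissas normalized to the interval [1, 2), keeping the source sign.
//     let a = _mm512_set1_ps(-12.0);
//     let m = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
//     // Each lane of `m` is now -1.5, because -12.0 = -1.5 * 2^3.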
41757
41758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41759pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41761pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41763pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41765pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41767pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41769pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41771pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41773pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41775pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41777pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41779pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41781pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41783pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41785pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41787pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41789pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41791pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41795pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41797pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41801pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41803pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41807pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41809pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41813pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41817pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41819pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41823pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41825pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41827pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41829pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41833pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41835pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42075pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42077pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42079pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42081pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42083pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42085pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42087pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42089pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42091pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42093pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42095pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42097pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42099pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42101pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42103pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42105pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42107pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42109pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42111pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42113pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42115pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42117pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42119pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42121pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42123pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42125pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42127pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42129pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42131pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42133pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42135pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42137pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42139pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42141pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42143pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42145pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42147pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42149pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42151pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42153pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42155pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42157pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42159pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42161pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42163pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42165pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42167pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42169pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42171pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42173pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42175pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42177pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42179pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42181pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42183pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42185pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42187pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42189pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42191pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42193pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42195pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42197pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42199pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42201pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42203pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42205pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42207pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42209pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42211pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42213pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42215pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42217pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42219pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42221pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42223pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42225pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42227pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42229pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42231pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42233pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42235pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42237pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42239pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42241pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42243pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42245pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42247pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42249pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42251pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42253pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42255pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42257pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42259pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42261pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42263pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42265pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42267pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42269pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
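// An `_MM_PERM_ENUM` value encodes, two bits per destination lane (A = element 0 ...
// D = element 3, leftmost letter for the highest lane), which 32-bit element of each
// 128-bit block is selected; `_MM_PERM_DCBA` is therefore the identity. Illustrative
// sketch only (assumes a shuffle intrinsic such as `_mm512_shuffle_epi32` that takes
// the enum as a const generic, with AVX512F enabled):
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     // Broadcast element 0 of every 128-bit block: [0, 0, 0, 0, 4, 4, 4, 4, ...].
//     let r = _mm512_shuffle_epi32::<_MM_PERM_AAAA>(a);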
42270
42271#[allow(improper_ctypes)]
42272unsafe extern "C" {
42273    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42274    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42275    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42276    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42277
42278    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42279    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42280    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42281    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42282
42283    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42284    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
42285    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42286    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang
42287
42288    #[link_name = "llvm.x86.avx512.add.ps.512"]
42289    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42290    #[link_name = "llvm.x86.avx512.add.pd.512"]
42291    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42292    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42293    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42294    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42295    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42296    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42297    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42298    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42299    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42300    #[link_name = "llvm.x86.avx512.div.ps.512"]
42301    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42302    #[link_name = "llvm.x86.avx512.div.pd.512"]
42303    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42304
42305    #[link_name = "llvm.x86.avx512.max.ps.512"]
42306    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42307    #[link_name = "llvm.x86.avx512.max.pd.512"]
42308    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42309    #[link_name = "llvm.x86.avx512.min.ps.512"]
42310    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42311    #[link_name = "llvm.x86.avx512.min.pd.512"]
42312    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42313
42314    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42315    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42316
42317    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42318    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42319    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42320    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42321
42322    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42323    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42324    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42325    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42326    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42327    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42328
42329    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42330    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42331    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42332    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42333    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42334    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42335
42336    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42337    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42338    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42339    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42340    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42341    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42342
42343    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42344    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42345    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42346    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42347    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42348    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42349
42350    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42351    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42352    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42353    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42354    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42355    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42356
42357    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42358    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42359    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42360    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42361    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42362    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42363
42364    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42365    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42366    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42367    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42368    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42369    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42370
42371    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42372    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42373    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42374    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42375    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42376    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42377
42378    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42379    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42380    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42381    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42382    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42383    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42384
42385    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42386    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42387    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42388    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42389    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42390    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42391
42392    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42393    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42394    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42395    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42396    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42397    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42398
42399    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42400    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42401    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42402    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42403    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42404    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42405
42406    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42407    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42408    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42409    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42410    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42411    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42412
42413    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42414    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42415    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42416    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42417    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42418    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42419
42420    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42421    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42422    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42423    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42424    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42425    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42426
42427    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42428    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42429    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42430    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42431    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42432    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42433
42434    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42435    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42436    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42437    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42438    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42439    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42440
42441    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42442    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42443
42444    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42445    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42446    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42447    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42448    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42449    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42450
42451    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42452    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42453    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42454    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42455
42456    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42457    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42458
42459    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42460    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42461    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42462    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42463    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42464    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42465
42466    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42467    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42468    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42469    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42470
42471    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42472    fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
42473    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42474    fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
42475    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42476    fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
42477
42478    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42479    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42480
42481    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42482    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42483    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42484    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42485    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42486    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42487
42488    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42489    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42490    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42491    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42492    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42493    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42494
42495    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42496    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42497    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42498    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42499    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42500    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42501
42502    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42503    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42504    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42505    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42506    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42507    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42508
42509    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42510    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42511    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42512    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42513    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42514    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42515
42516    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42517    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42518    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42519    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42520    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42521    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42522    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42523    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42524    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42525    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42526
42527    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42528    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42529    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42530    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42531    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42532    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42533
42534    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42535    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42536    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42537    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42538    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42539    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42540
42541    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42542    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42543    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42544    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42545    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42546    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42547
42548    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42549    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42550    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42551    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42552    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42553    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42554
42555    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42556    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42557    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42558    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42559    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42560    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42561
42562    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42563    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42564    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42565    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42566    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42567    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42568
42569    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42570    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42571    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42572    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42573    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42574    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42575
42576    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42577    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42578    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42579    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42580    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42581    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42582
42583    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42584    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42585    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42586    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42587    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42588    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42589
42590    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42591    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42592    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42593    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42594    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42595    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42596
42597    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42598    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42599    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42600    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42601    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42602    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42603
42604    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42605    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42606    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42607    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42608    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42609    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42610
42611    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42612    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42613    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42614    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42615    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42616    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42617
42618    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42619    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42620    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42621    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42622    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42623    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42624
42625    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42626    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42627    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42628    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42629    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42630    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42631
42632    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42633    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42634
42635    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42636    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42637    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42638    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42639    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42640    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42641
42642    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42643    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42644    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42645    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42646    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42647    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42648
42649    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42650    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42651    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42652    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42653    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42654    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42655
42656    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42657    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42658    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42659    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42660    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42661    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42662
42663    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42664    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42665    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42666    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42667    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42668    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42669
42670    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42671    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42672    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42673    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42674    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42675    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42676
42677    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42678    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42679    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42680    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42681    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42682    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42683
42684    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42685    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42686    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42687    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42688    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42689    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42690
42691    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42692    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42693    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42694    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42695    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42696    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42697
42698    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42699    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42700    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42701    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42702    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42703    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42704
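    // 512-bit masked gathers: load elements from `slice + offset * scale`; lanes whose mask
    // bit is clear keep the corresponding element of `src`.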
42705    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42706    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42707    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42708    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42709    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42710    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42711    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42712    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42713    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42714    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42715    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42716    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42717    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42718    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42719    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42720    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42721
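    // 512-bit masked scatters: store the lanes of `src` selected by `mask` to
    // `slice + offset * scale`.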
42722    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42723    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42724    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42725    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42726    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42727    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42728    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42729    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42730    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42731    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42732
42733    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42734    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42735    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42736    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42737    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42738    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42739
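    // 128- and 256-bit scatter and gather variants (require AVX512F + AVX512VL).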
42740    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42741    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42742    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42743    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42744    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42745    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42746    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42747    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42748    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42749    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42750    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42751    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42752    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42753    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42754    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42755    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42756
42757    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42758    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42759    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42760    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42761    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42762    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42763    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42764    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42765    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42766    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42767    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42768    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42769    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42770    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42771    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42772    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42773
42774    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42775    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42776    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42777    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42778    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42779    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42780    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42781    fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42782    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42783    fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42784    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42785    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42786    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42787    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42788    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42789    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42790
42791    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42792    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42793    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42794    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42795    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42796    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42797    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42798    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42799    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42800    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42801    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42802    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42803    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42804    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42805    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42806    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42807
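    // Masked floating-point compares returning a bitmask; `op` selects the comparison
    // predicate and, where present, `sae` carries the exception-suppression control.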
42808    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42809    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42810    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42811    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42812
42813    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42814    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42815    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42816    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42817    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42818    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42819
42820    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42821    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42822    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42823    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42824    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42825    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42826
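    // Rotates of packed 32-/64-bit integers by an immediate count (prol = left, pror = right).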
42827    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
42828    fn vprold(a: i32x16, imm8: i32) -> i32x16;
42829    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
42830    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
42831    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
42832    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
42833
42834    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
42835    fn vprord(a: i32x16, imm8: i32) -> i32x16;
42836    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
42837    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
42838    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
42839    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
42840
42841    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
42842    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
42843    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
42844    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
42845    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
42846    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
42847
42848    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
42849    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
42850    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
42851    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
42852    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
42853    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
42854
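    // Rotates of packed 32-/64-bit integers by per-element counts taken from `b`.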
42855    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
42856    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
42857    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
42858    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
42859    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
42860    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
42861
42862    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
42863    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
42864    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
42865    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
42866    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
42867    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
42868
42869    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
42870    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
42871    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
42872    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
42873    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
42874    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
42875
42876    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
42877    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
42878    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
42879    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
42880    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
42881    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
42882
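    // Packed shifts: the psllv/psrlv/psrav forms take per-element counts; the others take a
    // single count from the low 64 bits of `count`.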
42883    #[link_name = "llvm.x86.avx512.psllv.d.512"]
42884    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
42885    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
42886    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
42887    #[link_name = "llvm.x86.avx512.psllv.q.512"]
42888    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
42889    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
42890    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
42891
42892    #[link_name = "llvm.x86.avx512.psll.d.512"]
42893    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42894    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42895    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42896    #[link_name = "llvm.x86.avx512.psll.q.512"]
42897    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42898    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42899    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42900
42901    #[link_name = "llvm.x86.avx512.psra.d.512"]
42902    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42903
42904    #[link_name = "llvm.x86.avx512.psra.q.512"]
42905    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42906    #[link_name = "llvm.x86.avx512.psra.q.256"]
42907    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42908    #[link_name = "llvm.x86.avx512.psra.q.128"]
42909    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42910
42911    #[link_name = "llvm.x86.avx512.psrav.d.512"]
42912    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
42913
42914    #[link_name = "llvm.x86.avx512.psrav.q.512"]
42915    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
42916    #[link_name = "llvm.x86.avx512.psrav.q.256"]
42917    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
42918    #[link_name = "llvm.x86.avx512.psrav.q.128"]
42919    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
42920
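    // Permutes: vpermilvar shuffles within 128-bit lanes, permvar shuffles across the full
    // vector, and the vpermi2var forms below index into the concatenation of `a` and `b`.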
42921    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
42922    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
42923    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
42924    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
42925
42926    #[link_name = "llvm.x86.avx512.permvar.si.512"]
42927    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
42928
42929    #[link_name = "llvm.x86.avx512.permvar.di.512"]
42930    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
42931    #[link_name = "llvm.x86.avx512.permvar.di.256"]
42932    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
42933
42934    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
42935    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
42936
42937    #[link_name = "llvm.x86.avx512.permvar.df.512"]
42938    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
42939    #[link_name = "llvm.x86.avx512.permvar.df.256"]
42940    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
42941
42942    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
42943    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
42944    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
42945    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
42946    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
42947    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
42948
42949    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
42950    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
42951    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
42952    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
42953    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
42954    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
42955
42956    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
42957    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
42958    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
42959    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
42960    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
42961    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
42962
42963    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
42964    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
42965    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
42966    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
42967    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
42968    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
42969
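    // Masked compress: pack the elements selected by `mask` contiguously into the low lanes;
    // the remaining lanes are taken from `src`.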
42970    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
42971    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
42972    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
42973    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
42974    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
42975    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
42976
42977    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
42978    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
42979    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
42980    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
42981    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
42982    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
42983
42984    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
42985    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
42986    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
42987    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
42988    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
42989    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
42990
42991    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
42992    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
42993    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
42994    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
42995    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
42996    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
42997
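    // Masked compress-to-memory: store the elements of `data` selected by `mask` contiguously
    // at `mem`.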
42998    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
42999    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43000    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43001    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43002    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43003    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43004
43005    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43006    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43007    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43008    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43009    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43010    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43011
43012    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43013    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43014    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43015    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43016    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43017    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43018
43019    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43020    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43021    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43022    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43023    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43024    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43025
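    // Masked expand: distribute the low elements of `a` into the lanes selected by `mask`;
    // the other lanes are taken from `src`.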
43026    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43027    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43028    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43029    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43030    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43031    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43032
43033    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43034    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43035    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43036    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43037    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43038    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43039
43040    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43041    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43042    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43043    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43044    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43045    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43046
43047    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43048    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43049    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43050    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43051    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43052    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43053
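    // Masked scalar arithmetic; `rounding` selects the rounding mode and `sae` suppresses
    // exceptions, while the upper lanes of the result are passed through from `a`.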
43054    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43055    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43056    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43057    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43058    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43059    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43060    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43061    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43062    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43063    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43064    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43065    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43066    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43067    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43068    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43069    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43070    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43071    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43072    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43073    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43074    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43075    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43076    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43077    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43078    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43079    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43080    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43081    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43082    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43083    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43084    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43085    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43086    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43087    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43088    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43089    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43090
43091    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43092    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43093    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43094    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43095    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43096    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43097    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43098    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43099
43100    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43101    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43102    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43103    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43104    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43105    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43106    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43107    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43108
43109    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43110    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43111    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43112    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43113
43114    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43115    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43116    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43117    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43118    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43119    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43120    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43121    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43122
43123    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43124    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43125    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43126    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43127
43128    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43129    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43130    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43131    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43132
43133    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43134    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43135    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43136    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43137
43138    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43139    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43140
43141    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43142    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43143
43144    #[link_name = "llvm.x86.avx512.cvttss2si"]
43145    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43146    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43147    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43148
43149    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43150    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43151    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43152    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43153
43154    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43155    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43156    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43157    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43158
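    // Masked unaligned loads (the aligned `load.*` forms follow): lanes whose mask bit is
    // clear keep the corresponding element of `a`.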
43159    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
43160    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43161    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
43162    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43163    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
43164    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43165    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
43166    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43167    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
43168    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43169    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
43170    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43171    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
43172    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43173    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
43174    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43175    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
43176    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43177    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
43178    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43179    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
43180    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43181    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
43182    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43183
43184    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43185    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43186    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43187    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43188    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43189    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43190    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43191    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43192    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43193    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43194    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43195    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43196    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43197    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43198    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43199    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43200    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43201    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43202    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43203    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43204    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43205    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43206    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43207    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43208
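    // Masked unaligned stores (the aligned `store.*` forms follow): only the lanes selected
    // by `mask` are written.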
43209    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
43210    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43211    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
43212    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43213    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
43214    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43215    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
43216    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43217    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
43218    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43219    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
43220    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43221    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
43222    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43223    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
43224    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43225    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
43226    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43227    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
43228    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43229    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
43230    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43231    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
43232    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43233
43234    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
43235    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43236    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
43237    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43238    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
43239    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43240    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
43241    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43242    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
43243    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43244    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
43245    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43246    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
43247    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43248    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
43249    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43250    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
43251    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43252    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
43253    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43254    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
43255    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43256    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
43257    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43258
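    // Masked expand-loads: read consecutive elements from `mem_addr` and expand them into the
    // lanes selected by `mask`.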
43259    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43260    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43261    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43262    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43263    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43264    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43265    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43266    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43267    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43268    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43269    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43270    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43271    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43272    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43273    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43274    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43275    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43276    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43277    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43278    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43279    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43280    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43281    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43282    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43283
43284}
43285
43286#[cfg(test)]
43287mod tests {
43288
43289    use stdarch_test::simd_test;
43290
43291    use crate::core_arch::x86::*;
43292    use crate::hint::black_box;
43293    use crate::mem;
43294
43295    #[simd_test(enable = "avx512f")]
43296    unsafe fn test_mm512_abs_epi32() {
43297        #[rustfmt::skip]
43298        let a = _mm512_setr_epi32(
43299            0, 1, -1, i32::MAX,
43300            i32::MIN, 100, -100, -32,
43301            0, 1, -1, i32::MAX,
43302            i32::MIN, 100, -100, -32,
43303        );
43304        let r = _mm512_abs_epi32(a);
43305        #[rustfmt::skip]
43306        let e = _mm512_setr_epi32(
43307            0, 1, 1, i32::MAX,
43308            i32::MAX.wrapping_add(1), 100, 100, 32,
43309            0, 1, 1, i32::MAX,
43310            i32::MAX.wrapping_add(1), 100, 100, 32,
43311        );
43312        assert_eq_m512i(r, e);
43313    }
43314
43315    #[simd_test(enable = "avx512f")]
43316    unsafe fn test_mm512_mask_abs_epi32() {
43317        #[rustfmt::skip]
43318        let a = _mm512_setr_epi32(
43319            0, 1, -1, i32::MAX,
43320            i32::MIN, 100, -100, -32,
43321            0, 1, -1, i32::MAX,
43322            i32::MIN, 100, -100, -32,
43323        );
43324        let r = _mm512_mask_abs_epi32(a, 0, a);
43325        assert_eq_m512i(r, a);
43326        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43327        #[rustfmt::skip]
43328        let e = _mm512_setr_epi32(
43329            0, 1, 1, i32::MAX,
43330            i32::MAX.wrapping_add(1), 100, 100, 32,
43331            0, 1, -1, i32::MAX,
43332            i32::MIN, 100, -100, -32,
43333        );
43334        assert_eq_m512i(r, e);
43335    }
43336
43337    #[simd_test(enable = "avx512f")]
43338    unsafe fn test_mm512_maskz_abs_epi32() {
43339        #[rustfmt::skip]
43340        let a = _mm512_setr_epi32(
43341            0, 1, -1, i32::MAX,
43342            i32::MIN, 100, -100, -32,
43343            0, 1, -1, i32::MAX,
43344            i32::MIN, 100, -100, -32,
43345        );
43346        let r = _mm512_maskz_abs_epi32(0, a);
43347        assert_eq_m512i(r, _mm512_setzero_si512());
43348        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43349        #[rustfmt::skip]
43350        let e = _mm512_setr_epi32(
43351            0, 1, 1, i32::MAX,
43352            i32::MAX.wrapping_add(1), 100, 100, 32,
43353            0, 0, 0, 0,
43354            0, 0, 0, 0,
43355        );
43356        assert_eq_m512i(r, e);
43357    }
43358
43359    #[simd_test(enable = "avx512f,avx512vl")]
43360    unsafe fn test_mm256_mask_abs_epi32() {
43361        #[rustfmt::skip]
43362        let a = _mm256_setr_epi32(
43363            0, 1, -1, i32::MAX,
43364            i32::MIN, 100, -100, -32,
43365        );
43366        let r = _mm256_mask_abs_epi32(a, 0, a);
43367        assert_eq_m256i(r, a);
43368        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43369        #[rustfmt::skip]
43370        let e = _mm256_setr_epi32(
43371            0, 1, 1, i32::MAX,
43372            i32::MIN, 100, -100, -32,
43373        );
43374        assert_eq_m256i(r, e);
43375    }
43376
43377    #[simd_test(enable = "avx512f,avx512vl")]
43378    unsafe fn test_mm256_maskz_abs_epi32() {
43379        #[rustfmt::skip]
43380        let a = _mm256_setr_epi32(
43381            0, 1, -1, i32::MAX,
43382            i32::MIN, 100, -100, -32,
43383        );
43384        let r = _mm256_maskz_abs_epi32(0, a);
43385        assert_eq_m256i(r, _mm256_setzero_si256());
43386        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43387        #[rustfmt::skip]
43388        let e = _mm256_setr_epi32(
43389            0, 1, 1, i32::MAX,
43390            0, 0, 0, 0,
43391        );
43392        assert_eq_m256i(r, e);
43393    }
43394
43395    #[simd_test(enable = "avx512f,avx512vl")]
43396    unsafe fn test_mm_mask_abs_epi32() {
43397        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43398        let r = _mm_mask_abs_epi32(a, 0, a);
43399        assert_eq_m128i(r, a);
43400        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43401        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43402        assert_eq_m128i(r, e);
43403    }
43404
43405    #[simd_test(enable = "avx512f,avx512vl")]
43406    unsafe fn test_mm_maskz_abs_epi32() {
43407        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43408        let r = _mm_maskz_abs_epi32(0, a);
43409        assert_eq_m128i(r, _mm_setzero_si128());
43410        let r = _mm_maskz_abs_epi32(0b00001111, a);
43411        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43412        assert_eq_m128i(r, e);
43413    }
43414
43415    #[simd_test(enable = "avx512f")]
43416    unsafe fn test_mm512_abs_ps() {
43417        #[rustfmt::skip]
43418        let a = _mm512_setr_ps(
43419            0., 1., -1., f32::MAX,
43420            f32::MIN, 100., -100., -32.,
43421            0., 1., -1., f32::MAX,
43422            f32::MIN, 100., -100., -32.,
43423        );
43424        let r = _mm512_abs_ps(a);
43425        #[rustfmt::skip]
43426        let e = _mm512_setr_ps(
43427            0., 1., 1., f32::MAX,
43428            f32::MAX, 100., 100., 32.,
43429            0., 1., 1., f32::MAX,
43430            f32::MAX, 100., 100., 32.,
43431        );
43432        assert_eq_m512(r, e);
43433    }
43434
43435    #[simd_test(enable = "avx512f")]
43436    unsafe fn test_mm512_mask_abs_ps() {
43437        #[rustfmt::skip]
43438        let a = _mm512_setr_ps(
43439            0., 1., -1., f32::MAX,
43440            f32::MIN, 100., -100., -32.,
43441            0., 1., -1., f32::MAX,
43442            f32::MIN, 100., -100., -32.,
43443        );
43444        let r = _mm512_mask_abs_ps(a, 0, a);
43445        assert_eq_m512(r, a);
43446        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43447        #[rustfmt::skip]
43448        let e = _mm512_setr_ps(
43449            0., 1., 1., f32::MAX,
43450            f32::MAX, 100., 100., 32.,
43451            0., 1., -1., f32::MAX,
43452            f32::MIN, 100., -100., -32.,
43453        );
43454        assert_eq_m512(r, e);
43455    }
43456
43457    #[simd_test(enable = "avx512f")]
43458    unsafe fn test_mm512_mask_mov_epi32() {
43459        let src = _mm512_set1_epi32(1);
43460        let a = _mm512_set1_epi32(2);
43461        let r = _mm512_mask_mov_epi32(src, 0, a);
43462        assert_eq_m512i(r, src);
43463        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43464        assert_eq_m512i(r, a);
43465    }
43466
43467    #[simd_test(enable = "avx512f")]
43468    unsafe fn test_mm512_maskz_mov_epi32() {
43469        let a = _mm512_set1_epi32(2);
43470        let r = _mm512_maskz_mov_epi32(0, a);
43471        assert_eq_m512i(r, _mm512_setzero_si512());
43472        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43473        assert_eq_m512i(r, a);
43474    }
43475
43476    #[simd_test(enable = "avx512f,avx512vl")]
43477    unsafe fn test_mm256_mask_mov_epi32() {
43478        let src = _mm256_set1_epi32(1);
43479        let a = _mm256_set1_epi32(2);
43480        let r = _mm256_mask_mov_epi32(src, 0, a);
43481        assert_eq_m256i(r, src);
43482        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43483        assert_eq_m256i(r, a);
43484    }
43485
43486    #[simd_test(enable = "avx512f,avx512vl")]
43487    unsafe fn test_mm256_maskz_mov_epi32() {
43488        let a = _mm256_set1_epi32(2);
43489        let r = _mm256_maskz_mov_epi32(0, a);
43490        assert_eq_m256i(r, _mm256_setzero_si256());
43491        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43492        assert_eq_m256i(r, a);
43493    }
43494
43495    #[simd_test(enable = "avx512f,avx512vl")]
43496    unsafe fn test_mm_mask_mov_epi32() {
43497        let src = _mm_set1_epi32(1);
43498        let a = _mm_set1_epi32(2);
43499        let r = _mm_mask_mov_epi32(src, 0, a);
43500        assert_eq_m128i(r, src);
43501        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43502        assert_eq_m128i(r, a);
43503    }
43504
43505    #[simd_test(enable = "avx512f,avx512vl")]
43506    unsafe fn test_mm_maskz_mov_epi32() {
43507        let a = _mm_set1_epi32(2);
43508        let r = _mm_maskz_mov_epi32(0, a);
43509        assert_eq_m128i(r, _mm_setzero_si128());
43510        let r = _mm_maskz_mov_epi32(0b00001111, a);
43511        assert_eq_m128i(r, a);
43512    }
43513
43514    #[simd_test(enable = "avx512f")]
43515    unsafe fn test_mm512_mask_mov_ps() {
43516        let src = _mm512_set1_ps(1.);
43517        let a = _mm512_set1_ps(2.);
43518        let r = _mm512_mask_mov_ps(src, 0, a);
43519        assert_eq_m512(r, src);
43520        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43521        assert_eq_m512(r, a);
43522    }
43523
43524    #[simd_test(enable = "avx512f")]
43525    unsafe fn test_mm512_maskz_mov_ps() {
43526        let a = _mm512_set1_ps(2.);
43527        let r = _mm512_maskz_mov_ps(0, a);
43528        assert_eq_m512(r, _mm512_setzero_ps());
43529        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43530        assert_eq_m512(r, a);
43531    }
43532
43533    #[simd_test(enable = "avx512f,avx512vl")]
43534    unsafe fn test_mm256_mask_mov_ps() {
43535        let src = _mm256_set1_ps(1.);
43536        let a = _mm256_set1_ps(2.);
43537        let r = _mm256_mask_mov_ps(src, 0, a);
43538        assert_eq_m256(r, src);
43539        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43540        assert_eq_m256(r, a);
43541    }
43542
43543    #[simd_test(enable = "avx512f,avx512vl")]
43544    unsafe fn test_mm256_maskz_mov_ps() {
43545        let a = _mm256_set1_ps(2.);
43546        let r = _mm256_maskz_mov_ps(0, a);
43547        assert_eq_m256(r, _mm256_setzero_ps());
43548        let r = _mm256_maskz_mov_ps(0b11111111, a);
43549        assert_eq_m256(r, a);
43550    }
43551
43552    #[simd_test(enable = "avx512f,avx512vl")]
43553    unsafe fn test_mm_mask_mov_ps() {
43554        let src = _mm_set1_ps(1.);
43555        let a = _mm_set1_ps(2.);
43556        let r = _mm_mask_mov_ps(src, 0, a);
43557        assert_eq_m128(r, src);
43558        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43559        assert_eq_m128(r, a);
43560    }
43561
43562    #[simd_test(enable = "avx512f,avx512vl")]
43563    unsafe fn test_mm_maskz_mov_ps() {
43564        let a = _mm_set1_ps(2.);
43565        let r = _mm_maskz_mov_ps(0, a);
43566        assert_eq_m128(r, _mm_setzero_ps());
43567        let r = _mm_maskz_mov_ps(0b00001111, a);
43568        assert_eq_m128(r, a);
43569    }
43570
43571    #[simd_test(enable = "avx512f")]
43572    unsafe fn test_mm512_add_epi32() {
43573        #[rustfmt::skip]
43574        let a = _mm512_setr_epi32(
43575            0, 1, -1, i32::MAX,
43576            i32::MIN, 100, -100, -32,
43577            0, 1, -1, i32::MAX,
43578            i32::MIN, 100, -100, -32,
43579        );
43580        let b = _mm512_set1_epi32(1);
43581        let r = _mm512_add_epi32(a, b);
43582        #[rustfmt::skip]
43583        let e = _mm512_setr_epi32(
43584            1, 2, 0, i32::MIN,
43585            i32::MIN + 1, 101, -99, -31,
43586            1, 2, 0, i32::MIN,
43587            i32::MIN + 1, 101, -99, -31,
43588        );
43589        assert_eq_m512i(r, e);
43590    }
43591
43592    #[simd_test(enable = "avx512f")]
43593    unsafe fn test_mm512_mask_add_epi32() {
43594        #[rustfmt::skip]
43595        let a = _mm512_setr_epi32(
43596            0, 1, -1, i32::MAX,
43597            i32::MIN, 100, -100, -32,
43598            0, 1, -1, i32::MAX,
43599            i32::MIN, 100, -100, -32,
43600        );
43601        let b = _mm512_set1_epi32(1);
43602        let r = _mm512_mask_add_epi32(a, 0, a, b);
43603        assert_eq_m512i(r, a);
43604        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43605        #[rustfmt::skip]
43606        let e = _mm512_setr_epi32(
43607            1, 2, 0, i32::MIN,
43608            i32::MIN + 1, 101, -99, -31,
43609            0, 1, -1, i32::MAX,
43610            i32::MIN, 100, -100, -32,
43611        );
43612        assert_eq_m512i(r, e);
43613    }
43614
43615    #[simd_test(enable = "avx512f")]
43616    unsafe fn test_mm512_maskz_add_epi32() {
43617        #[rustfmt::skip]
43618        let a = _mm512_setr_epi32(
43619            0, 1, -1, i32::MAX,
43620            i32::MIN, 100, -100, -32,
43621            0, 1, -1, i32::MAX,
43622            i32::MIN, 100, -100, -32,
43623        );
43624        let b = _mm512_set1_epi32(1);
43625        let r = _mm512_maskz_add_epi32(0, a, b);
43626        assert_eq_m512i(r, _mm512_setzero_si512());
43627        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43628        #[rustfmt::skip]
43629        let e = _mm512_setr_epi32(
43630            1, 2, 0, i32::MIN,
43631            i32::MIN + 1, 101, -99, -31,
43632            0, 0, 0, 0,
43633            0, 0, 0, 0,
43634        );
43635        assert_eq_m512i(r, e);
43636    }
43637
43638    #[simd_test(enable = "avx512f,avx512vl")]
43639    unsafe fn test_mm256_mask_add_epi32() {
43640        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43641        let b = _mm256_set1_epi32(1);
43642        let r = _mm256_mask_add_epi32(a, 0, a, b);
43643        assert_eq_m256i(r, a);
43644        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43645        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43646        assert_eq_m256i(r, e);
43647    }
43648
43649    #[simd_test(enable = "avx512f,avx512vl")]
43650    unsafe fn test_mm256_maskz_add_epi32() {
43651        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43652        let b = _mm256_set1_epi32(1);
43653        let r = _mm256_maskz_add_epi32(0, a, b);
43654        assert_eq_m256i(r, _mm256_setzero_si256());
43655        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43656        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43657        assert_eq_m256i(r, e);
43658    }
43659
43660    #[simd_test(enable = "avx512f,avx512vl")]
43661    unsafe fn test_mm_mask_add_epi32() {
43662        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43663        let b = _mm_set1_epi32(1);
43664        let r = _mm_mask_add_epi32(a, 0, a, b);
43665        assert_eq_m128i(r, a);
43666        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43667        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43668        assert_eq_m128i(r, e);
43669    }
43670
43671    #[simd_test(enable = "avx512f,avx512vl")]
43672    unsafe fn test_mm_maskz_add_epi32() {
43673        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43674        let b = _mm_set1_epi32(1);
43675        let r = _mm_maskz_add_epi32(0, a, b);
43676        assert_eq_m128i(r, _mm_setzero_si128());
43677        let r = _mm_maskz_add_epi32(0b00001111, a, b);
43678        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43679        assert_eq_m128i(r, e);
43680    }
43681
43682    #[simd_test(enable = "avx512f")]
43683    unsafe fn test_mm512_add_ps() {
43684        #[rustfmt::skip]
43685        let a = _mm512_setr_ps(
43686            0., 1., -1., f32::MAX,
43687            f32::MIN, 100., -100., -32.,
43688            0., 1., -1., f32::MAX,
43689            f32::MIN, 100., -100., -32.,
43690        );
43691        let b = _mm512_set1_ps(1.);
43692        let r = _mm512_add_ps(a, b);
43693        #[rustfmt::skip]
43694        let e = _mm512_setr_ps(
43695            1., 2., 0., f32::MAX,
43696            f32::MIN + 1., 101., -99., -31.,
43697            1., 2., 0., f32::MAX,
43698            f32::MIN + 1., 101., -99., -31.,
43699        );
43700        assert_eq_m512(r, e);
43701    }
43702
43703    #[simd_test(enable = "avx512f")]
43704    unsafe fn test_mm512_mask_add_ps() {
43705        #[rustfmt::skip]
43706        let a = _mm512_setr_ps(
43707            0., 1., -1., f32::MAX,
43708            f32::MIN, 100., -100., -32.,
43709            0., 1., -1., f32::MAX,
43710            f32::MIN, 100., -100., -32.,
43711        );
43712        let b = _mm512_set1_ps(1.);
43713        let r = _mm512_mask_add_ps(a, 0, a, b);
43714        assert_eq_m512(r, a);
43715        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43716        #[rustfmt::skip]
43717        let e = _mm512_setr_ps(
43718            1., 2., 0., f32::MAX,
43719            f32::MIN + 1., 101., -99., -31.,
43720            0., 1., -1., f32::MAX,
43721            f32::MIN, 100., -100., -32.,
43722        );
43723        assert_eq_m512(r, e);
43724    }
43725
43726    #[simd_test(enable = "avx512f")]
43727    unsafe fn test_mm512_maskz_add_ps() {
43728        #[rustfmt::skip]
43729        let a = _mm512_setr_ps(
43730            0., 1., -1., f32::MAX,
43731            f32::MIN, 100., -100., -32.,
43732            0., 1., -1., f32::MAX,
43733            f32::MIN, 100., -100., -32.,
43734        );
43735        let b = _mm512_set1_ps(1.);
43736        let r = _mm512_maskz_add_ps(0, a, b);
43737        assert_eq_m512(r, _mm512_setzero_ps());
43738        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43739        #[rustfmt::skip]
43740        let e = _mm512_setr_ps(
43741            1., 2., 0., f32::MAX,
43742            f32::MIN + 1., 101., -99., -31.,
43743            0., 0., 0., 0.,
43744            0., 0., 0., 0.,
43745        );
43746        assert_eq_m512(r, e);
43747    }
43748
43749    #[simd_test(enable = "avx512f,avx512vl")]
43750    unsafe fn test_mm256_mask_add_ps() {
43751        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43752        let b = _mm256_set1_ps(1.);
43753        let r = _mm256_mask_add_ps(a, 0, a, b);
43754        assert_eq_m256(r, a);
43755        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43756        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43757        assert_eq_m256(r, e);
43758    }
43759
43760    #[simd_test(enable = "avx512f,avx512vl")]
43761    unsafe fn test_mm256_maskz_add_ps() {
43762        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43763        let b = _mm256_set1_ps(1.);
43764        let r = _mm256_maskz_add_ps(0, a, b);
43765        assert_eq_m256(r, _mm256_setzero_ps());
43766        let r = _mm256_maskz_add_ps(0b11111111, a, b);
43767        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43768        assert_eq_m256(r, e);
43769    }
43770
43771    #[simd_test(enable = "avx512f,avx512vl")]
43772    unsafe fn test_mm_mask_add_ps() {
43773        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43774        let b = _mm_set1_ps(1.);
43775        let r = _mm_mask_add_ps(a, 0, a, b);
43776        assert_eq_m128(r, a);
43777        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43778        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43779        assert_eq_m128(r, e);
43780    }
43781
43782    #[simd_test(enable = "avx512f,avx512vl")]
43783    unsafe fn test_mm_maskz_add_ps() {
43784        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43785        let b = _mm_set1_ps(1.);
43786        let r = _mm_maskz_add_ps(0, a, b);
43787        assert_eq_m128(r, _mm_setzero_ps());
43788        let r = _mm_maskz_add_ps(0b00001111, a, b);
43789        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43790        assert_eq_m128(r, e);
43791    }
43792
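    // Integer lane arithmetic wraps on overflow, so the expected vectors below rely on
    // i32::MIN - 1 wrapping around to i32::MAX.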
43793    #[simd_test(enable = "avx512f")]
43794    unsafe fn test_mm512_sub_epi32() {
43795        #[rustfmt::skip]
43796        let a = _mm512_setr_epi32(
43797            0, 1, -1, i32::MAX,
43798            i32::MIN, 100, -100, -32,
43799            0, 1, -1, i32::MAX,
43800            i32::MIN, 100, -100, -32,
43801        );
43802        let b = _mm512_set1_epi32(1);
43803        let r = _mm512_sub_epi32(a, b);
43804        #[rustfmt::skip]
43805        let e = _mm512_setr_epi32(
43806            -1, 0, -2, i32::MAX - 1,
43807            i32::MAX, 99, -101, -33,
43808            -1, 0, -2, i32::MAX - 1,
43809            i32::MAX, 99, -101, -33,
43810        );
43811        assert_eq_m512i(r, e);
43812    }
43813
43814    #[simd_test(enable = "avx512f")]
43815    unsafe fn test_mm512_mask_sub_epi32() {
43816        #[rustfmt::skip]
43817        let a = _mm512_setr_epi32(
43818            0, 1, -1, i32::MAX,
43819            i32::MIN, 100, -100, -32,
43820            0, 1, -1, i32::MAX,
43821            i32::MIN, 100, -100, -32,
43822        );
43823        let b = _mm512_set1_epi32(1);
43824        let r = _mm512_mask_sub_epi32(a, 0, a, b);
43825        assert_eq_m512i(r, a);
43826        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43827        #[rustfmt::skip]
43828        let e = _mm512_setr_epi32(
43829            -1, 0, -2, i32::MAX - 1,
43830            i32::MAX, 99, -101, -33,
43831            0, 1, -1, i32::MAX,
43832            i32::MIN, 100, -100, -32,
43833        );
43834        assert_eq_m512i(r, e);
43835    }
43836
43837    #[simd_test(enable = "avx512f")]
43838    unsafe fn test_mm512_maskz_sub_epi32() {
43839        #[rustfmt::skip]
43840        let a = _mm512_setr_epi32(
43841            0, 1, -1, i32::MAX,
43842            i32::MIN, 100, -100, -32,
43843            0, 1, -1, i32::MAX,
43844            i32::MIN, 100, -100, -32,
43845        );
43846        let b = _mm512_set1_epi32(1);
43847        let r = _mm512_maskz_sub_epi32(0, a, b);
43848        assert_eq_m512i(r, _mm512_setzero_si512());
43849        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43850        #[rustfmt::skip]
43851        let e = _mm512_setr_epi32(
43852            -1, 0, -2, i32::MAX - 1,
43853            i32::MAX, 99, -101, -33,
43854            0, 0, 0, 0,
43855            0, 0, 0, 0,
43856        );
43857        assert_eq_m512i(r, e);
43858    }
43859
43860    #[simd_test(enable = "avx512f,avx512vl")]
43861    unsafe fn test_mm256_mask_sub_epi32() {
43862        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43863        let b = _mm256_set1_epi32(1);
43864        let r = _mm256_mask_sub_epi32(a, 0, a, b);
43865        assert_eq_m256i(r, a);
43866        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43867        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43868        assert_eq_m256i(r, e);
43869    }
43870
43871    #[simd_test(enable = "avx512f,avx512vl")]
43872    unsafe fn test_mm256_maskz_sub_epi32() {
43873        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43874        let b = _mm256_set1_epi32(1);
43875        let r = _mm256_maskz_sub_epi32(0, a, b);
43876        assert_eq_m256i(r, _mm256_setzero_si256());
43877        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43878        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43879        assert_eq_m256i(r, e);
43880    }
43881
43882    #[simd_test(enable = "avx512f,avx512vl")]
43883    unsafe fn test_mm_mask_sub_epi32() {
43884        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43885        let b = _mm_set1_epi32(1);
43886        let r = _mm_mask_sub_epi32(a, 0, a, b);
43887        assert_eq_m128i(r, a);
43888        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43889        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43890        assert_eq_m128i(r, e);
43891    }
43892
43893    #[simd_test(enable = "avx512f,avx512vl")]
43894    unsafe fn test_mm_maskz_sub_epi32() {
43895        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43896        let b = _mm_set1_epi32(1);
43897        let r = _mm_maskz_sub_epi32(0, a, b);
43898        assert_eq_m128i(r, _mm_setzero_si128());
43899        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43900        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43901        assert_eq_m128i(r, e);
43902    }
43903
43904    #[simd_test(enable = "avx512f")]
43905    unsafe fn test_mm512_sub_ps() {
43906        #[rustfmt::skip]
43907        let a = _mm512_setr_ps(
43908            0., 1., -1., f32::MAX,
43909            f32::MIN, 100., -100., -32.,
43910            0., 1., -1., f32::MAX,
43911            f32::MIN, 100., -100., -32.,
43912        );
43913        let b = _mm512_set1_ps(1.);
43914        let r = _mm512_sub_ps(a, b);
43915        #[rustfmt::skip]
43916        let e = _mm512_setr_ps(
43917            -1., 0., -2., f32::MAX - 1.,
43918            f32::MIN, 99., -101., -33.,
43919            -1., 0., -2., f32::MAX - 1.,
43920            f32::MIN, 99., -101., -33.,
43921        );
43922        assert_eq_m512(r, e);
43923    }
43924
43925    #[simd_test(enable = "avx512f")]
43926    unsafe fn test_mm512_mask_sub_ps() {
43927        #[rustfmt::skip]
43928        let a = _mm512_setr_ps(
43929            0., 1., -1., f32::MAX,
43930            f32::MIN, 100., -100., -32.,
43931            0., 1., -1., f32::MAX,
43932            f32::MIN, 100., -100., -32.,
43933        );
43934        let b = _mm512_set1_ps(1.);
43935        let r = _mm512_mask_sub_ps(a, 0, a, b);
43936        assert_eq_m512(r, a);
43937        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43938        #[rustfmt::skip]
43939        let e = _mm512_setr_ps(
43940            -1., 0., -2., f32::MAX - 1.,
43941            f32::MIN, 99., -101., -33.,
43942            0., 1., -1., f32::MAX,
43943            f32::MIN, 100., -100., -32.,
43944        );
43945        assert_eq_m512(r, e);
43946    }
43947
43948    #[simd_test(enable = "avx512f")]
43949    unsafe fn test_mm512_maskz_sub_ps() {
43950        #[rustfmt::skip]
43951        let a = _mm512_setr_ps(
43952            0., 1., -1., f32::MAX,
43953            f32::MIN, 100., -100., -32.,
43954            0., 1., -1., f32::MAX,
43955            f32::MIN, 100., -100., -32.,
43956        );
43957        let b = _mm512_set1_ps(1.);
43958        let r = _mm512_maskz_sub_ps(0, a, b);
43959        assert_eq_m512(r, _mm512_setzero_ps());
43960        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43961        #[rustfmt::skip]
43962        let e = _mm512_setr_ps(
43963            -1., 0., -2., f32::MAX - 1.,
43964            f32::MIN, 99., -101., -33.,
43965            0., 0., 0., 0.,
43966            0., 0., 0., 0.,
43967        );
43968        assert_eq_m512(r, e);
43969    }
43970
43971    #[simd_test(enable = "avx512f,avx512vl")]
43972    unsafe fn test_mm256_mask_sub_ps() {
43973        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43974        let b = _mm256_set1_ps(1.);
43975        let r = _mm256_mask_sub_ps(a, 0, a, b);
43976        assert_eq_m256(r, a);
43977        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43978        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43979        assert_eq_m256(r, e);
43980    }
43981
43982    #[simd_test(enable = "avx512f,avx512vl")]
43983    unsafe fn test_mm256_maskz_sub_ps() {
43984        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43985        let b = _mm256_set1_ps(1.);
43986        let r = _mm256_maskz_sub_ps(0, a, b);
43987        assert_eq_m256(r, _mm256_setzero_ps());
43988        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
43989        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43990        assert_eq_m256(r, e);
43991    }
43992
43993    #[simd_test(enable = "avx512f,avx512vl")]
43994    unsafe fn test_mm_mask_sub_ps() {
43995        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43996        let b = _mm_set1_ps(1.);
43997        let r = _mm_mask_sub_ps(a, 0, a, b);
43998        assert_eq_m128(r, a);
43999        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44000        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44001        assert_eq_m128(r, e);
44002    }
44003
44004    #[simd_test(enable = "avx512f,avx512vl")]
44005    unsafe fn test_mm_maskz_sub_ps() {
44006        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44007        let b = _mm_set1_ps(1.);
44008        let r = _mm_maskz_sub_ps(0, a, b);
44009        assert_eq_m128(r, _mm_setzero_ps());
44010        let r = _mm_maskz_sub_ps(0b00001111, a, b);
44011        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44012        assert_eq_m128(r, e);
44013    }
44014
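    // `mullo` keeps only the low 32 bits of each product, so in the expected vectors
    // i32::MAX * 2 wraps to -2 and i32::MIN * 2 wraps to 0.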
44015    #[simd_test(enable = "avx512f")]
44016    unsafe fn test_mm512_mullo_epi32() {
44017        #[rustfmt::skip]
44018        let a = _mm512_setr_epi32(
44019            0, 1, -1, i32::MAX,
44020            i32::MIN, 100, -100, -32,
44021            0, 1, -1, i32::MAX,
44022            i32::MIN, 100, -100, -32,
44023        );
44024        let b = _mm512_set1_epi32(2);
44025        let r = _mm512_mullo_epi32(a, b);
44026        let e = _mm512_setr_epi32(
44027            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44028        );
44029        assert_eq_m512i(r, e);
44030    }
44031
44032    #[simd_test(enable = "avx512f")]
44033    unsafe fn test_mm512_mask_mullo_epi32() {
44034        #[rustfmt::skip]
44035        let a = _mm512_setr_epi32(
44036            0, 1, -1, i32::MAX,
44037            i32::MIN, 100, -100, -32,
44038            0, 1, -1, i32::MAX,
44039            i32::MIN, 100, -100, -32,
44040        );
44041        let b = _mm512_set1_epi32(2);
44042        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44043        assert_eq_m512i(r, a);
44044        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44045        #[rustfmt::skip]
44046        let e = _mm512_setr_epi32(
44047            0, 2, -2, -2,
44048            0, 200, -200, -64,
44049            0, 1, -1, i32::MAX,
44050            i32::MIN, 100, -100, -32,
44051        );
44052        assert_eq_m512i(r, e);
44053    }
44054
44055    #[simd_test(enable = "avx512f")]
44056    unsafe fn test_mm512_maskz_mullo_epi32() {
44057        #[rustfmt::skip]
44058        let a = _mm512_setr_epi32(
44059            0, 1, -1, i32::MAX,
44060            i32::MIN, 100, -100, -32,
44061            0, 1, -1, i32::MAX,
44062            i32::MIN, 100, -100, -32,
44063        );
44064        let b = _mm512_set1_epi32(2);
44065        let r = _mm512_maskz_mullo_epi32(0, a, b);
44066        assert_eq_m512i(r, _mm512_setzero_si512());
44067        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44068        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44069        assert_eq_m512i(r, e);
44070    }
44071
44072    #[simd_test(enable = "avx512f,avx512vl")]
44073    unsafe fn test_mm256_mask_mullo_epi32() {
44074        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44075        let b = _mm256_set1_epi32(2);
44076        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44077        assert_eq_m256i(r, a);
44078        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44079        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44080        assert_eq_m256i(r, e);
44081    }
44082
44083    #[simd_test(enable = "avx512f,avx512vl")]
44084    unsafe fn test_mm256_maskz_mullo_epi32() {
44085        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44086        let b = _mm256_set1_epi32(2);
44087        let r = _mm256_maskz_mullo_epi32(0, a, b);
44088        assert_eq_m256i(r, _mm256_setzero_si256());
44089        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44090        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44091        assert_eq_m256i(r, e);
44092    }
44093
44094    #[simd_test(enable = "avx512f,avx512vl")]
44095    unsafe fn test_mm_mask_mullo_epi32() {
44096        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44097        let b = _mm_set1_epi32(2);
44098        let r = _mm_mask_mullo_epi32(a, 0, a, b);
44099        assert_eq_m128i(r, a);
44100        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44101        let e = _mm_set_epi32(2, -2, -2, 0);
44102        assert_eq_m128i(r, e);
44103    }
44104
44105    #[simd_test(enable = "avx512f,avx512vl")]
44106    unsafe fn test_mm_maskz_mullo_epi32() {
44107        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44108        let b = _mm_set1_epi32(2);
44109        let r = _mm_maskz_mullo_epi32(0, a, b);
44110        assert_eq_m128i(r, _mm_setzero_si128());
44111        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44112        let e = _mm_set_epi32(2, -2, -2, 0);
44113        assert_eq_m128i(r, e);
44114    }
44115
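    // Doubling f32::MAX and f32::MIN overflows the f32 range, so the affected lanes of
    // the expected vectors hold +/- infinity.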
44116    #[simd_test(enable = "avx512f")]
44117    unsafe fn test_mm512_mul_ps() {
44118        #[rustfmt::skip]
44119        let a = _mm512_setr_ps(
44120            0., 1., -1., f32::MAX,
44121            f32::MIN, 100., -100., -32.,
44122            0., 1., -1., f32::MAX,
44123            f32::MIN, 100., -100., -32.,
44124        );
44125        let b = _mm512_set1_ps(2.);
44126        let r = _mm512_mul_ps(a, b);
44127        #[rustfmt::skip]
44128        let e = _mm512_setr_ps(
44129            0., 2., -2., f32::INFINITY,
44130            f32::NEG_INFINITY, 200., -200., -64.,
44131            0., 2., -2., f32::INFINITY,
44132            f32::NEG_INFINITY, 200., -200., -64.,
44134        );
44135        assert_eq_m512(r, e);
44136    }
44137
44138    #[simd_test(enable = "avx512f")]
44139    unsafe fn test_mm512_mask_mul_ps() {
44140        #[rustfmt::skip]
44141        let a = _mm512_setr_ps(
44142            0., 1., -1., f32::MAX,
44143            f32::MIN, 100., -100., -32.,
44144            0., 1., -1., f32::MAX,
44145            f32::MIN, 100., -100., -32.,
44146        );
44147        let b = _mm512_set1_ps(2.);
44148        let r = _mm512_mask_mul_ps(a, 0, a, b);
44149        assert_eq_m512(r, a);
44150        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44151        #[rustfmt::skip]
44152        let e = _mm512_setr_ps(
44153            0., 2., -2., f32::INFINITY,
44154            f32::NEG_INFINITY, 200., -200., -64.,
44155            0., 1., -1., f32::MAX,
44156            f32::MIN, 100., -100., -32.,
44157        );
44158        assert_eq_m512(r, e);
44159    }
44160
44161    #[simd_test(enable = "avx512f")]
44162    unsafe fn test_mm512_maskz_mul_ps() {
44163        #[rustfmt::skip]
44164        let a = _mm512_setr_ps(
44165            0., 1., -1., f32::MAX,
44166            f32::MIN, 100., -100., -32.,
44167            0., 1., -1., f32::MAX,
44168            f32::MIN, 100., -100., -32.,
44169        );
44170        let b = _mm512_set1_ps(2.);
44171        let r = _mm512_maskz_mul_ps(0, a, b);
44172        assert_eq_m512(r, _mm512_setzero_ps());
44173        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44174        #[rustfmt::skip]
44175        let e = _mm512_setr_ps(
44176            0., 2., -2., f32::INFINITY,
44177            f32::NEG_INFINITY, 200., -200., -64.,
44178            0., 0., 0., 0.,
44179            0., 0., 0., 0.,
44180        );
44181        assert_eq_m512(r, e);
44182    }
44183
44184    #[simd_test(enable = "avx512f,avx512vl")]
44185    unsafe fn test_mm256_mask_mul_ps() {
44186        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44187        let b = _mm256_set1_ps(2.);
44188        let r = _mm256_mask_mul_ps(a, 0, a, b);
44189        assert_eq_m256(r, a);
44190        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44191        #[rustfmt::skip]
44192        let e = _mm256_set_ps(
44193            0., 2., -2., f32::INFINITY,
44194            f32::NEG_INFINITY, 200., -200., -64.,
44195        );
44196        assert_eq_m256(r, e);
44197    }
44198
44199    #[simd_test(enable = "avx512f,avx512vl")]
44200    unsafe fn test_mm256_maskz_mul_ps() {
44201        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44202        let b = _mm256_set1_ps(2.);
44203        let r = _mm256_maskz_mul_ps(0, a, b);
44204        assert_eq_m256(r, _mm256_setzero_ps());
44205        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44206        #[rustfmt::skip]
44207        let e = _mm256_set_ps(
44208            0., 2., -2., f32::INFINITY,
44209            f32::NEG_INFINITY, 200., -200., -64.,
44210        );
44211        assert_eq_m256(r, e);
44212    }
44213
44214    #[simd_test(enable = "avx512f,avx512vl")]
44215    unsafe fn test_mm_mask_mul_ps() {
44216        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44217        let b = _mm_set1_ps(2.);
44218        let r = _mm_mask_mul_ps(a, 0, a, b);
44219        assert_eq_m128(r, a);
44220        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44221        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44222        assert_eq_m128(r, e);
44223    }
44224
44225    #[simd_test(enable = "avx512f,avx512vl")]
44226    unsafe fn test_mm_maskz_mul_ps() {
44227        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44228        let b = _mm_set1_ps(2.);
44229        let r = _mm_maskz_mul_ps(0, a, b);
44230        assert_eq_m128(r, _mm_setzero_ps());
44231        let r = _mm_maskz_mul_ps(0b00001111, a, b);
44232        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44233        assert_eq_m128(r, e);
44234    }
44235
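    // Dividing a nonzero finite value by 0. yields a signed infinity under IEEE 754;
    // the expected vectors encode this for the lanes where `b` is 0.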
44236    #[simd_test(enable = "avx512f")]
44237    unsafe fn test_mm512_div_ps() {
44238        let a = _mm512_setr_ps(
44239            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44240        );
44241        let b = _mm512_setr_ps(
44242            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44243        );
44244        let r = _mm512_div_ps(a, b);
44245        #[rustfmt::skip]
44246        let e = _mm512_setr_ps(
44247            0., 0.5, -0.5, -1.,
44248            50., f32::INFINITY, -50., -16.,
44249            0., 0.5, -0.5, 500.,
44250            f32::NEG_INFINITY, 50., -50., -16.,
44251        );
44252        assert_eq_m512(r, e); // x / 0. = +/- infinity for nonzero finite x
44253    }
44254
44255    #[simd_test(enable = "avx512f")]
44256    unsafe fn test_mm512_mask_div_ps() {
44257        let a = _mm512_setr_ps(
44258            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44259        );
44260        let b = _mm512_setr_ps(
44261            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44262        );
44263        let r = _mm512_mask_div_ps(a, 0, a, b);
44264        assert_eq_m512(r, a);
44265        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44266        #[rustfmt::skip]
44267        let e = _mm512_setr_ps(
44268            0., 0.5, -0.5, -1.,
44269            50., f32::INFINITY, -50., -16.,
44270            0., 1., -1., 1000.,
44271            -131., 100., -100., -32.,
44272        );
44273        assert_eq_m512(r, e);
44274    }
44275
44276    #[simd_test(enable = "avx512f")]
44277    unsafe fn test_mm512_maskz_div_ps() {
44278        let a = _mm512_setr_ps(
44279            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44280        );
44281        let b = _mm512_setr_ps(
44282            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44283        );
44284        let r = _mm512_maskz_div_ps(0, a, b);
44285        assert_eq_m512(r, _mm512_setzero_ps());
44286        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44287        #[rustfmt::skip]
44288        let e = _mm512_setr_ps(
44289            0., 0.5, -0.5, -1.,
44290            50., f32::INFINITY, -50., -16.,
44291            0., 0., 0., 0.,
44292            0., 0., 0., 0.,
44293        );
44294        assert_eq_m512(r, e);
44295    }
44296
44297    #[simd_test(enable = "avx512f,avx512vl")]
44298    unsafe fn test_mm256_mask_div_ps() {
44299        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44300        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44301        let r = _mm256_mask_div_ps(a, 0, a, b);
44302        assert_eq_m256(r, a);
44303        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44304        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44305        assert_eq_m256(r, e);
44306    }
44307
44308    #[simd_test(enable = "avx512f,avx512vl")]
44309    unsafe fn test_mm256_maskz_div_ps() {
44310        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44311        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44312        let r = _mm256_maskz_div_ps(0, a, b);
44313        assert_eq_m256(r, _mm256_setzero_ps());
44314        let r = _mm256_maskz_div_ps(0b11111111, a, b);
44315        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44316        assert_eq_m256(r, e);
44317    }
44318
44319    #[simd_test(enable = "avx512f,avx512vl")]
44320    unsafe fn test_mm_mask_div_ps() {
44321        let a = _mm_set_ps(100., 100., -100., -32.);
44322        let b = _mm_set_ps(2., 0., 2., 2.);
44323        let r = _mm_mask_div_ps(a, 0, a, b);
44324        assert_eq_m128(r, a);
44325        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44326        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44327        assert_eq_m128(r, e);
44328    }
44329
44330    #[simd_test(enable = "avx512f,avx512vl")]
44331    unsafe fn test_mm_maskz_div_ps() {
44332        let a = _mm_set_ps(100., 100., -100., -32.);
44333        let b = _mm_set_ps(2., 0., 2., 2.);
44334        let r = _mm_maskz_div_ps(0, a, b);
44335        assert_eq_m128(r, _mm_setzero_ps());
44336        let r = _mm_maskz_div_ps(0b00001111, a, b);
44337        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44338        assert_eq_m128(r, e);
44339    }
44340
44341    #[simd_test(enable = "avx512f")]
44342    unsafe fn test_mm512_max_epi32() {
44343        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44344        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44345        let r = _mm512_max_epi32(a, b);
44346        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44347        assert_eq_m512i(r, e);
44348    }
44349
44350    #[simd_test(enable = "avx512f")]
44351    unsafe fn test_mm512_mask_max_epi32() {
44352        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44353        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44354        let r = _mm512_mask_max_epi32(a, 0, a, b);
44355        assert_eq_m512i(r, a);
44356        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44357        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44358        assert_eq_m512i(r, e);
44359    }
44360
44361    #[simd_test(enable = "avx512f")]
44362    unsafe fn test_mm512_maskz_max_epi32() {
44363        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44364        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44365        let r = _mm512_maskz_max_epi32(0, a, b);
44366        assert_eq_m512i(r, _mm512_setzero_si512());
44367        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44368        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44369        assert_eq_m512i(r, e);
44370    }
44371
44372    #[simd_test(enable = "avx512f,avx512vl")]
44373    unsafe fn test_mm256_mask_max_epi32() {
44374        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44375        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44376        let r = _mm256_mask_max_epi32(a, 0, a, b);
44377        assert_eq_m256i(r, a);
44378        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44379        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44380        assert_eq_m256i(r, e);
44381    }
44382
44383    #[simd_test(enable = "avx512f,avx512vl")]
44384    unsafe fn test_mm256_maskz_max_epi32() {
44385        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44386        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44387        let r = _mm256_maskz_max_epi32(0, a, b);
44388        assert_eq_m256i(r, _mm256_setzero_si256());
44389        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44390        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44391        assert_eq_m256i(r, e);
44392    }
44393
44394    #[simd_test(enable = "avx512f,avx512vl")]
44395    unsafe fn test_mm_mask_max_epi32() {
44396        let a = _mm_set_epi32(0, 1, 2, 3);
44397        let b = _mm_set_epi32(3, 2, 1, 0);
44398        let r = _mm_mask_max_epi32(a, 0, a, b);
44399        assert_eq_m128i(r, a);
44400        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44401        let e = _mm_set_epi32(3, 2, 2, 3);
44402        assert_eq_m128i(r, e);
44403    }
44404
44405    #[simd_test(enable = "avx512f,avx512vl")]
44406    unsafe fn test_mm_maskz_max_epi32() {
44407        let a = _mm_set_epi32(0, 1, 2, 3);
44408        let b = _mm_set_epi32(3, 2, 1, 0);
44409        let r = _mm_maskz_max_epi32(0, a, b);
44410        assert_eq_m128i(r, _mm_setzero_si128());
44411        let r = _mm_maskz_max_epi32(0b00001111, a, b);
44412        let e = _mm_set_epi32(3, 2, 2, 3);
44413        assert_eq_m128i(r, e);
44414    }
44415
44416    #[simd_test(enable = "avx512f")]
44417    unsafe fn test_mm512_max_ps() {
44418        let a = _mm512_setr_ps(
44419            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44420        );
44421        let b = _mm512_setr_ps(
44422            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44423        );
44424        let r = _mm512_max_ps(a, b);
44425        let e = _mm512_setr_ps(
44426            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44427        );
44428        assert_eq_m512(r, e);
44429    }
44430
44431    #[simd_test(enable = "avx512f")]
44432    unsafe fn test_mm512_mask_max_ps() {
44433        let a = _mm512_setr_ps(
44434            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44435        );
44436        let b = _mm512_setr_ps(
44437            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44438        );
44439        let r = _mm512_mask_max_ps(a, 0, a, b);
44440        assert_eq_m512(r, a);
44441        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44442        let e = _mm512_setr_ps(
44443            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44444        );
44445        assert_eq_m512(r, e);
44446    }
44447
44448    #[simd_test(enable = "avx512f")]
44449    unsafe fn test_mm512_maskz_max_ps() {
44450        let a = _mm512_setr_ps(
44451            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44452        );
44453        let b = _mm512_setr_ps(
44454            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44455        );
44456        let r = _mm512_maskz_max_ps(0, a, b);
44457        assert_eq_m512(r, _mm512_setzero_ps());
44458        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44459        let e = _mm512_setr_ps(
44460            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44461        );
44462        assert_eq_m512(r, e);
44463    }
44464
44465    #[simd_test(enable = "avx512f,avx512vl")]
44466    unsafe fn test_mm256_mask_max_ps() {
44467        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44468        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44469        let r = _mm256_mask_max_ps(a, 0, a, b);
44470        assert_eq_m256(r, a);
44471        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44472        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44473        assert_eq_m256(r, e);
44474    }
44475
44476    #[simd_test(enable = "avx512f,avx512vl")]
44477    unsafe fn test_mm256_maskz_max_ps() {
44478        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44479        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44480        let r = _mm256_maskz_max_ps(0, a, b);
44481        assert_eq_m256(r, _mm256_setzero_ps());
44482        let r = _mm256_maskz_max_ps(0b11111111, a, b);
44483        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44484        assert_eq_m256(r, e);
44485    }
44486
44487    #[simd_test(enable = "avx512f,avx512vl")]
44488    unsafe fn test_mm_mask_max_ps() {
44489        let a = _mm_set_ps(0., 1., 2., 3.);
44490        let b = _mm_set_ps(3., 2., 1., 0.);
44491        let r = _mm_mask_max_ps(a, 0, a, b);
44492        assert_eq_m128(r, a);
44493        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44494        let e = _mm_set_ps(3., 2., 2., 3.);
44495        assert_eq_m128(r, e);
44496    }
44497
44498    #[simd_test(enable = "avx512f,avx512vl")]
44499    unsafe fn test_mm_maskz_max_ps() {
44500        let a = _mm_set_ps(0., 1., 2., 3.);
44501        let b = _mm_set_ps(3., 2., 1., 0.);
44502        let r = _mm_maskz_max_ps(0, a, b);
44503        assert_eq_m128(r, _mm_setzero_ps());
44504        let r = _mm_maskz_max_ps(0b00001111, a, b);
44505        let e = _mm_set_ps(3., 2., 2., 3.);
44506        assert_eq_m128(r, e);
44507    }
44508
44509    #[simd_test(enable = "avx512f")]
44510    unsafe fn test_mm512_max_epu32() {
44511        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44512        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44513        let r = _mm512_max_epu32(a, b);
44514        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44515        assert_eq_m512i(r, e);
44516    }
44517
44518    #[simd_test(enable = "avx512f")]
44519    unsafe fn test_mm512_mask_max_epu32() {
44520        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44521        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44522        let r = _mm512_mask_max_epu32(a, 0, a, b);
44523        assert_eq_m512i(r, a);
44524        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44525        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44526        assert_eq_m512i(r, e);
44527    }
44528
44529    #[simd_test(enable = "avx512f")]
44530    unsafe fn test_mm512_maskz_max_epu32() {
44531        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44532        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44533        let r = _mm512_maskz_max_epu32(0, a, b);
44534        assert_eq_m512i(r, _mm512_setzero_si512());
44535        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44536        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44537        assert_eq_m512i(r, e);
44538    }
44539
44540    #[simd_test(enable = "avx512f,avx512vl")]
44541    unsafe fn test_mm256_mask_max_epu32() {
44542        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44543        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44544        let r = _mm256_mask_max_epu32(a, 0, a, b);
44545        assert_eq_m256i(r, a);
44546        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44547        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44548        assert_eq_m256i(r, e);
44549    }
44550
44551    #[simd_test(enable = "avx512f,avx512vl")]
44552    unsafe fn test_mm256_maskz_max_epu32() {
44553        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44554        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44555        let r = _mm256_maskz_max_epu32(0, a, b);
44556        assert_eq_m256i(r, _mm256_setzero_si256());
44557        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44558        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44559        assert_eq_m256i(r, e);
44560    }
44561
44562    #[simd_test(enable = "avx512f,avx512vl")]
44563    unsafe fn test_mm_mask_max_epu32() {
44564        let a = _mm_set_epi32(0, 1, 2, 3);
44565        let b = _mm_set_epi32(3, 2, 1, 0);
44566        let r = _mm_mask_max_epu32(a, 0, a, b);
44567        assert_eq_m128i(r, a);
44568        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44569        let e = _mm_set_epi32(3, 2, 2, 3);
44570        assert_eq_m128i(r, e);
44571    }
44572
44573    #[simd_test(enable = "avx512f,avx512vl")]
44574    unsafe fn test_mm_maskz_max_epu32() {
44575        let a = _mm_set_epi32(0, 1, 2, 3);
44576        let b = _mm_set_epi32(3, 2, 1, 0);
44577        let r = _mm_maskz_max_epu32(0, a, b);
44578        assert_eq_m128i(r, _mm_setzero_si128());
44579        let r = _mm_maskz_max_epu32(0b00001111, a, b);
44580        let e = _mm_set_epi32(3, 2, 2, 3);
44581        assert_eq_m128i(r, e);
44582    }
44583
44584    #[simd_test(enable = "avx512f")]
44585    unsafe fn test_mm512_min_epi32() {
44586        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44587        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44588        let r = _mm512_min_epi32(a, b);
44589        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44590        assert_eq_m512i(r, e);
44591    }
44592
44593    #[simd_test(enable = "avx512f")]
44594    unsafe fn test_mm512_mask_min_epi32() {
44595        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44596        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44597        let r = _mm512_mask_min_epi32(a, 0, a, b);
44598        assert_eq_m512i(r, a);
44599        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44600        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44601        assert_eq_m512i(r, e);
44602    }
44603
44604    #[simd_test(enable = "avx512f")]
44605    unsafe fn test_mm512_maskz_min_epi32() {
44606        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44607        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44608        let r = _mm512_maskz_min_epi32(0, a, b);
44609        assert_eq_m512i(r, _mm512_setzero_si512());
44610        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44611        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44612        assert_eq_m512i(r, e);
44613    }
44614
44615    #[simd_test(enable = "avx512f,avx512vl")]
44616    unsafe fn test_mm256_mask_min_epi32() {
44617        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44618        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44619        let r = _mm256_mask_min_epi32(a, 0, a, b);
44620        assert_eq_m256i(r, a);
44621        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44622        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44623        assert_eq_m256i(r, e);
44624    }
44625
44626    #[simd_test(enable = "avx512f,avx512vl")]
44627    unsafe fn test_mm256_maskz_min_epi32() {
44628        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44629        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44630        let r = _mm256_maskz_min_epi32(0, a, b);
44631        assert_eq_m256i(r, _mm256_setzero_si256());
44632        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44633        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44634        assert_eq_m256i(r, e);
44635    }
44636
44637    #[simd_test(enable = "avx512f,avx512vl")]
44638    unsafe fn test_mm_mask_min_epi32() {
44639        let a = _mm_set_epi32(0, 1, 2, 3);
44640        let b = _mm_set_epi32(3, 2, 1, 0);
44641        let r = _mm_mask_min_epi32(a, 0, a, b);
44642        assert_eq_m128i(r, a);
44643        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44644        let e = _mm_set_epi32(0, 1, 1, 0);
44645        assert_eq_m128i(r, e);
44646    }
44647
44648    #[simd_test(enable = "avx512f,avx512vl")]
44649    unsafe fn test_mm_maskz_min_epi32() {
44650        let a = _mm_set_epi32(0, 1, 2, 3);
44651        let b = _mm_set_epi32(3, 2, 1, 0);
44652        let r = _mm_maskz_min_epi32(0, a, b);
44653        assert_eq_m128i(r, _mm_setzero_si128());
44654        let r = _mm_maskz_min_epi32(0b00001111, a, b);
44655        let e = _mm_set_epi32(0, 1, 1, 0);
44656        assert_eq_m128i(r, e);
44657    }
44658
44659    #[simd_test(enable = "avx512f")]
44660    unsafe fn test_mm512_min_ps() {
44661        let a = _mm512_setr_ps(
44662            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44663        );
44664        let b = _mm512_setr_ps(
44665            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44666        );
44667        let r = _mm512_min_ps(a, b);
44668        let e = _mm512_setr_ps(
44669            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44670        );
44671        assert_eq_m512(r, e);
44672    }
44673
44674    #[simd_test(enable = "avx512f")]
44675    unsafe fn test_mm512_mask_min_ps() {
44676        let a = _mm512_setr_ps(
44677            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44678        );
44679        let b = _mm512_setr_ps(
44680            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44681        );
44682        let r = _mm512_mask_min_ps(a, 0, a, b);
44683        assert_eq_m512(r, a);
44684        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44685        let e = _mm512_setr_ps(
44686            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44687        );
44688        assert_eq_m512(r, e);
44689    }
44690
44691    #[simd_test(enable = "avx512f")]
44692    unsafe fn test_mm512_maskz_min_ps() {
44693        let a = _mm512_setr_ps(
44694            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44695        );
44696        let b = _mm512_setr_ps(
44697            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44698        );
44699        let r = _mm512_maskz_min_ps(0, a, b);
44700        assert_eq_m512(r, _mm512_setzero_ps());
44701        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44702        let e = _mm512_setr_ps(
44703            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44704        );
44705        assert_eq_m512(r, e);
44706    }
44707
44708    #[simd_test(enable = "avx512f,avx512vl")]
44709    unsafe fn test_mm256_mask_min_ps() {
44710        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44711        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44712        let r = _mm256_mask_min_ps(a, 0, a, b);
44713        assert_eq_m256(r, a);
44714        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44715        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44716        assert_eq_m256(r, e);
44717    }
44718
44719    #[simd_test(enable = "avx512f,avx512vl")]
44720    unsafe fn test_mm256_maskz_min_ps() {
44721        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44722        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44723        let r = _mm256_maskz_min_ps(0, a, b);
44724        assert_eq_m256(r, _mm256_setzero_ps());
44725        let r = _mm256_maskz_min_ps(0b11111111, a, b);
44726        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44727        assert_eq_m256(r, e);
44728    }
44729
44730    #[simd_test(enable = "avx512f,avx512vl")]
44731    unsafe fn test_mm_mask_min_ps() {
44732        let a = _mm_set_ps(0., 1., 2., 3.);
44733        let b = _mm_set_ps(3., 2., 1., 0.);
44734        let r = _mm_mask_min_ps(a, 0, a, b);
44735        assert_eq_m128(r, a);
44736        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44737        let e = _mm_set_ps(0., 1., 1., 0.);
44738        assert_eq_m128(r, e);
44739    }
44740
44741    #[simd_test(enable = "avx512f,avx512vl")]
44742    unsafe fn test_mm_maskz_min_ps() {
44743        let a = _mm_set_ps(0., 1., 2., 3.);
44744        let b = _mm_set_ps(3., 2., 1., 0.);
44745        let r = _mm_maskz_min_ps(0, a, b);
44746        assert_eq_m128(r, _mm_setzero_ps());
44747        let r = _mm_maskz_min_ps(0b00001111, a, b);
44748        let e = _mm_set_ps(0., 1., 1., 0.);
44749        assert_eq_m128(r, e);
44750    }
44751
44752    #[simd_test(enable = "avx512f")]
44753    unsafe fn test_mm512_min_epu32() {
44754        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44755        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44756        let r = _mm512_min_epu32(a, b);
44757        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44758        assert_eq_m512i(r, e);
44759    }
44760
44761    #[simd_test(enable = "avx512f")]
44762    unsafe fn test_mm512_mask_min_epu32() {
44763        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44764        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44765        let r = _mm512_mask_min_epu32(a, 0, a, b);
44766        assert_eq_m512i(r, a);
44767        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44768        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44769        assert_eq_m512i(r, e);
44770    }
44771
44772    #[simd_test(enable = "avx512f")]
44773    unsafe fn test_mm512_maskz_min_epu32() {
44774        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44775        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44776        let r = _mm512_maskz_min_epu32(0, a, b);
44777        assert_eq_m512i(r, _mm512_setzero_si512());
44778        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44779        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44780        assert_eq_m512i(r, e);
44781    }
44782
44783    #[simd_test(enable = "avx512f,avx512vl")]
44784    unsafe fn test_mm256_mask_min_epu32() {
44785        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44786        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44787        let r = _mm256_mask_min_epu32(a, 0, a, b);
44788        assert_eq_m256i(r, a);
44789        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44790        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44791        assert_eq_m256i(r, e);
44792    }
44793
44794    #[simd_test(enable = "avx512f,avx512vl")]
44795    unsafe fn test_mm256_maskz_min_epu32() {
44796        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44797        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44798        let r = _mm256_maskz_min_epu32(0, a, b);
44799        assert_eq_m256i(r, _mm256_setzero_si256());
44800        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44801        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44802        assert_eq_m256i(r, e);
44803    }
44804
44805    #[simd_test(enable = "avx512f,avx512vl")]
44806    unsafe fn test_mm_mask_min_epu32() {
44807        let a = _mm_set_epi32(0, 1, 2, 3);
44808        let b = _mm_set_epi32(3, 2, 1, 0);
44809        let r = _mm_mask_min_epu32(a, 0, a, b);
44810        assert_eq_m128i(r, a);
44811        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44812        let e = _mm_set_epi32(0, 1, 1, 0);
44813        assert_eq_m128i(r, e);
44814    }
44815
44816    #[simd_test(enable = "avx512f,avx512vl")]
44817    unsafe fn test_mm_maskz_min_epu32() {
44818        let a = _mm_set_epi32(0, 1, 2, 3);
44819        let b = _mm_set_epi32(3, 2, 1, 0);
44820        let r = _mm_maskz_min_epu32(0, a, b);
44821        assert_eq_m128i(r, _mm_setzero_si128());
44822        let r = _mm_maskz_min_epu32(0b00001111, a, b);
44823        let e = _mm_set_epi32(0, 1, 1, 0);
44824        assert_eq_m128i(r, e);
44825    }
44826
44827    #[simd_test(enable = "avx512f")]
44828    unsafe fn test_mm512_sqrt_ps() {
44829        let a = _mm512_setr_ps(
44830            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44831        );
44832        let r = _mm512_sqrt_ps(a);
44833        let e = _mm512_setr_ps(
44834            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44835        );
44836        assert_eq_m512(r, e);
44837    }
44838
44839    #[simd_test(enable = "avx512f")]
44840    unsafe fn test_mm512_mask_sqrt_ps() {
44841        let a = _mm512_setr_ps(
44842            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44843        );
44844        let r = _mm512_mask_sqrt_ps(a, 0, a);
44845        assert_eq_m512(r, a);
44846        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44847        let e = _mm512_setr_ps(
44848            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44849        );
44850        assert_eq_m512(r, e);
44851    }
44852
44853    #[simd_test(enable = "avx512f")]
44854    unsafe fn test_mm512_maskz_sqrt_ps() {
44855        let a = _mm512_setr_ps(
44856            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44857        );
44858        let r = _mm512_maskz_sqrt_ps(0, a);
44859        assert_eq_m512(r, _mm512_setzero_ps());
44860        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44861        let e = _mm512_setr_ps(
44862            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44863        );
44864        assert_eq_m512(r, e);
44865    }
44866
44867    #[simd_test(enable = "avx512f,avx512vl")]
44868    unsafe fn test_mm256_mask_sqrt_ps() {
44869        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44870        let r = _mm256_mask_sqrt_ps(a, 0, a);
44871        assert_eq_m256(r, a);
44872        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44873        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44874        assert_eq_m256(r, e);
44875    }
44876
44877    #[simd_test(enable = "avx512f,avx512vl")]
44878    unsafe fn test_mm256_maskz_sqrt_ps() {
44879        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44880        let r = _mm256_maskz_sqrt_ps(0, a);
44881        assert_eq_m256(r, _mm256_setzero_ps());
44882        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44883        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44884        assert_eq_m256(r, e);
44885    }
44886
44887    #[simd_test(enable = "avx512f,avx512vl")]
44888    unsafe fn test_mm_mask_sqrt_ps() {
44889        let a = _mm_set_ps(0., 1., 4., 9.);
44890        let r = _mm_mask_sqrt_ps(a, 0, a);
44891        assert_eq_m128(r, a);
44892        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44893        let e = _mm_set_ps(0., 1., 2., 3.);
44894        assert_eq_m128(r, e);
44895    }
44896
44897    #[simd_test(enable = "avx512f,avx512vl")]
44898    unsafe fn test_mm_maskz_sqrt_ps() {
44899        let a = _mm_set_ps(0., 1., 4., 9.);
44900        let r = _mm_maskz_sqrt_ps(0, a);
44901        assert_eq_m128(r, _mm_setzero_ps());
44902        let r = _mm_maskz_sqrt_ps(0b00001111, a);
44903        let e = _mm_set_ps(0., 1., 2., 3.);
44904        assert_eq_m128(r, e);
44905    }
44906
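    // The fused multiply-add tests compute a * b + c per lane; with a and c set to 1
    // the expected result is simply b + 1.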
44907    #[simd_test(enable = "avx512f")]
44908    unsafe fn test_mm512_fmadd_ps() {
44909        let a = _mm512_set1_ps(1.);
44910        let b = _mm512_setr_ps(
44911            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44912        );
44913        let c = _mm512_set1_ps(1.);
44914        let r = _mm512_fmadd_ps(a, b, c);
44915        let e = _mm512_setr_ps(
44916            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44917        );
44918        assert_eq_m512(r, e);
44919    }
44920
44921    #[simd_test(enable = "avx512f")]
44922    unsafe fn test_mm512_mask_fmadd_ps() {
44923        let a = _mm512_set1_ps(1.);
44924        let b = _mm512_setr_ps(
44925            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44926        );
44927        let c = _mm512_set1_ps(1.);
44928        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44929        assert_eq_m512(r, a);
44930        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44931        let e = _mm512_setr_ps(
44932            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44933        );
44934        assert_eq_m512(r, e);
44935    }
44936
44937    #[simd_test(enable = "avx512f")]
44938    unsafe fn test_mm512_maskz_fmadd_ps() {
44939        let a = _mm512_set1_ps(1.);
44940        let b = _mm512_setr_ps(
44941            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44942        );
44943        let c = _mm512_set1_ps(1.);
44944        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44945        assert_eq_m512(r, _mm512_setzero_ps());
44946        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44947        let e = _mm512_setr_ps(
44948            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44949        );
44950        assert_eq_m512(r, e);
44951    }
44952
44953    #[simd_test(enable = "avx512f")]
44954    unsafe fn test_mm512_mask3_fmadd_ps() {
44955        let a = _mm512_set1_ps(1.);
44956        let b = _mm512_setr_ps(
44957            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44958        );
44959        let c = _mm512_set1_ps(2.);
44960        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44961        assert_eq_m512(r, c);
44962        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44963        let e = _mm512_setr_ps(
44964            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44965        );
44966        assert_eq_m512(r, e);
44967    }
44968
44969    #[simd_test(enable = "avx512f,avx512vl")]
44970    unsafe fn test_mm256_mask_fmadd_ps() {
44971        let a = _mm256_set1_ps(1.);
44972        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44973        let c = _mm256_set1_ps(1.);
44974        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44975        assert_eq_m256(r, a);
44976        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44977        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44978        assert_eq_m256(r, e);
44979    }
44980
44981    #[simd_test(enable = "avx512f,avx512vl")]
44982    unsafe fn test_mm256_maskz_fmadd_ps() {
44983        let a = _mm256_set1_ps(1.);
44984        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44985        let c = _mm256_set1_ps(1.);
44986        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
44987        assert_eq_m256(r, _mm256_setzero_ps());
44988        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
44989        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44990        assert_eq_m256(r, e);
44991    }
44992
44993    #[simd_test(enable = "avx512f,avx512vl")]
44994    unsafe fn test_mm256_mask3_fmadd_ps() {
44995        let a = _mm256_set1_ps(1.);
44996        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44997        let c = _mm256_set1_ps(1.);
44998        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
44999        assert_eq_m256(r, c);
45000        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45001        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45002        assert_eq_m256(r, e);
45003    }
45004
45005    #[simd_test(enable = "avx512f,avx512vl")]
45006    unsafe fn test_mm_mask_fmadd_ps() {
45007        let a = _mm_set1_ps(1.);
45008        let b = _mm_set_ps(0., 1., 2., 3.);
45009        let c = _mm_set1_ps(1.);
45010        let r = _mm_mask_fmadd_ps(a, 0, b, c);
45011        assert_eq_m128(r, a);
45012        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45013        let e = _mm_set_ps(1., 2., 3., 4.);
45014        assert_eq_m128(r, e);
45015    }
45016
45017    #[simd_test(enable = "avx512f,avx512vl")]
45018    unsafe fn test_mm_maskz_fmadd_ps() {
45019        let a = _mm_set1_ps(1.);
45020        let b = _mm_set_ps(0., 1., 2., 3.);
45021        let c = _mm_set1_ps(1.);
45022        let r = _mm_maskz_fmadd_ps(0, a, b, c);
45023        assert_eq_m128(r, _mm_setzero_ps());
45024        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45025        let e = _mm_set_ps(1., 2., 3., 4.);
45026        assert_eq_m128(r, e);
45027    }
45028
45029    #[simd_test(enable = "avx512f,avx512vl")]
45030    unsafe fn test_mm_mask3_fmadd_ps() {
45031        let a = _mm_set1_ps(1.);
45032        let b = _mm_set_ps(0., 1., 2., 3.);
45033        let c = _mm_set1_ps(1.);
45034        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45035        assert_eq_m128(r, c);
45036        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45037        let e = _mm_set_ps(1., 2., 3., 4.);
45038        assert_eq_m128(r, e);
45039    }
45040
45041    #[simd_test(enable = "avx512f")]
45042    unsafe fn test_mm512_fmsub_ps() {
45043        let a = _mm512_set1_ps(1.);
45046        let b = _mm512_setr_ps(
45047            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45048        );
45049        let c = _mm512_set1_ps(1.);
45052        let r = _mm512_fmsub_ps(a, b, c);
45053        let e = _mm512_setr_ps(
45054            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45055        );
45056        assert_eq_m512(r, e);
45057    }
45058
45059    #[simd_test(enable = "avx512f")]
45060    unsafe fn test_mm512_mask_fmsub_ps() {
45061        let a = _mm512_set1_ps(1.);
45062        let b = _mm512_setr_ps(
45063            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45064        );
45065        let c = _mm512_set1_ps(1.);
45066        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45067        assert_eq_m512(r, a);
45068        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45069        let e = _mm512_setr_ps(
45070            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45071        );
45072        assert_eq_m512(r, e);
45073    }
45074
45075    #[simd_test(enable = "avx512f")]
45076    unsafe fn test_mm512_maskz_fmsub_ps() {
45077        let a = _mm512_set1_ps(1.);
45078        let b = _mm512_setr_ps(
45079            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45080        );
45081        let c = _mm512_set1_ps(1.);
45082        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45083        assert_eq_m512(r, _mm512_setzero_ps());
45084        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45085        let e = _mm512_setr_ps(
45086            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45087        );
45088        assert_eq_m512(r, e);
45089    }
45090
45091    #[simd_test(enable = "avx512f")]
45092    unsafe fn test_mm512_mask3_fmsub_ps() {
45093        let a = _mm512_set1_ps(1.);
45094        let b = _mm512_setr_ps(
45095            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45096        );
45097        let c = _mm512_setr_ps(
45098            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45099        );
45100        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45101        assert_eq_m512(r, c);
45102        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45103        let e = _mm512_setr_ps(
45104            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45105        );
45106        assert_eq_m512(r, e);
45107    }
45108
45109    #[simd_test(enable = "avx512f,avx512vl")]
45110    unsafe fn test_mm256_mask_fmsub_ps() {
45111        let a = _mm256_set1_ps(1.);
45112        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45113        let c = _mm256_set1_ps(1.);
45114        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45115        assert_eq_m256(r, a);
45116        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45117        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45118        assert_eq_m256(r, e);
45119    }
45120
45121    #[simd_test(enable = "avx512f,avx512vl")]
45122    unsafe fn test_mm256_maskz_fmsub_ps() {
45123        let a = _mm256_set1_ps(1.);
45124        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45125        let c = _mm256_set1_ps(1.);
45126        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45127        assert_eq_m256(r, _mm256_setzero_ps());
45128        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45129        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45130        assert_eq_m256(r, e);
45131    }
45132
45133    #[simd_test(enable = "avx512f,avx512vl")]
45134    unsafe fn test_mm256_mask3_fmsub_ps() {
45135        let a = _mm256_set1_ps(1.);
45136        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45137        let c = _mm256_set1_ps(1.);
45138        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45139        assert_eq_m256(r, c);
45140        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45141        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45142        assert_eq_m256(r, e);
45143    }
45144
45145    #[simd_test(enable = "avx512f,avx512vl")]
45146    unsafe fn test_mm_mask_fmsub_ps() {
45147        let a = _mm_set1_ps(1.);
45148        let b = _mm_set_ps(0., 1., 2., 3.);
45149        let c = _mm_set1_ps(1.);
45150        let r = _mm_mask_fmsub_ps(a, 0, b, c);
45151        assert_eq_m128(r, a);
45152        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45153        let e = _mm_set_ps(-1., 0., 1., 2.);
45154        assert_eq_m128(r, e);
45155    }
45156
45157    #[simd_test(enable = "avx512f,avx512vl")]
45158    unsafe fn test_mm_maskz_fmsub_ps() {
45159        let a = _mm_set1_ps(1.);
45160        let b = _mm_set_ps(0., 1., 2., 3.);
45161        let c = _mm_set1_ps(1.);
45162        let r = _mm_maskz_fmsub_ps(0, a, b, c);
45163        assert_eq_m128(r, _mm_setzero_ps());
45164        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45165        let e = _mm_set_ps(-1., 0., 1., 2.);
45166        assert_eq_m128(r, e);
45167    }
45168
45169    #[simd_test(enable = "avx512f,avx512vl")]
45170    unsafe fn test_mm_mask3_fmsub_ps() {
45171        let a = _mm_set1_ps(1.);
45172        let b = _mm_set_ps(0., 1., 2., 3.);
45173        let c = _mm_set1_ps(1.);
45174        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45175        assert_eq_m128(r, c);
45176        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45177        let e = _mm_set_ps(-1., 0., 1., 2.);
45178        assert_eq_m128(r, e);
45179    }
45180
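    // FMADDSUB: computes (a * b) - c in even-indexed lanes and (a * b) + c in
    // odd-indexed lanes (memory order).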
45181    #[simd_test(enable = "avx512f")]
45182    unsafe fn test_mm512_fmaddsub_ps() {
45183        let a = _mm512_set1_ps(1.);
45184        let b = _mm512_setr_ps(
45185            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45186        );
45187        let c = _mm512_set1_ps(1.);
45188        let r = _mm512_fmaddsub_ps(a, b, c);
45189        let e = _mm512_setr_ps(
45190            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45191        );
45192        assert_eq_m512(r, e);
45193    }
45194
45195    #[simd_test(enable = "avx512f")]
45196    unsafe fn test_mm512_mask_fmaddsub_ps() {
45197        let a = _mm512_set1_ps(1.);
45198        let b = _mm512_setr_ps(
45199            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45200        );
45201        let c = _mm512_set1_ps(1.);
45202        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45203        assert_eq_m512(r, a);
45204        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45205        let e = _mm512_setr_ps(
45206            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45207        );
45208        assert_eq_m512(r, e);
45209    }
45210
45211    #[simd_test(enable = "avx512f")]
45212    unsafe fn test_mm512_maskz_fmaddsub_ps() {
45213        let a = _mm512_set1_ps(1.);
45214        let b = _mm512_setr_ps(
45215            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45216        );
45217        let c = _mm512_set1_ps(1.);
45218        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45219        assert_eq_m512(r, _mm512_setzero_ps());
45220        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45221        let e = _mm512_setr_ps(
45222            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45223        );
45224        assert_eq_m512(r, e);
45225    }
45226
45227    #[simd_test(enable = "avx512f")]
45228    unsafe fn test_mm512_mask3_fmaddsub_ps() {
45229        let a = _mm512_set1_ps(1.);
45230        let b = _mm512_setr_ps(
45231            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45232        );
45233        let c = _mm512_setr_ps(
45234            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45235        );
45236        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45237        assert_eq_m512(r, c);
45238        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45239        let e = _mm512_setr_ps(
45240            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45241        );
45242        assert_eq_m512(r, e);
45243    }
45244
45245    #[simd_test(enable = "avx512f,avx512vl")]
45246    unsafe fn test_mm256_mask_fmaddsub_ps() {
45247        let a = _mm256_set1_ps(1.);
45248        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45249        let c = _mm256_set1_ps(1.);
45250        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45251        assert_eq_m256(r, a);
45252        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45253        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45254        assert_eq_m256(r, e);
45255    }
45256
45257    #[simd_test(enable = "avx512f,avx512vl")]
45258    unsafe fn test_mm256_maskz_fmaddsub_ps() {
45259        let a = _mm256_set1_ps(1.);
45260        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45261        let c = _mm256_set1_ps(1.);
45262        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45263        assert_eq_m256(r, _mm256_setzero_ps());
45264        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45265        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45266        assert_eq_m256(r, e);
45267    }
45268
45269    #[simd_test(enable = "avx512f,avx512vl")]
45270    unsafe fn test_mm256_mask3_fmaddsub_ps() {
45271        let a = _mm256_set1_ps(1.);
45272        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45273        let c = _mm256_set1_ps(1.);
45274        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45275        assert_eq_m256(r, c);
45276        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45277        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45278        assert_eq_m256(r, e);
45279    }
45280
45281    #[simd_test(enable = "avx512f,avx512vl")]
45282    unsafe fn test_mm_mask_fmaddsub_ps() {
45283        let a = _mm_set1_ps(1.);
45284        let b = _mm_set_ps(0., 1., 2., 3.);
45285        let c = _mm_set1_ps(1.);
45286        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45287        assert_eq_m128(r, a);
45288        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45289        let e = _mm_set_ps(1., 0., 3., 2.);
45290        assert_eq_m128(r, e);
45291    }
45292
45293    #[simd_test(enable = "avx512f,avx512vl")]
45294    unsafe fn test_mm_maskz_fmaddsub_ps() {
45295        let a = _mm_set1_ps(1.);
45296        let b = _mm_set_ps(0., 1., 2., 3.);
45297        let c = _mm_set1_ps(1.);
45298        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45299        assert_eq_m128(r, _mm_setzero_ps());
45300        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45301        let e = _mm_set_ps(1., 0., 3., 2.);
45302        assert_eq_m128(r, e);
45303    }
45304
45305    #[simd_test(enable = "avx512f,avx512vl")]
45306    unsafe fn test_mm_mask3_fmaddsub_ps() {
45307        let a = _mm_set1_ps(1.);
45308        let b = _mm_set_ps(0., 1., 2., 3.);
45309        let c = _mm_set1_ps(1.);
45310        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45311        assert_eq_m128(r, c);
45312        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45313        let e = _mm_set_ps(1., 0., 3., 2.);
45314        assert_eq_m128(r, e);
45315    }
45316
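    // FMSUBADD: the mirror of FMADDSUB, computing (a * b) + c in even-indexed lanes
    // and (a * b) - c in odd-indexed lanes.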
45317    #[simd_test(enable = "avx512f")]
45318    unsafe fn test_mm512_fmsubadd_ps() {
45319        let a = _mm512_setr_ps(
45320            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45321        );
45322        let b = _mm512_setr_ps(
45323            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45324        );
45325        let c = _mm512_setr_ps(
45326            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45327        );
45328        let r = _mm512_fmsubadd_ps(a, b, c);
45329        let e = _mm512_setr_ps(
45330            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45331        );
45332        assert_eq_m512(r, e);
45333    }
45334
45335    #[simd_test(enable = "avx512f")]
45336    unsafe fn test_mm512_mask_fmsubadd_ps() {
45337        let a = _mm512_set1_ps(1.);
45338        let b = _mm512_setr_ps(
45339            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45340        );
45341        let c = _mm512_set1_ps(1.);
45342        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45343        assert_eq_m512(r, a);
45344        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45345        let e = _mm512_setr_ps(
45346            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45347        );
45348        assert_eq_m512(r, e);
45349    }
45350
45351    #[simd_test(enable = "avx512f")]
45352    unsafe fn test_mm512_maskz_fmsubadd_ps() {
45353        let a = _mm512_set1_ps(1.);
45354        let b = _mm512_setr_ps(
45355            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45356        );
45357        let c = _mm512_set1_ps(1.);
45358        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45359        assert_eq_m512(r, _mm512_setzero_ps());
45360        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45361        let e = _mm512_setr_ps(
45362            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45363        );
45364        assert_eq_m512(r, e);
45365    }
45366
45367    #[simd_test(enable = "avx512f")]
45368    unsafe fn test_mm512_mask3_fmsubadd_ps() {
45369        let a = _mm512_set1_ps(1.);
45370        let b = _mm512_setr_ps(
45371            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45372        );
45373        let c = _mm512_setr_ps(
45374            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45375        );
45376        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45377        assert_eq_m512(r, c);
45378        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45379        let e = _mm512_setr_ps(
45380            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45381        );
45382        assert_eq_m512(r, e);
45383    }
45384
45385    #[simd_test(enable = "avx512f,avx512vl")]
45386    unsafe fn test_mm256_mask_fmsubadd_ps() {
45387        let a = _mm256_set1_ps(1.);
45388        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45389        let c = _mm256_set1_ps(1.);
45390        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45391        assert_eq_m256(r, a);
45392        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45393        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45394        assert_eq_m256(r, e);
45395    }
45396
45397    #[simd_test(enable = "avx512f,avx512vl")]
45398    unsafe fn test_mm256_maskz_fmsubadd_ps() {
45399        let a = _mm256_set1_ps(1.);
45400        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45401        let c = _mm256_set1_ps(1.);
45402        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45403        assert_eq_m256(r, _mm256_setzero_ps());
45404        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45405        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45406        assert_eq_m256(r, e);
45407    }
45408
45409    #[simd_test(enable = "avx512f,avx512vl")]
45410    unsafe fn test_mm256_mask3_fmsubadd_ps() {
45411        let a = _mm256_set1_ps(1.);
45412        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45413        let c = _mm256_set1_ps(1.);
45414        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45415        assert_eq_m256(r, c);
45416        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45417        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45418        assert_eq_m256(r, e);
45419    }
45420
45421    #[simd_test(enable = "avx512f,avx512vl")]
45422    unsafe fn test_mm_mask_fmsubadd_ps() {
45423        let a = _mm_set1_ps(1.);
45424        let b = _mm_set_ps(0., 1., 2., 3.);
45425        let c = _mm_set1_ps(1.);
45426        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45427        assert_eq_m128(r, a);
45428        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45429        let e = _mm_set_ps(-1., 2., 1., 4.);
45430        assert_eq_m128(r, e);
45431    }
45432
45433    #[simd_test(enable = "avx512f,avx512vl")]
45434    unsafe fn test_mm_maskz_fmsubadd_ps() {
45435        let a = _mm_set1_ps(1.);
45436        let b = _mm_set_ps(0., 1., 2., 3.);
45437        let c = _mm_set1_ps(1.);
45438        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45439        assert_eq_m128(r, _mm_setzero_ps());
45440        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45441        let e = _mm_set_ps(-1., 2., 1., 4.);
45442        assert_eq_m128(r, e);
45443    }
45444
45445    #[simd_test(enable = "avx512f,avx512vl")]
45446    unsafe fn test_mm_mask3_fmsubadd_ps() {
45447        let a = _mm_set1_ps(1.);
45448        let b = _mm_set_ps(0., 1., 2., 3.);
45449        let c = _mm_set1_ps(1.);
45450        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45451        assert_eq_m128(r, c);
45452        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45453        let e = _mm_set_ps(-1., 2., 1., 4.);
45454        assert_eq_m128(r, e);
45455    }
45456
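    // FNMADD: computes -(a * b) + c in each lane.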
45457    #[simd_test(enable = "avx512f")]
45458    unsafe fn test_mm512_fnmadd_ps() {
45459        let a = _mm512_set1_ps(1.);
45460        let b = _mm512_setr_ps(
45461            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45462        );
45463        let c = _mm512_set1_ps(1.);
45464        let r = _mm512_fnmadd_ps(a, b, c);
45465        let e = _mm512_setr_ps(
45466            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45467        );
45468        assert_eq_m512(r, e);
45469    }
45470
45471    #[simd_test(enable = "avx512f")]
45472    unsafe fn test_mm512_mask_fnmadd_ps() {
45473        let a = _mm512_set1_ps(1.);
45474        let b = _mm512_setr_ps(
45475            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45476        );
45477        let c = _mm512_set1_ps(1.);
45478        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45479        assert_eq_m512(r, a);
45480        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45481        let e = _mm512_setr_ps(
45482            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45483        );
45484        assert_eq_m512(r, e);
45485    }
45486
45487    #[simd_test(enable = "avx512f")]
45488    unsafe fn test_mm512_maskz_fnmadd_ps() {
45489        let a = _mm512_set1_ps(1.);
45490        let b = _mm512_setr_ps(
45491            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45492        );
45493        let c = _mm512_set1_ps(1.);
45494        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45495        assert_eq_m512(r, _mm512_setzero_ps());
45496        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45497        let e = _mm512_setr_ps(
45498            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45499        );
45500        assert_eq_m512(r, e);
45501    }
45502
45503    #[simd_test(enable = "avx512f")]
45504    unsafe fn test_mm512_mask3_fnmadd_ps() {
45505        let a = _mm512_set1_ps(1.);
45506        let b = _mm512_setr_ps(
45507            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45508        );
45509        let c = _mm512_setr_ps(
45510            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45511        );
45512        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45513        assert_eq_m512(r, c);
45514        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45515        let e = _mm512_setr_ps(
45516            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45517        );
45518        assert_eq_m512(r, e);
45519    }
45520
45521    #[simd_test(enable = "avx512f,avx512vl")]
45522    unsafe fn test_mm256_mask_fnmadd_ps() {
45523        let a = _mm256_set1_ps(1.);
45524        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45525        let c = _mm256_set1_ps(1.);
45526        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45527        assert_eq_m256(r, a);
45528        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45529        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45530        assert_eq_m256(r, e);
45531    }
45532
45533    #[simd_test(enable = "avx512f,avx512vl")]
45534    unsafe fn test_mm256_maskz_fnmadd_ps() {
45535        let a = _mm256_set1_ps(1.);
45536        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45537        let c = _mm256_set1_ps(1.);
45538        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45539        assert_eq_m256(r, _mm256_setzero_ps());
45540        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45541        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45542        assert_eq_m256(r, e);
45543    }
45544
45545    #[simd_test(enable = "avx512f,avx512vl")]
45546    unsafe fn test_mm256_mask3_fnmadd_ps() {
45547        let a = _mm256_set1_ps(1.);
45548        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45549        let c = _mm256_set1_ps(1.);
45550        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45551        assert_eq_m256(r, c);
45552        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45553        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45554        assert_eq_m256(r, e);
45555    }
45556
45557    #[simd_test(enable = "avx512f,avx512vl")]
45558    unsafe fn test_mm_mask_fnmadd_ps() {
45559        let a = _mm_set1_ps(1.);
45560        let b = _mm_set_ps(0., 1., 2., 3.);
45561        let c = _mm_set1_ps(1.);
45562        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45563        assert_eq_m128(r, a);
45564        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45565        let e = _mm_set_ps(1., 0., -1., -2.);
45566        assert_eq_m128(r, e);
45567    }
45568
45569    #[simd_test(enable = "avx512f,avx512vl")]
45570    unsafe fn test_mm_maskz_fnmadd_ps() {
45571        let a = _mm_set1_ps(1.);
45572        let b = _mm_set_ps(0., 1., 2., 3.);
45573        let c = _mm_set1_ps(1.);
45574        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45575        assert_eq_m128(r, _mm_setzero_ps());
45576        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45577        let e = _mm_set_ps(1., 0., -1., -2.);
45578        assert_eq_m128(r, e);
45579    }
45580
45581    #[simd_test(enable = "avx512f,avx512vl")]
45582    unsafe fn test_mm_mask3_fnmadd_ps() {
45583        let a = _mm_set1_ps(1.);
45584        let b = _mm_set_ps(0., 1., 2., 3.);
45585        let c = _mm_set1_ps(1.);
45586        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45587        assert_eq_m128(r, c);
45588        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45589        let e = _mm_set_ps(1., 0., -1., -2.);
45590        assert_eq_m128(r, e);
45591    }
45592
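    // FNMSUB: computes -(a * b) - c in each lane.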
45593    #[simd_test(enable = "avx512f")]
45594    unsafe fn test_mm512_fnmsub_ps() {
45595        let a = _mm512_set1_ps(1.);
45596        let b = _mm512_setr_ps(
45597            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45598        );
45599        let c = _mm512_set1_ps(1.);
45600        let r = _mm512_fnmsub_ps(a, b, c);
45601        let e = _mm512_setr_ps(
45602            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45603        );
45604        assert_eq_m512(r, e);
45605    }
45606
45607    #[simd_test(enable = "avx512f")]
45608    unsafe fn test_mm512_mask_fnmsub_ps() {
45609        let a = _mm512_set1_ps(1.);
45610        let b = _mm512_setr_ps(
45611            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45612        );
45613        let c = _mm512_set1_ps(1.);
45614        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45615        assert_eq_m512(r, a);
45616        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45617        let e = _mm512_setr_ps(
45618            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45619        );
45620        assert_eq_m512(r, e);
45621    }
45622
45623    #[simd_test(enable = "avx512f")]
45624    unsafe fn test_mm512_maskz_fnmsub_ps() {
45625        let a = _mm512_set1_ps(1.);
45626        let b = _mm512_setr_ps(
45627            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45628        );
45629        let c = _mm512_set1_ps(1.);
45630        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45631        assert_eq_m512(r, _mm512_setzero_ps());
45632        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45633        let e = _mm512_setr_ps(
45634            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45635        );
45636        assert_eq_m512(r, e);
45637    }
45638
45639    #[simd_test(enable = "avx512f")]
45640    unsafe fn test_mm512_mask3_fnmsub_ps() {
45641        let a = _mm512_set1_ps(1.);
45642        let b = _mm512_setr_ps(
45643            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45644        );
45645        let c = _mm512_setr_ps(
45646            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45647        );
45648        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45649        assert_eq_m512(r, c);
45650        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45651        let e = _mm512_setr_ps(
45652            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45653        );
45654        assert_eq_m512(r, e);
45655    }
45656
45657    #[simd_test(enable = "avx512f,avx512vl")]
45658    unsafe fn test_mm256_mask_fnmsub_ps() {
45659        let a = _mm256_set1_ps(1.);
45660        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45661        let c = _mm256_set1_ps(1.);
45662        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45663        assert_eq_m256(r, a);
45664        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45665        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45666        assert_eq_m256(r, e);
45667    }
45668
45669    #[simd_test(enable = "avx512f,avx512vl")]
45670    unsafe fn test_mm256_maskz_fnmsub_ps() {
45671        let a = _mm256_set1_ps(1.);
45672        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45673        let c = _mm256_set1_ps(1.);
45674        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45675        assert_eq_m256(r, _mm256_setzero_ps());
45676        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45677        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45678        assert_eq_m256(r, e);
45679    }
45680
45681    #[simd_test(enable = "avx512f,avx512vl")]
45682    unsafe fn test_mm256_mask3_fnmsub_ps() {
45683        let a = _mm256_set1_ps(1.);
45684        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45685        let c = _mm256_set1_ps(1.);
45686        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45687        assert_eq_m256(r, c);
45688        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45689        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45690        assert_eq_m256(r, e);
45691    }
45692
45693    #[simd_test(enable = "avx512f,avx512vl")]
45694    unsafe fn test_mm_mask_fnmsub_ps() {
45695        let a = _mm_set1_ps(1.);
45696        let b = _mm_set_ps(0., 1., 2., 3.);
45697        let c = _mm_set1_ps(1.);
45698        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45699        assert_eq_m128(r, a);
45700        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45701        let e = _mm_set_ps(-1., -2., -3., -4.);
45702        assert_eq_m128(r, e);
45703    }
45704
45705    #[simd_test(enable = "avx512f,avx512vl")]
45706    unsafe fn test_mm_maskz_fnmsub_ps() {
45707        let a = _mm_set1_ps(1.);
45708        let b = _mm_set_ps(0., 1., 2., 3.);
45709        let c = _mm_set1_ps(1.);
45710        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45711        assert_eq_m128(r, _mm_setzero_ps());
45712        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45713        let e = _mm_set_ps(-1., -2., -3., -4.);
45714        assert_eq_m128(r, e);
45715    }
45716
45717    #[simd_test(enable = "avx512f,avx512vl")]
45718    unsafe fn test_mm_mask3_fnmsub_ps() {
45719        let a = _mm_set1_ps(1.);
45720        let b = _mm_set_ps(0., 1., 2., 3.);
45721        let c = _mm_set1_ps(1.);
45722        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45723        assert_eq_m128(r, c);
45724        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45725        let e = _mm_set_ps(-1., -2., -3., -4.);
45726        assert_eq_m128(r, e);
45727    }
45728
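    // RCP14: approximate reciprocal with a maximum relative error of 2^-14, hence
    // the expected value 0.33333206 rather than an exactly rounded 1/3.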
45729    #[simd_test(enable = "avx512f")]
45730    unsafe fn test_mm512_rcp14_ps() {
45731        let a = _mm512_set1_ps(3.);
45732        let r = _mm512_rcp14_ps(a);
45733        let e = _mm512_set1_ps(0.33333206);
45734        assert_eq_m512(r, e);
45735    }
45736
45737    #[simd_test(enable = "avx512f")]
45738    unsafe fn test_mm512_mask_rcp14_ps() {
45739        let a = _mm512_set1_ps(3.);
45740        let r = _mm512_mask_rcp14_ps(a, 0, a);
45741        assert_eq_m512(r, a);
45742        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45743        let e = _mm512_setr_ps(
45744            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45745            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45746        );
45747        assert_eq_m512(r, e);
45748    }
45749
45750    #[simd_test(enable = "avx512f")]
45751    unsafe fn test_mm512_maskz_rcp14_ps() {
45752        let a = _mm512_set1_ps(3.);
45753        let r = _mm512_maskz_rcp14_ps(0, a);
45754        assert_eq_m512(r, _mm512_setzero_ps());
45755        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45756        let e = _mm512_setr_ps(
45757            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45758            0.33333206, 0.33333206, 0.33333206, 0.33333206,
45759        );
45760        assert_eq_m512(r, e);
45761    }
45762
45763    #[simd_test(enable = "avx512f,avx512vl")]
45764    unsafe fn test_mm256_rcp14_ps() {
45765        let a = _mm256_set1_ps(3.);
45766        let r = _mm256_rcp14_ps(a);
45767        let e = _mm256_set1_ps(0.33333206);
45768        assert_eq_m256(r, e);
45769    }
45770
45771    #[simd_test(enable = "avx512f,avx512vl")]
45772    unsafe fn test_mm256_mask_rcp14_ps() {
45773        let a = _mm256_set1_ps(3.);
45774        let r = _mm256_mask_rcp14_ps(a, 0, a);
45775        assert_eq_m256(r, a);
45776        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45777        let e = _mm256_set1_ps(0.33333206);
45778        assert_eq_m256(r, e);
45779    }
45780
45781    #[simd_test(enable = "avx512f,avx512vl")]
45782    unsafe fn test_mm256_maskz_rcp14_ps() {
45783        let a = _mm256_set1_ps(3.);
45784        let r = _mm256_maskz_rcp14_ps(0, a);
45785        assert_eq_m256(r, _mm256_setzero_ps());
45786        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45787        let e = _mm256_set1_ps(0.33333206);
45788        assert_eq_m256(r, e);
45789    }
45790
45791    #[simd_test(enable = "avx512f,avx512vl")]
45792    unsafe fn test_mm_rcp14_ps() {
45793        let a = _mm_set1_ps(3.);
45794        let r = _mm_rcp14_ps(a);
45795        let e = _mm_set1_ps(0.33333206);
45796        assert_eq_m128(r, e);
45797    }
45798
45799    #[simd_test(enable = "avx512f,avx512vl")]
45800    unsafe fn test_mm_mask_rcp14_ps() {
45801        let a = _mm_set1_ps(3.);
45802        let r = _mm_mask_rcp14_ps(a, 0, a);
45803        assert_eq_m128(r, a);
45804        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45805        let e = _mm_set1_ps(0.33333206);
45806        assert_eq_m128(r, e);
45807    }
45808
45809    #[simd_test(enable = "avx512f,avx512vl")]
45810    unsafe fn test_mm_maskz_rcp14_ps() {
45811        let a = _mm_set1_ps(3.);
45812        let r = _mm_maskz_rcp14_ps(0, a);
45813        assert_eq_m128(r, _mm_setzero_ps());
45814        let r = _mm_maskz_rcp14_ps(0b00001111, a);
45815        let e = _mm_set1_ps(0.33333206);
45816        assert_eq_m128(r, e);
45817    }
45818
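    // RSQRT14: approximate reciprocal square root with a maximum relative error of
    // 2^-14 (1/sqrt(3) ~= 0.57735 is approximated as 0.5773392).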
45819    #[simd_test(enable = "avx512f")]
45820    unsafe fn test_mm512_rsqrt14_ps() {
45821        let a = _mm512_set1_ps(3.);
45822        let r = _mm512_rsqrt14_ps(a);
45823        let e = _mm512_set1_ps(0.5773392);
45824        assert_eq_m512(r, e);
45825    }
45826
45827    #[simd_test(enable = "avx512f")]
45828    unsafe fn test_mm512_mask_rsqrt14_ps() {
45829        let a = _mm512_set1_ps(3.);
45830        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45831        assert_eq_m512(r, a);
45832        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45833        let e = _mm512_setr_ps(
45834            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45835            0.5773392, 0.5773392, 0.5773392,
45836        );
45837        assert_eq_m512(r, e);
45838    }
45839
45840    #[simd_test(enable = "avx512f")]
45841    unsafe fn test_mm512_maskz_rsqrt14_ps() {
45842        let a = _mm512_set1_ps(3.);
45843        let r = _mm512_maskz_rsqrt14_ps(0, a);
45844        assert_eq_m512(r, _mm512_setzero_ps());
45845        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45846        let e = _mm512_setr_ps(
45847            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45848            0.5773392, 0.5773392, 0.5773392,
45849        );
45850        assert_eq_m512(r, e);
45851    }
45852
45853    #[simd_test(enable = "avx512f,avx512vl")]
45854    unsafe fn test_mm256_rsqrt14_ps() {
45855        let a = _mm256_set1_ps(3.);
45856        let r = _mm256_rsqrt14_ps(a);
45857        let e = _mm256_set1_ps(0.5773392);
45858        assert_eq_m256(r, e);
45859    }
45860
45861    #[simd_test(enable = "avx512f,avx512vl")]
45862    unsafe fn test_mm256_mask_rsqrt14_ps() {
45863        let a = _mm256_set1_ps(3.);
45864        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45865        assert_eq_m256(r, a);
45866        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45867        let e = _mm256_set1_ps(0.5773392);
45868        assert_eq_m256(r, e);
45869    }
45870
45871    #[simd_test(enable = "avx512f,avx512vl")]
45872    unsafe fn test_mm256_maskz_rsqrt14_ps() {
45873        let a = _mm256_set1_ps(3.);
45874        let r = _mm256_maskz_rsqrt14_ps(0, a);
45875        assert_eq_m256(r, _mm256_setzero_ps());
45876        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45877        let e = _mm256_set1_ps(0.5773392);
45878        assert_eq_m256(r, e);
45879    }
45880
45881    #[simd_test(enable = "avx512f,avx512vl")]
45882    unsafe fn test_mm_rsqrt14_ps() {
45883        let a = _mm_set1_ps(3.);
45884        let r = _mm_rsqrt14_ps(a);
45885        let e = _mm_set1_ps(0.5773392);
45886        assert_eq_m128(r, e);
45887    }
45888
45889    #[simd_test(enable = "avx512f,avx512vl")]
45890    unsafe fn test_mm_mask_rsqrt14_ps() {
45891        let a = _mm_set1_ps(3.);
45892        let r = _mm_mask_rsqrt14_ps(a, 0, a);
45893        assert_eq_m128(r, a);
45894        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45895        let e = _mm_set1_ps(0.5773392);
45896        assert_eq_m128(r, e);
45897    }
45898
45899    #[simd_test(enable = "avx512f,avx512vl")]
45900    unsafe fn test_mm_maskz_rsqrt14_ps() {
45901        let a = _mm_set1_ps(3.);
45902        let r = _mm_maskz_rsqrt14_ps(0, a);
45903        assert_eq_m128(r, _mm_setzero_ps());
45904        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45905        let e = _mm_set1_ps(0.5773392);
45906        assert_eq_m128(r, e);
45907    }
45908
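    // GETEXP: returns floor(log2(|a|)) of each element as a float, so 3.0 maps to 1.0.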
45909    #[simd_test(enable = "avx512f")]
45910    unsafe fn test_mm512_getexp_ps() {
45911        let a = _mm512_set1_ps(3.);
45912        let r = _mm512_getexp_ps(a);
45913        let e = _mm512_set1_ps(1.);
45914        assert_eq_m512(r, e);
45915    }
45916
45917    #[simd_test(enable = "avx512f")]
45918    unsafe fn test_mm512_mask_getexp_ps() {
45919        let a = _mm512_set1_ps(3.);
45920        let r = _mm512_mask_getexp_ps(a, 0, a);
45921        assert_eq_m512(r, a);
45922        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
45923        let e = _mm512_setr_ps(
45924            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45925        );
45926        assert_eq_m512(r, e);
45927    }
45928
45929    #[simd_test(enable = "avx512f")]
45930    unsafe fn test_mm512_maskz_getexp_ps() {
45931        let a = _mm512_set1_ps(3.);
45932        let r = _mm512_maskz_getexp_ps(0, a);
45933        assert_eq_m512(r, _mm512_setzero_ps());
45934        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
45935        let e = _mm512_setr_ps(
45936            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45937        );
45938        assert_eq_m512(r, e);
45939    }
45940
45941    #[simd_test(enable = "avx512f,avx512vl")]
45942    unsafe fn test_mm256_getexp_ps() {
45943        let a = _mm256_set1_ps(3.);
45944        let r = _mm256_getexp_ps(a);
45945        let e = _mm256_set1_ps(1.);
45946        assert_eq_m256(r, e);
45947    }
45948
45949    #[simd_test(enable = "avx512f,avx512vl")]
45950    unsafe fn test_mm256_mask_getexp_ps() {
45951        let a = _mm256_set1_ps(3.);
45952        let r = _mm256_mask_getexp_ps(a, 0, a);
45953        assert_eq_m256(r, a);
45954        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
45955        let e = _mm256_set1_ps(1.);
45956        assert_eq_m256(r, e);
45957    }
45958
45959    #[simd_test(enable = "avx512f,avx512vl")]
45960    unsafe fn test_mm256_maskz_getexp_ps() {
45961        let a = _mm256_set1_ps(3.);
45962        let r = _mm256_maskz_getexp_ps(0, a);
45963        assert_eq_m256(r, _mm256_setzero_ps());
45964        let r = _mm256_maskz_getexp_ps(0b11111111, a);
45965        let e = _mm256_set1_ps(1.);
45966        assert_eq_m256(r, e);
45967    }
45968
45969    #[simd_test(enable = "avx512f,avx512vl")]
45970    unsafe fn test_mm_getexp_ps() {
45971        let a = _mm_set1_ps(3.);
45972        let r = _mm_getexp_ps(a);
45973        let e = _mm_set1_ps(1.);
45974        assert_eq_m128(r, e);
45975    }
45976
45977    #[simd_test(enable = "avx512f,avx512vl")]
45978    unsafe fn test_mm_mask_getexp_ps() {
45979        let a = _mm_set1_ps(3.);
45980        let r = _mm_mask_getexp_ps(a, 0, a);
45981        assert_eq_m128(r, a);
45982        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
45983        let e = _mm_set1_ps(1.);
45984        assert_eq_m128(r, e);
45985    }
45986
45987    #[simd_test(enable = "avx512f,avx512vl")]
45988    unsafe fn test_mm_maskz_getexp_ps() {
45989        let a = _mm_set1_ps(3.);
45990        let r = _mm_maskz_getexp_ps(0, a);
45991        assert_eq_m128(r, _mm_setzero_ps());
45992        let r = _mm_maskz_getexp_ps(0b00001111, a);
45993        let e = _mm_set1_ps(1.);
45994        assert_eq_m128(r, e);
45995    }
45996
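    // ROUNDSCALE: rounds each element to the number of fraction bits and rounding mode
    // encoded in the immediate; an immediate of 0 rounds to the nearest integer.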
45997    #[simd_test(enable = "avx512f")]
45998    unsafe fn test_mm512_roundscale_ps() {
45999        let a = _mm512_set1_ps(1.1);
46000        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46001        let e = _mm512_set1_ps(1.0);
46002        assert_eq_m512(r, e);
46003    }
46004
46005    #[simd_test(enable = "avx512f")]
46006    unsafe fn test_mm512_mask_roundscale_ps() {
46007        let a = _mm512_set1_ps(1.1);
46008        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46009        let e = _mm512_set1_ps(1.1);
46010        assert_eq_m512(r, e);
46011        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46012        let e = _mm512_set1_ps(1.0);
46013        assert_eq_m512(r, e);
46014    }
46015
46016    #[simd_test(enable = "avx512f")]
46017    unsafe fn test_mm512_maskz_roundscale_ps() {
46018        let a = _mm512_set1_ps(1.1);
46019        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46020        assert_eq_m512(r, _mm512_setzero_ps());
46021        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46022        let e = _mm512_set1_ps(1.0);
46023        assert_eq_m512(r, e);
46024    }
46025
46026    #[simd_test(enable = "avx512f,avx512vl")]
46027    unsafe fn test_mm256_roundscale_ps() {
46028        let a = _mm256_set1_ps(1.1);
46029        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46030        let e = _mm256_set1_ps(1.0);
46031        assert_eq_m256(r, e);
46032    }
46033
46034    #[simd_test(enable = "avx512f,avx512vl")]
46035    unsafe fn test_mm256_mask_roundscale_ps() {
46036        let a = _mm256_set1_ps(1.1);
46037        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46038        let e = _mm256_set1_ps(1.1);
46039        assert_eq_m256(r, e);
46040        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46041        let e = _mm256_set1_ps(1.0);
46042        assert_eq_m256(r, e);
46043    }
46044
46045    #[simd_test(enable = "avx512f,avx512vl")]
46046    unsafe fn test_mm256_maskz_roundscale_ps() {
46047        let a = _mm256_set1_ps(1.1);
46048        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46049        assert_eq_m256(r, _mm256_setzero_ps());
46050        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46051        let e = _mm256_set1_ps(1.0);
46052        assert_eq_m256(r, e);
46053    }
46054
46055    #[simd_test(enable = "avx512f,avx512vl")]
46056    unsafe fn test_mm_roundscale_ps() {
46057        let a = _mm_set1_ps(1.1);
46058        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46059        let e = _mm_set1_ps(1.0);
46060        assert_eq_m128(r, e);
46061    }
46062
46063    #[simd_test(enable = "avx512f,avx512vl")]
46064    unsafe fn test_mm_mask_roundscale_ps() {
46065        let a = _mm_set1_ps(1.1);
46066        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46067        let e = _mm_set1_ps(1.1);
46068        assert_eq_m128(r, e);
46069        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46070        let e = _mm_set1_ps(1.0);
46071        assert_eq_m128(r, e);
46072    }
46073
46074    #[simd_test(enable = "avx512f,avx512vl")]
46075    unsafe fn test_mm_maskz_roundscale_ps() {
46076        let a = _mm_set1_ps(1.1);
46077        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46078        assert_eq_m128(r, _mm_setzero_ps());
46079        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46080        let e = _mm_set1_ps(1.0);
46081        assert_eq_m128(r, e);
46082    }
46083
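    // SCALEF: computes a * 2^floor(b) in each lane, so 1.0 scaled by 3.0 gives 8.0.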
46084    #[simd_test(enable = "avx512f")]
46085    unsafe fn test_mm512_scalef_ps() {
46086        let a = _mm512_set1_ps(1.);
46087        let b = _mm512_set1_ps(3.);
46088        let r = _mm512_scalef_ps(a, b);
46089        let e = _mm512_set1_ps(8.);
46090        assert_eq_m512(r, e);
46091    }
46092
46093    #[simd_test(enable = "avx512f")]
46094    unsafe fn test_mm512_mask_scalef_ps() {
46095        let a = _mm512_set1_ps(1.);
46096        let b = _mm512_set1_ps(3.);
46097        let r = _mm512_mask_scalef_ps(a, 0, a, b);
46098        assert_eq_m512(r, a);
46099        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46100        let e = _mm512_set_ps(
46101            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46102        );
46103        assert_eq_m512(r, e);
46104    }
46105
46106    #[simd_test(enable = "avx512f")]
46107    unsafe fn test_mm512_maskz_scalef_ps() {
46108        let a = _mm512_set1_ps(1.);
46109        let b = _mm512_set1_ps(3.);
46110        let r = _mm512_maskz_scalef_ps(0, a, b);
46111        assert_eq_m512(r, _mm512_setzero_ps());
46112        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46113        let e = _mm512_set_ps(
46114            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46115        );
46116        assert_eq_m512(r, e);
46117    }
46118
46119    #[simd_test(enable = "avx512f,avx512vl")]
46120    unsafe fn test_mm256_scalef_ps() {
46121        let a = _mm256_set1_ps(1.);
46122        let b = _mm256_set1_ps(3.);
46123        let r = _mm256_scalef_ps(a, b);
46124        let e = _mm256_set1_ps(8.);
46125        assert_eq_m256(r, e);
46126    }
46127
46128    #[simd_test(enable = "avx512f,avx512vl")]
46129    unsafe fn test_mm256_mask_scalef_ps() {
46130        let a = _mm256_set1_ps(1.);
46131        let b = _mm256_set1_ps(3.);
46132        let r = _mm256_mask_scalef_ps(a, 0, a, b);
46133        assert_eq_m256(r, a);
46134        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46135        let e = _mm256_set1_ps(8.);
46136        assert_eq_m256(r, e);
46137    }
46138
46139    #[simd_test(enable = "avx512f,avx512vl")]
46140    unsafe fn test_mm256_maskz_scalef_ps() {
46141        let a = _mm256_set1_ps(1.);
46142        let b = _mm256_set1_ps(3.);
46143        let r = _mm256_maskz_scalef_ps(0, a, b);
46144        assert_eq_m256(r, _mm256_setzero_ps());
46145        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46146        let e = _mm256_set1_ps(8.);
46147        assert_eq_m256(r, e);
46148    }
46149
46150    #[simd_test(enable = "avx512f,avx512vl")]
46151    unsafe fn test_mm_scalef_ps() {
46152        let a = _mm_set1_ps(1.);
46153        let b = _mm_set1_ps(3.);
46154        let r = _mm_scalef_ps(a, b);
46155        let e = _mm_set1_ps(8.);
46156        assert_eq_m128(r, e);
46157    }
46158
46159    #[simd_test(enable = "avx512f,avx512vl")]
46160    unsafe fn test_mm_mask_scalef_ps() {
46161        let a = _mm_set1_ps(1.);
46162        let b = _mm_set1_ps(3.);
46163        let r = _mm_mask_scalef_ps(a, 0, a, b);
46164        assert_eq_m128(r, a);
46165        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46166        let e = _mm_set1_ps(8.);
46167        assert_eq_m128(r, e);
46168    }
46169
46170    #[simd_test(enable = "avx512f,avx512vl")]
46171    unsafe fn test_mm_maskz_scalef_ps() {
46172        let a = _mm_set1_ps(1.);
46173        let b = _mm_set1_ps(3.);
46174        let r = _mm_maskz_scalef_ps(0, a, b);
46175        assert_eq_m128(r, _mm_setzero_ps());
46176        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46177        let e = _mm_set1_ps(8.);
46178        assert_eq_m128(r, e);
46179    }
46180
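    // FIXUPIMM: fixes up special-category inputs (such as NaN) according to the per-lane
    // lookup table in c; with the table used in these tests, NaN lanes become 0.0.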
46181    #[simd_test(enable = "avx512f")]
46182    unsafe fn test_mm512_fixupimm_ps() {
46183        let a = _mm512_set1_ps(f32::NAN);
46184        let b = _mm512_set1_ps(f32::MAX);
46185        let c = _mm512_set1_epi32(i32::MAX);
46187        let r = _mm512_fixupimm_ps::<5>(a, b, c);
46188        let e = _mm512_set1_ps(0.0);
46189        assert_eq_m512(r, e);
46190    }
46191
46192    #[simd_test(enable = "avx512f")]
46193    unsafe fn test_mm512_mask_fixupimm_ps() {
46194        #[rustfmt::skip]
46195        let a = _mm512_set_ps(
46196            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46197            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46198            1., 1., 1., 1.,
46199            1., 1., 1., 1.,
46200        );
46201        let b = _mm512_set1_ps(f32::MAX);
46202        let c = _mm512_set1_epi32(i32::MAX);
46203        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46204        let e = _mm512_set_ps(
46205            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46206        );
46207        assert_eq_m512(r, e);
46208    }
46209
46210    #[simd_test(enable = "avx512f")]
46211    unsafe fn test_mm512_maskz_fixupimm_ps() {
46212        #[rustfmt::skip]
46213        let a = _mm512_set_ps(
46214            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46215            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46216            1., 1., 1., 1.,
46217            1., 1., 1., 1.,
46218        );
46219        let b = _mm512_set1_ps(f32::MAX);
46220        let c = _mm512_set1_epi32(i32::MAX);
46221        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46222        let e = _mm512_set_ps(
46223            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46224        );
46225        assert_eq_m512(r, e);
46226    }
46227
46228    #[simd_test(enable = "avx512f,avx512vl")]
46229    unsafe fn test_mm256_fixupimm_ps() {
46230        let a = _mm256_set1_ps(f32::NAN);
46231        let b = _mm256_set1_ps(f32::MAX);
46232        let c = _mm256_set1_epi32(i32::MAX);
46233        let r = _mm256_fixupimm_ps::<5>(a, b, c);
46234        let e = _mm256_set1_ps(0.0);
46235        assert_eq_m256(r, e);
46236    }
46237
46238    #[simd_test(enable = "avx512f,avx512vl")]
46239    unsafe fn test_mm256_mask_fixupimm_ps() {
46240        let a = _mm256_set1_ps(f32::NAN);
46241        let b = _mm256_set1_ps(f32::MAX);
46242        let c = _mm256_set1_epi32(i32::MAX);
46243        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46244        let e = _mm256_set1_ps(0.0);
46245        assert_eq_m256(r, e);
46246    }
46247
46248    #[simd_test(enable = "avx512f,avx512vl")]
46249    unsafe fn test_mm256_maskz_fixupimm_ps() {
46250        let a = _mm256_set1_ps(f32::NAN);
46251        let b = _mm256_set1_ps(f32::MAX);
46252        let c = _mm256_set1_epi32(i32::MAX);
46253        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46254        let e = _mm256_set1_ps(0.0);
46255        assert_eq_m256(r, e);
46256    }
46257
46258    #[simd_test(enable = "avx512f,avx512vl")]
46259    unsafe fn test_mm_fixupimm_ps() {
46260        let a = _mm_set1_ps(f32::NAN);
46261        let b = _mm_set1_ps(f32::MAX);
46262        let c = _mm_set1_epi32(i32::MAX);
46263        let r = _mm_fixupimm_ps::<5>(a, b, c);
46264        let e = _mm_set1_ps(0.0);
46265        assert_eq_m128(r, e);
46266    }
46267
46268    #[simd_test(enable = "avx512f,avx512vl")]
46269    unsafe fn test_mm_mask_fixupimm_ps() {
46270        let a = _mm_set1_ps(f32::NAN);
46271        let b = _mm_set1_ps(f32::MAX);
46272        let c = _mm_set1_epi32(i32::MAX);
46273        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46274        let e = _mm_set1_ps(0.0);
46275        assert_eq_m128(r, e);
46276    }
46277
46278    #[simd_test(enable = "avx512f,avx512vl")]
46279    unsafe fn test_mm_maskz_fixupimm_ps() {
46280        let a = _mm_set1_ps(f32::NAN);
46281        let b = _mm_set1_ps(f32::MAX);
46282        let c = _mm_set1_epi32(i32::MAX);
46283        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46284        let e = _mm_set1_ps(0.0);
46285        assert_eq_m128(r, e);
46286    }
46287
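    // TERNARYLOGIC: applies the 8-bit truth table given by the immediate bitwise to
    // (a, b, c); with these disjoint single-bit inputs and imm8 = 8 the result is all zeros.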
46288    #[simd_test(enable = "avx512f")]
46289    unsafe fn test_mm512_ternarylogic_epi32() {
46290        let a = _mm512_set1_epi32(1 << 2);
46291        let b = _mm512_set1_epi32(1 << 1);
46292        let c = _mm512_set1_epi32(1 << 0);
46293        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46294        let e = _mm512_set1_epi32(0);
46295        assert_eq_m512i(r, e);
46296    }
46297
46298    #[simd_test(enable = "avx512f")]
46299    unsafe fn test_mm512_mask_ternarylogic_epi32() {
46300        let src = _mm512_set1_epi32(1 << 2);
46301        let a = _mm512_set1_epi32(1 << 1);
46302        let b = _mm512_set1_epi32(1 << 0);
46303        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46304        assert_eq_m512i(r, src);
46305        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46306        let e = _mm512_set1_epi32(0);
46307        assert_eq_m512i(r, e);
46308    }
46309
46310    #[simd_test(enable = "avx512f")]
46311    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46312        let a = _mm512_set1_epi32(1 << 2);
46313        let b = _mm512_set1_epi32(1 << 1);
46314        let c = _mm512_set1_epi32(1 << 0);
46315        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46316        assert_eq_m512i(r, _mm512_setzero_si512());
46317        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46318        let e = _mm512_set1_epi32(0);
46319        assert_eq_m512i(r, e);
46320    }
46321
46322    #[simd_test(enable = "avx512f,avx512vl")]
46323    unsafe fn test_mm256_ternarylogic_epi32() {
46324        let a = _mm256_set1_epi32(1 << 2);
46325        let b = _mm256_set1_epi32(1 << 1);
46326        let c = _mm256_set1_epi32(1 << 0);
46327        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46328        let e = _mm256_set1_epi32(0);
46329        assert_eq_m256i(r, e);
46330    }
46331
46332    #[simd_test(enable = "avx512f,avx512vl")]
46333    unsafe fn test_mm256_mask_ternarylogic_epi32() {
46334        let src = _mm256_set1_epi32(1 << 2);
46335        let a = _mm256_set1_epi32(1 << 1);
46336        let b = _mm256_set1_epi32(1 << 0);
46337        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46338        assert_eq_m256i(r, src);
46339        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46340        let e = _mm256_set1_epi32(0);
46341        assert_eq_m256i(r, e);
46342    }
46343
46344    #[simd_test(enable = "avx512f,avx512vl")]
46345    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46346        let a = _mm256_set1_epi32(1 << 2);
46347        let b = _mm256_set1_epi32(1 << 1);
46348        let c = _mm256_set1_epi32(1 << 0);
46349        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46350        assert_eq_m256i(r, _mm256_setzero_si256());
46351        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46352        let e = _mm256_set1_epi32(0);
46353        assert_eq_m256i(r, e);
46354    }
46355
46356    #[simd_test(enable = "avx512f,avx512vl")]
46357    unsafe fn test_mm_ternarylogic_epi32() {
46358        let a = _mm_set1_epi32(1 << 2);
46359        let b = _mm_set1_epi32(1 << 1);
46360        let c = _mm_set1_epi32(1 << 0);
46361        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46362        let e = _mm_set1_epi32(0);
46363        assert_eq_m128i(r, e);
46364    }
46365
46366    #[simd_test(enable = "avx512f,avx512vl")]
46367    unsafe fn test_mm_mask_ternarylogic_epi32() {
46368        let src = _mm_set1_epi32(1 << 2);
46369        let a = _mm_set1_epi32(1 << 1);
46370        let b = _mm_set1_epi32(1 << 0);
46371        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46372        assert_eq_m128i(r, src);
46373        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46374        let e = _mm_set1_epi32(0);
46375        assert_eq_m128i(r, e);
46376    }
46377
46378    #[simd_test(enable = "avx512f,avx512vl")]
46379    unsafe fn test_mm_maskz_ternarylogic_epi32() {
46380        let a = _mm_set1_epi32(1 << 2);
46381        let b = _mm_set1_epi32(1 << 1);
46382        let c = _mm_set1_epi32(1 << 0);
46383        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46384        assert_eq_m128i(r, _mm_setzero_si128());
46385        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46386        let e = _mm_set1_epi32(0);
46387        assert_eq_m128i(r, e);
46388    }
46389
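    // GETMANT: extracts the mantissa normalized to the interval selected by the first
    // const parameter; 10.0 = 1.25 * 2^3, so the expected mantissa is 1.25.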
46390    #[simd_test(enable = "avx512f")]
46391    unsafe fn test_mm512_getmant_ps() {
46392        let a = _mm512_set1_ps(10.);
46393        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46394        let e = _mm512_set1_ps(1.25);
46395        assert_eq_m512(r, e);
46396    }
46397
46398    #[simd_test(enable = "avx512f")]
46399    unsafe fn test_mm512_mask_getmant_ps() {
46400        let a = _mm512_set1_ps(10.);
46401        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46402        assert_eq_m512(r, a);
46403        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46404            a,
46405            0b11111111_00000000,
46406            a,
46407        );
46408        let e = _mm512_setr_ps(
46409            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46410        );
46411        assert_eq_m512(r, e);
46412    }
46413
46414    #[simd_test(enable = "avx512f")]
46415    unsafe fn test_mm512_maskz_getmant_ps() {
46416        let a = _mm512_set1_ps(10.);
46417        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46418        assert_eq_m512(r, _mm512_setzero_ps());
46419        let r =
46420            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46421        let e = _mm512_setr_ps(
46422            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46423        );
46424        assert_eq_m512(r, e);
46425    }
46426
46427    #[simd_test(enable = "avx512f,avx512vl")]
46428    unsafe fn test_mm256_getmant_ps() {
46429        let a = _mm256_set1_ps(10.);
46430        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46431        let e = _mm256_set1_ps(1.25);
46432        assert_eq_m256(r, e);
46433    }
46434
46435    #[simd_test(enable = "avx512f,avx512vl")]
46436    unsafe fn test_mm256_mask_getmant_ps() {
46437        let a = _mm256_set1_ps(10.);
46438        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46439        assert_eq_m256(r, a);
46440        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46441        let e = _mm256_set1_ps(1.25);
46442        assert_eq_m256(r, e);
46443    }
46444
46445    #[simd_test(enable = "avx512f,avx512vl")]
46446    unsafe fn test_mm256_maskz_getmant_ps() {
46447        let a = _mm256_set1_ps(10.);
46448        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46449        assert_eq_m256(r, _mm256_setzero_ps());
46450        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46451        let e = _mm256_set1_ps(1.25);
46452        assert_eq_m256(r, e);
46453    }
46454
46455    #[simd_test(enable = "avx512f,avx512vl")]
46456    unsafe fn test_mm_getmant_ps() {
46457        let a = _mm_set1_ps(10.);
46458        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46459        let e = _mm_set1_ps(1.25);
46460        assert_eq_m128(r, e);
46461    }
46462
46463    #[simd_test(enable = "avx512f,avx512vl")]
46464    unsafe fn test_mm_mask_getmant_ps() {
46465        let a = _mm_set1_ps(10.);
46466        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46467        assert_eq_m128(r, a);
46468        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46469        let e = _mm_set1_ps(1.25);
46470        assert_eq_m128(r, e);
46471    }
46472
46473    #[simd_test(enable = "avx512f,avx512vl")]
46474    unsafe fn test_mm_maskz_getmant_ps() {
46475        let a = _mm_set1_ps(10.);
46476        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46477        assert_eq_m128(r, _mm_setzero_ps());
46478        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46479        let e = _mm_set1_ps(1.25);
46480        assert_eq_m128(r, e);
46481    }
46482
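    // ADD_ROUND: addition with an explicit rounding mode. Round-to-nearest and
    // round-toward-zero differ only in the last lane, where 0.00000007 - 1.0 is not
    // exactly representable.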
46483    #[simd_test(enable = "avx512f")]
46484    unsafe fn test_mm512_add_round_ps() {
46485        let a = _mm512_setr_ps(
46486            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46487        );
46488        let b = _mm512_set1_ps(-1.);
46489        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46490        #[rustfmt::skip]
46491        let e = _mm512_setr_ps(
46492            -1., 0.5, 1., 2.5,
46493            3., 4.5, 5., 6.5,
46494            7., 8.5, 9., 10.5,
46495            11., 12.5, 13., -0.99999994,
46496        );
46497        assert_eq_m512(r, e);
46498        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46499        let e = _mm512_setr_ps(
46500            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46501        );
46502        assert_eq_m512(r, e);
46503    }
46504
46505    #[simd_test(enable = "avx512f")]
46506    unsafe fn test_mm512_mask_add_round_ps() {
46507        let a = _mm512_setr_ps(
46508            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46509        );
46510        let b = _mm512_set1_ps(-1.);
46511        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46512        assert_eq_m512(r, a);
46513        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46514            a,
46515            0b11111111_00000000,
46516            a,
46517            b,
46518        );
46519        #[rustfmt::skip]
46520        let e = _mm512_setr_ps(
46521            0., 1.5, 2., 3.5,
46522            4., 5.5, 6., 7.5,
46523            7., 8.5, 9., 10.5,
46524            11., 12.5, 13., -0.99999994,
46525        );
46526        assert_eq_m512(r, e);
46527    }
46528
46529    #[simd_test(enable = "avx512f")]
46530    unsafe fn test_mm512_maskz_add_round_ps() {
46531        let a = _mm512_setr_ps(
46532            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46533        );
46534        let b = _mm512_set1_ps(-1.);
46535        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46536        assert_eq_m512(r, _mm512_setzero_ps());
46537        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46538            0b11111111_00000000,
46539            a,
46540            b,
46541        );
46542        #[rustfmt::skip]
46543        let e = _mm512_setr_ps(
46544            0., 0., 0., 0.,
46545            0., 0., 0., 0.,
46546            7., 8.5, 9., 10.5,
46547            11., 12.5, 13., -0.99999994,
46548        );
46549        assert_eq_m512(r, e);
46550    }
46551
46552    #[simd_test(enable = "avx512f")]
46553    unsafe fn test_mm512_sub_round_ps() {
46554        let a = _mm512_setr_ps(
46555            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46556        );
46557        let b = _mm512_set1_ps(1.);
46558        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46559        #[rustfmt::skip]
46560        let e = _mm512_setr_ps(
46561            -1., 0.5, 1., 2.5,
46562            3., 4.5, 5., 6.5,
46563            7., 8.5, 9., 10.5,
46564            11., 12.5, 13., -0.99999994,
46565        );
46566        assert_eq_m512(r, e);
46567        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46568        let e = _mm512_setr_ps(
46569            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46570        );
46571        assert_eq_m512(r, e);
46572    }
46573
46574    #[simd_test(enable = "avx512f")]
46575    unsafe fn test_mm512_mask_sub_round_ps() {
46576        let a = _mm512_setr_ps(
46577            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46578        );
46579        let b = _mm512_set1_ps(1.);
46580        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46581            a, 0, a, b,
46582        );
46583        assert_eq_m512(r, a);
46584        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46585            a,
46586            0b11111111_00000000,
46587            a,
46588            b,
46589        );
46590        #[rustfmt::skip]
46591        let e = _mm512_setr_ps(
46592            0., 1.5, 2., 3.5,
46593            4., 5.5, 6., 7.5,
46594            7., 8.5, 9., 10.5,
46595            11., 12.5, 13., -0.99999994,
46596        );
46597        assert_eq_m512(r, e);
46598    }
46599
46600    #[simd_test(enable = "avx512f")]
46601    unsafe fn test_mm512_maskz_sub_round_ps() {
46602        let a = _mm512_setr_ps(
46603            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46604        );
46605        let b = _mm512_set1_ps(1.);
46606        let r =
46607            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46608        assert_eq_m512(r, _mm512_setzero_ps());
46609        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46610            0b11111111_00000000,
46611            a,
46612            b,
46613        );
46614        #[rustfmt::skip]
46615        let e = _mm512_setr_ps(
46616            0., 0., 0., 0.,
46617            0., 0., 0., 0.,
46618            7., 8.5, 9., 10.5,
46619            11., 12.5, 13., -0.99999994,
46620        );
46621        assert_eq_m512(r, e);
46622    }
46623
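    // 0.1 has no exact binary representation, so most products here are inexact and the
    // nearest-int and toward-zero results differ in the low-order mantissa bits.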
46624    #[simd_test(enable = "avx512f")]
46625    unsafe fn test_mm512_mul_round_ps() {
46626        #[rustfmt::skip]
46627        let a = _mm512_setr_ps(
46628            0., 1.5, 2., 3.5,
46629            4., 5.5, 6., 7.5,
46630            8., 9.5, 10., 11.5,
46631            12., 13.5, 14., 0.00000000000000000000007,
46632        );
46633        let b = _mm512_set1_ps(0.1);
46634        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46635        #[rustfmt::skip]
46636        let e = _mm512_setr_ps(
46637            0., 0.15, 0.2, 0.35,
46638            0.4, 0.55, 0.6, 0.75,
46639            0.8, 0.95, 1.0, 1.15,
46640            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46641        );
46642        assert_eq_m512(r, e);
46643        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46644        #[rustfmt::skip]
46645        let e = _mm512_setr_ps(
46646            0., 0.14999999, 0.2, 0.35,
46647            0.4, 0.54999995, 0.59999996, 0.75,
46648            0.8, 0.95, 1.0, 1.15,
46649            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46650        );
46651        assert_eq_m512(r, e);
46652    }
46653
46654    #[simd_test(enable = "avx512f")]
46655    unsafe fn test_mm512_mask_mul_round_ps() {
46656        #[rustfmt::skip]
46657        let a = _mm512_setr_ps(
46658            0., 1.5, 2., 3.5,
46659            4., 5.5, 6., 7.5,
46660            8., 9.5, 10., 11.5,
46661            12., 13.5, 14., 0.00000000000000000000007,
46662        );
46663        let b = _mm512_set1_ps(0.1);
46664        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46665            a, 0, a, b,
46666        );
46667        assert_eq_m512(r, a);
46668        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46669            a,
46670            0b11111111_00000000,
46671            a,
46672            b,
46673        );
46674        #[rustfmt::skip]
46675        let e = _mm512_setr_ps(
46676            0., 1.5, 2., 3.5,
46677            4., 5.5, 6., 7.5,
46678            0.8, 0.95, 1.0, 1.15,
46679            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46680        );
46681        assert_eq_m512(r, e);
46682    }
46683
46684    #[simd_test(enable = "avx512f")]
46685    unsafe fn test_mm512_maskz_mul_round_ps() {
46686        #[rustfmt::skip]
46687        let a = _mm512_setr_ps(
46688            0., 1.5, 2., 3.5,
46689            4., 5.5, 6., 7.5,
46690            8., 9.5, 10., 11.5,
46691            12., 13.5, 14., 0.00000000000000000000007,
46692        );
46693        let b = _mm512_set1_ps(0.1);
46694        let r =
46695            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46696        assert_eq_m512(r, _mm512_setzero_ps());
46697        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46698            0b11111111_00000000,
46699            a,
46700            b,
46701        );
46702        #[rustfmt::skip]
46703        let e = _mm512_setr_ps(
46704            0., 0., 0., 0.,
46705            0., 0., 0., 0.,
46706            0.8, 0.95, 1.0, 1.15,
46707            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46708        );
46709        assert_eq_m512(r, e);
46710    }
46711
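    // 1/3 is inexact: round-to-nearest gives 0.33333334, round-toward-zero truncates to 0.3333333.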
46712    #[simd_test(enable = "avx512f")]
46713    unsafe fn test_mm512_div_round_ps() {
46714        let a = _mm512_set1_ps(1.);
46715        let b = _mm512_set1_ps(3.);
46716        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46717        let e = _mm512_set1_ps(0.33333334);
46718        assert_eq_m512(r, e);
46719        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46720        let e = _mm512_set1_ps(0.3333333);
46721        assert_eq_m512(r, e);
46722    }
46723
46724    #[simd_test(enable = "avx512f")]
46725    unsafe fn test_mm512_mask_div_round_ps() {
46726        let a = _mm512_set1_ps(1.);
46727        let b = _mm512_set1_ps(3.);
46728        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46729            a, 0, a, b,
46730        );
46731        assert_eq_m512(r, a);
46732        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46733            a,
46734            0b11111111_00000000,
46735            a,
46736            b,
46737        );
46738        let e = _mm512_setr_ps(
46739            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46740            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46741        );
46742        assert_eq_m512(r, e);
46743    }
46744
46745    #[simd_test(enable = "avx512f")]
46746    unsafe fn test_mm512_maskz_div_round_ps() {
46747        let a = _mm512_set1_ps(1.);
46748        let b = _mm512_set1_ps(3.);
46749        let r =
46750            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46751        assert_eq_m512(r, _mm512_setzero_ps());
46752        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46753            0b11111111_00000000,
46754            a,
46755            b,
46756        );
46757        let e = _mm512_setr_ps(
46758            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46759            0.33333334, 0.33333334, 0.33333334, 0.33333334,
46760        );
46761        assert_eq_m512(r, e);
46762    }
46763
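    // sqrt(3) is irrational: nearest rounding gives 1.7320508, rounding toward +inf gives
    // the next representable value, 1.7320509.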
46764    #[simd_test(enable = "avx512f")]
46765    unsafe fn test_mm512_sqrt_round_ps() {
46766        let a = _mm512_set1_ps(3.);
46767        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46768        let e = _mm512_set1_ps(1.7320508);
46769        assert_eq_m512(r, e);
46770        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46771        let e = _mm512_set1_ps(1.7320509);
46772        assert_eq_m512(r, e);
46773    }
46774
46775    #[simd_test(enable = "avx512f")]
46776    unsafe fn test_mm512_mask_sqrt_round_ps() {
46777        let a = _mm512_set1_ps(3.);
46778        let r =
46779            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46780        assert_eq_m512(r, a);
46781        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46782            a,
46783            0b11111111_00000000,
46784            a,
46785        );
46786        let e = _mm512_setr_ps(
46787            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46788            1.7320508, 1.7320508, 1.7320508,
46789        );
46790        assert_eq_m512(r, e);
46791    }
46792
46793    #[simd_test(enable = "avx512f")]
46794    unsafe fn test_mm512_maskz_sqrt_round_ps() {
46795        let a = _mm512_set1_ps(3.);
46796        let r =
46797            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46798        assert_eq_m512(r, _mm512_setzero_ps());
46799        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46800            0b11111111_00000000,
46801            a,
46802        );
46803        let e = _mm512_setr_ps(
46804            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46805            1.7320508, 1.7320508, 1.7320508,
46806        );
46807        assert_eq_m512(r, e);
46808    }
46809
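    // Fused multiply-add rounds only once: 0.00000007 * 1.0 - 1.0 is exact in the
    // intermediate and then rounds to -0.99999994 (nearest) or -0.9999999 (toward zero).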
46810    #[simd_test(enable = "avx512f")]
46811    unsafe fn test_mm512_fmadd_round_ps() {
46812        let a = _mm512_set1_ps(0.00000007);
46813        let b = _mm512_set1_ps(1.);
46814        let c = _mm512_set1_ps(-1.);
46815        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46816        let e = _mm512_set1_ps(-0.99999994);
46817        assert_eq_m512(r, e);
46818        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46819        let e = _mm512_set1_ps(-0.9999999);
46820        assert_eq_m512(r, e);
46821    }
46822
46823    #[simd_test(enable = "avx512f")]
46824    unsafe fn test_mm512_mask_fmadd_round_ps() {
46825        let a = _mm512_set1_ps(0.00000007);
46826        let b = _mm512_set1_ps(1.);
46827        let c = _mm512_set1_ps(-1.);
46828        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46829            a, 0, b, c,
46830        );
46831        assert_eq_m512(r, a);
46832        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46833            a,
46834            0b00000000_11111111,
46835            b,
46836            c,
46837        );
46838        #[rustfmt::skip]
46839        let e = _mm512_setr_ps(
46840            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46841            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46842            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46843            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46844        );
46845        assert_eq_m512(r, e);
46846    }
46847
46848    #[simd_test(enable = "avx512f")]
46849    unsafe fn test_mm512_maskz_fmadd_round_ps() {
46850        let a = _mm512_set1_ps(0.00000007);
46851        let b = _mm512_set1_ps(1.);
46852        let c = _mm512_set1_ps(-1.);
46853        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46854            0, a, b, c,
46855        );
46856        assert_eq_m512(r, _mm512_setzero_ps());
46857        #[rustfmt::skip]
46858        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46859            0b00000000_11111111,
46860            a,
46861            b,
46862            c,
46863        );
46864        #[rustfmt::skip]
46865        let e = _mm512_setr_ps(
46866            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46867            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46868            0., 0., 0., 0.,
46869            0., 0., 0., 0.,
46870        );
46871        assert_eq_m512(r, e);
46872    }
46873
46874    #[simd_test(enable = "avx512f")]
46875    unsafe fn test_mm512_mask3_fmadd_round_ps() {
46876        let a = _mm512_set1_ps(0.00000007);
46877        let b = _mm512_set1_ps(1.);
46878        let c = _mm512_set1_ps(-1.);
46879        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46880            a, b, c, 0,
46881        );
46882        assert_eq_m512(r, c);
46883        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46884            a,
46885            b,
46886            c,
46887            0b00000000_11111111,
46888        );
46889        #[rustfmt::skip]
46890        let e = _mm512_setr_ps(
46891            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46892            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46893            -1., -1., -1., -1.,
46894            -1., -1., -1., -1.,
46895        );
46896        assert_eq_m512(r, e);
46897    }
46898
46899    #[simd_test(enable = "avx512f")]
46900    unsafe fn test_mm512_fmsub_round_ps() {
46901        let a = _mm512_set1_ps(0.00000007);
46902        let b = _mm512_set1_ps(1.);
46903        let c = _mm512_set1_ps(1.);
46904        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46905        let e = _mm512_set1_ps(-0.99999994);
46906        assert_eq_m512(r, e);
46907        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46908        let e = _mm512_set1_ps(-0.9999999);
46909        assert_eq_m512(r, e);
46910    }
46911
46912    #[simd_test(enable = "avx512f")]
46913    unsafe fn test_mm512_mask_fmsub_round_ps() {
46914        let a = _mm512_set1_ps(0.00000007);
46915        let b = _mm512_set1_ps(1.);
46916        let c = _mm512_set1_ps(1.);
46917        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46918            a, 0, b, c,
46919        );
46920        assert_eq_m512(r, a);
46921        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46922            a,
46923            0b00000000_11111111,
46924            b,
46925            c,
46926        );
46927        #[rustfmt::skip]
46928        let e = _mm512_setr_ps(
46929            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46930            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46931            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46932            0.00000007, 0.00000007, 0.00000007, 0.00000007,
46933        );
46934        assert_eq_m512(r, e);
46935    }
46936
46937    #[simd_test(enable = "avx512f")]
46938    unsafe fn test_mm512_maskz_fmsub_round_ps() {
46939        let a = _mm512_set1_ps(0.00000007);
46940        let b = _mm512_set1_ps(1.);
46941        let c = _mm512_set1_ps(1.);
46942        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46943            0, a, b, c,
46944        );
46945        assert_eq_m512(r, _mm512_setzero_ps());
46946        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46947            0b00000000_11111111,
46948            a,
46949            b,
46950            c,
46951        );
46952        #[rustfmt::skip]
46953        let e = _mm512_setr_ps(
46954            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46955            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46956            0., 0., 0., 0.,
46957            0., 0., 0., 0.,
46958        );
46959        assert_eq_m512(r, e);
46960    }
46961
46962    #[simd_test(enable = "avx512f")]
46963    unsafe fn test_mm512_mask3_fmsub_round_ps() {
46964        let a = _mm512_set1_ps(0.00000007);
46965        let b = _mm512_set1_ps(1.);
46966        let c = _mm512_set1_ps(1.);
46967        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46968            a, b, c, 0,
46969        );
46970        assert_eq_m512(r, c);
46971        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46972            a,
46973            b,
46974            c,
46975            0b00000000_11111111,
46976        );
46977        #[rustfmt::skip]
46978        let e = _mm512_setr_ps(
46979            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46980            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46981            1., 1., 1., 1.,
46982            1., 1., 1., 1.,
46983        );
46984        assert_eq_m512(r, e);
46985    }
46986
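    // fmaddsub alternates per lane: even-indexed lanes compute a*b - c, odd-indexed lanes
    // compute a*b + c, which with c = -1.0 gives the 1.0000001 / -0.99999994 pattern below.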
46987    #[simd_test(enable = "avx512f")]
46988    unsafe fn test_mm512_fmaddsub_round_ps() {
46989        let a = _mm512_set1_ps(0.00000007);
46990        let b = _mm512_set1_ps(1.);
46991        let c = _mm512_set1_ps(-1.);
46992        let r =
46993            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46994        #[rustfmt::skip]
46995        let e = _mm512_setr_ps(
46996            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46997            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46998            1.0000001, -0.99999994, 1.0000001, -0.99999994,
46999            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47000        );
47001        assert_eq_m512(r, e);
47002        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47003        let e = _mm512_setr_ps(
47004            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47005            -0.9999999, 1., -0.9999999, 1., -0.9999999,
47006        );
47007        assert_eq_m512(r, e);
47008    }
47009
47010    #[simd_test(enable = "avx512f")]
47011    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47012        let a = _mm512_set1_ps(0.00000007);
47013        let b = _mm512_set1_ps(1.);
47014        let c = _mm512_set1_ps(-1.);
47015        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47016            a, 0, b, c,
47017        );
47018        assert_eq_m512(r, a);
47019        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47020            a,
47021            0b00000000_11111111,
47022            b,
47023            c,
47024        );
47025        #[rustfmt::skip]
47026        let e = _mm512_setr_ps(
47027            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47028            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47029            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47030            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47031        );
47032        assert_eq_m512(r, e);
47033    }
47034
47035    #[simd_test(enable = "avx512f")]
47036    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47037        let a = _mm512_set1_ps(0.00000007);
47038        let b = _mm512_set1_ps(1.);
47039        let c = _mm512_set1_ps(-1.);
47040        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47041            0, a, b, c,
47042        );
47043        assert_eq_m512(r, _mm512_setzero_ps());
47044        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47045            0b00000000_11111111,
47046            a,
47047            b,
47048            c,
47049        );
47050        #[rustfmt::skip]
47051        let e = _mm512_setr_ps(
47052            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47053            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47054            0., 0., 0., 0.,
47055            0., 0., 0., 0.,
47056        );
47057        assert_eq_m512(r, e);
47058    }
47059
47060    #[simd_test(enable = "avx512f")]
47061    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47062        let a = _mm512_set1_ps(0.00000007);
47063        let b = _mm512_set1_ps(1.);
47064        let c = _mm512_set1_ps(-1.);
47065        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47066            a, b, c, 0,
47067        );
47068        assert_eq_m512(r, c);
47069        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47070            a,
47071            b,
47072            c,
47073            0b00000000_11111111,
47074        );
47075        #[rustfmt::skip]
47076        let e = _mm512_setr_ps(
47077            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078            1.0000001, -0.99999994, 1.0000001, -0.99999994,
47079            -1., -1., -1., -1.,
47080            -1., -1., -1., -1.,
47081        );
47082        assert_eq_m512(r, e);
47083    }
47084
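    // fmsubadd is the opposite alternation: even-indexed lanes compute a*b + c and
    // odd-indexed lanes compute a*b - c.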
47085    #[simd_test(enable = "avx512f")]
47086    unsafe fn test_mm512_fmsubadd_round_ps() {
47087        let a = _mm512_set1_ps(0.00000007);
47088        let b = _mm512_set1_ps(1.);
47089        let c = _mm512_set1_ps(-1.);
47090        let r =
47091            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47092        #[rustfmt::skip]
47093        let e = _mm512_setr_ps(
47094            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47095            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47096            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47097            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47098        );
47099        assert_eq_m512(r, e);
47100        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47101        let e = _mm512_setr_ps(
47102            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47103            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47104        );
47105        assert_eq_m512(r, e);
47106    }
47107
47108    #[simd_test(enable = "avx512f")]
47109    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47110        let a = _mm512_set1_ps(0.00000007);
47111        let b = _mm512_set1_ps(1.);
47112        let c = _mm512_set1_ps(-1.);
47113        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47114            a, 0, b, c,
47115        );
47116        assert_eq_m512(r, a);
47117        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47118            a,
47119            0b00000000_11111111,
47120            b,
47121            c,
47122        );
47123        #[rustfmt::skip]
47124        let e = _mm512_setr_ps(
47125            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47126            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47127            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47128            0.00000007, 0.00000007, 0.00000007, 0.00000007,
47129        );
47130        assert_eq_m512(r, e);
47131    }
47132
47133    #[simd_test(enable = "avx512f")]
47134    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47135        let a = _mm512_set1_ps(0.00000007);
47136        let b = _mm512_set1_ps(1.);
47137        let c = _mm512_set1_ps(-1.);
47138        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47139            0, a, b, c,
47140        );
47141        assert_eq_m512(r, _mm512_setzero_ps());
47142        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47143            0b00000000_11111111,
47144            a,
47145            b,
47146            c,
47147        );
47148        #[rustfmt::skip]
47149        let e = _mm512_setr_ps(
47150            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47151            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47152            0., 0., 0., 0.,
47153            0., 0., 0., 0.,
47154        );
47155        assert_eq_m512(r, e);
47156    }
47157
47158    #[simd_test(enable = "avx512f")]
47159    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47160        let a = _mm512_set1_ps(0.00000007);
47161        let b = _mm512_set1_ps(1.);
47162        let c = _mm512_set1_ps(-1.);
47163        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47164            a, b, c, 0,
47165        );
47166        assert_eq_m512(r, c);
47167        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47168            a,
47169            b,
47170            c,
47171            0b00000000_11111111,
47172        );
47173        #[rustfmt::skip]
47174        let e = _mm512_setr_ps(
47175            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47177            -1., -1., -1., -1.,
47178            -1., -1., -1., -1.,
47179        );
47180        assert_eq_m512(r, e);
47181    }
47182
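    // fnmadd computes -(a*b) + c: -0.00000007 + 1.0 rounds to 0.99999994 (nearest) or
    // 0.9999999 (toward zero).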
47183    #[simd_test(enable = "avx512f")]
47184    unsafe fn test_mm512_fnmadd_round_ps() {
47185        let a = _mm512_set1_ps(0.00000007);
47186        let b = _mm512_set1_ps(1.);
47187        let c = _mm512_set1_ps(1.);
47188        let r =
47189            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47190        let e = _mm512_set1_ps(0.99999994);
47191        assert_eq_m512(r, e);
47192        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47193        let e = _mm512_set1_ps(0.9999999);
47194        assert_eq_m512(r, e);
47195    }
47196
47197    #[simd_test(enable = "avx512f")]
47198    unsafe fn test_mm512_mask_fnmadd_round_ps() {
47199        let a = _mm512_set1_ps(0.00000007);
47200        let b = _mm512_set1_ps(1.);
47201        let c = _mm512_set1_ps(1.);
47202        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47203            a, 0, b, c,
47204        );
47205        assert_eq_m512(r, a);
47206        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47207            a,
47208            0b00000000_11111111,
47209            b,
47210            c,
47211        );
47212        let e = _mm512_setr_ps(
47213            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47214            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47215            0.00000007, 0.00000007,
47216        );
47217        assert_eq_m512(r, e);
47218    }
47219
47220    #[simd_test(enable = "avx512f")]
47221    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47222        let a = _mm512_set1_ps(0.00000007);
47223        let b = _mm512_set1_ps(1.);
47224        let c = _mm512_set1_ps(1.);
47225        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47226            0, a, b, c,
47227        );
47228        assert_eq_m512(r, _mm512_setzero_ps());
47229        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47230            0b00000000_11111111,
47231            a,
47232            b,
47233            c,
47234        );
47235        let e = _mm512_setr_ps(
47236            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47237            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47238        );
47239        assert_eq_m512(r, e);
47240    }
47241
47242    #[simd_test(enable = "avx512f")]
47243    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47244        let a = _mm512_set1_ps(0.00000007);
47245        let b = _mm512_set1_ps(1.);
47246        let c = _mm512_set1_ps(1.);
47247        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47248            a, b, c, 0,
47249        );
47250        assert_eq_m512(r, c);
47251        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47252            a,
47253            b,
47254            c,
47255            0b00000000_11111111,
47256        );
47257        let e = _mm512_setr_ps(
47258            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47259            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47260        );
47261        assert_eq_m512(r, e);
47262    }
47263
47264    #[simd_test(enable = "avx512f")]
47265    unsafe fn test_mm512_fnmsub_round_ps() {
47266        let a = _mm512_set1_ps(0.00000007);
47267        let b = _mm512_set1_ps(1.);
47268        let c = _mm512_set1_ps(-1.);
47269        let r =
47270            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47271        let e = _mm512_set1_ps(0.99999994);
47272        assert_eq_m512(r, e);
47273        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47274        let e = _mm512_set1_ps(0.9999999);
47275        assert_eq_m512(r, e);
47276    }
47277
47278    #[simd_test(enable = "avx512f")]
47279    unsafe fn test_mm512_mask_fnmsub_round_ps() {
47280        let a = _mm512_set1_ps(0.00000007);
47281        let b = _mm512_set1_ps(1.);
47282        let c = _mm512_set1_ps(-1.);
47283        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47284            a, 0, b, c,
47285        );
47286        assert_eq_m512(r, a);
47287        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47288            a,
47289            0b00000000_11111111,
47290            b,
47291            c,
47292        );
47293        let e = _mm512_setr_ps(
47294            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47295            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47296            0.00000007, 0.00000007,
47297        );
47298        assert_eq_m512(r, e);
47299    }
47300
47301    #[simd_test(enable = "avx512f")]
47302    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47303        let a = _mm512_set1_ps(0.00000007);
47304        let b = _mm512_set1_ps(1.);
47305        let c = _mm512_set1_ps(-1.);
47306        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47307            0, a, b, c,
47308        );
47309        assert_eq_m512(r, _mm512_setzero_ps());
47310        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47311            0b00000000_11111111,
47312            a,
47313            b,
47314            c,
47315        );
47316        let e = _mm512_setr_ps(
47317            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47318            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47319        );
47320        assert_eq_m512(r, e);
47321    }
47322
47323    #[simd_test(enable = "avx512f")]
47324    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47325        let a = _mm512_set1_ps(0.00000007);
47326        let b = _mm512_set1_ps(1.);
47327        let c = _mm512_set1_ps(-1.);
47328        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47329            a, b, c, 0,
47330        );
47331        assert_eq_m512(r, c);
47332        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47333            a,
47334            b,
47335            c,
47336            0b00000000_11111111,
47337        );
47338        let e = _mm512_setr_ps(
47339            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47340            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47341        );
47342        assert_eq_m512(r, e);
47343    }
47344
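    // max/min do not round their results; the const argument only selects the exception
    // (SAE) behavior, so _MM_FROUND_CUR_DIRECTION is used throughout these tests.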
47345    #[simd_test(enable = "avx512f")]
47346    unsafe fn test_mm512_max_round_ps() {
47347        let a = _mm512_setr_ps(
47348            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47349        );
47350        let b = _mm512_setr_ps(
47351            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47352        );
47353        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47354        let e = _mm512_setr_ps(
47355            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47356        );
47357        assert_eq_m512(r, e);
47358    }
47359
47360    #[simd_test(enable = "avx512f")]
47361    unsafe fn test_mm512_mask_max_round_ps() {
47362        let a = _mm512_setr_ps(
47363            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47364        );
47365        let b = _mm512_setr_ps(
47366            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47367        );
47368        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47369        assert_eq_m512(r, a);
47370        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47371        let e = _mm512_setr_ps(
47372            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47373        );
47374        assert_eq_m512(r, e);
47375    }
47376
47377    #[simd_test(enable = "avx512f")]
47378    unsafe fn test_mm512_maskz_max_round_ps() {
47379        let a = _mm512_setr_ps(
47380            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47381        );
47382        let b = _mm512_setr_ps(
47383            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47384        );
47385        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47386        assert_eq_m512(r, _mm512_setzero_ps());
47387        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47388        let e = _mm512_setr_ps(
47389            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47390        );
47391        assert_eq_m512(r, e);
47392    }
47393
47394    #[simd_test(enable = "avx512f")]
47395    unsafe fn test_mm512_min_round_ps() {
47396        let a = _mm512_setr_ps(
47397            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47398        );
47399        let b = _mm512_setr_ps(
47400            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47401        );
47402        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47403        let e = _mm512_setr_ps(
47404            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47405        );
47406        assert_eq_m512(r, e);
47407    }
47408
47409    #[simd_test(enable = "avx512f")]
47410    unsafe fn test_mm512_mask_min_round_ps() {
47411        let a = _mm512_setr_ps(
47412            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47413        );
47414        let b = _mm512_setr_ps(
47415            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47416        );
47417        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47418        assert_eq_m512(r, a);
47419        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47420        let e = _mm512_setr_ps(
47421            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47422        );
47423        assert_eq_m512(r, e);
47424    }
47425
47426    #[simd_test(enable = "avx512f")]
47427    unsafe fn test_mm512_maskz_min_round_ps() {
47428        let a = _mm512_setr_ps(
47429            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47430        );
47431        let b = _mm512_setr_ps(
47432            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47433        );
47434        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47435        assert_eq_m512(r, _mm512_setzero_ps());
47436        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47437        let e = _mm512_setr_ps(
47438            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47439        );
47440        assert_eq_m512(r, e);
47441    }
47442
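    // getexp returns floor(log2(|x|)) as a float: 3.0 = 1.5 * 2^1, so the exponent is 1.0.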
47443    #[simd_test(enable = "avx512f")]
47444    unsafe fn test_mm512_getexp_round_ps() {
47445        let a = _mm512_set1_ps(3.);
47446        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47447        let e = _mm512_set1_ps(1.);
47448        assert_eq_m512(r, e);
47449    }
47450
47451    #[simd_test(enable = "avx512f")]
47452    unsafe fn test_mm512_mask_getexp_round_ps() {
47453        let a = _mm512_set1_ps(3.);
47454        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47455        assert_eq_m512(r, a);
47456        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47457        let e = _mm512_setr_ps(
47458            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47459        );
47460        assert_eq_m512(r, e);
47461    }
47462
47463    #[simd_test(enable = "avx512f")]
47464    unsafe fn test_mm512_maskz_getexp_round_ps() {
47465        let a = _mm512_set1_ps(3.);
47466        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47467        assert_eq_m512(r, _mm512_setzero_ps());
47468        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47469        let e = _mm512_setr_ps(
47470            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47471        );
47472        assert_eq_m512(r, e);
47473    }
47474
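    // roundscale with IMM8 = 0 keeps zero fraction bits, i.e. rounds to an integral
    // value: 1.1 becomes 1.0.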
47475    #[simd_test(enable = "avx512f")]
47476    unsafe fn test_mm512_roundscale_round_ps() {
47477        let a = _mm512_set1_ps(1.1);
47478        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47479        let e = _mm512_set1_ps(1.0);
47480        assert_eq_m512(r, e);
47481    }
47482
47483    #[simd_test(enable = "avx512f")]
47484    unsafe fn test_mm512_mask_roundscale_round_ps() {
47485        let a = _mm512_set1_ps(1.1);
47486        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47487        let e = _mm512_set1_ps(1.1);
47488        assert_eq_m512(r, e);
47489        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47490            a,
47491            0b11111111_11111111,
47492            a,
47493        );
47494        let e = _mm512_set1_ps(1.0);
47495        assert_eq_m512(r, e);
47496    }
47497
47498    #[simd_test(enable = "avx512f")]
47499    unsafe fn test_mm512_maskz_roundscale_round_ps() {
47500        let a = _mm512_set1_ps(1.1);
47501        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47502        assert_eq_m512(r, _mm512_setzero_ps());
47503        let r =
47504            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47505        let e = _mm512_set1_ps(1.0);
47506        assert_eq_m512(r, e);
47507    }
47508
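    // scalef computes a * 2^floor(b): 1.0 * 2^3 = 8.0.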
47509    #[simd_test(enable = "avx512f")]
47510    unsafe fn test_mm512_scalef_round_ps() {
47511        let a = _mm512_set1_ps(1.);
47512        let b = _mm512_set1_ps(3.);
47513        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47514        let e = _mm512_set1_ps(8.);
47515        assert_eq_m512(r, e);
47516    }
47517
47518    #[simd_test(enable = "avx512f")]
47519    unsafe fn test_mm512_mask_scalef_round_ps() {
47520        let a = _mm512_set1_ps(1.);
47521        let b = _mm512_set1_ps(3.);
47522        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47523            a, 0, a, b,
47524        );
47525        assert_eq_m512(r, a);
47526        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47527            a,
47528            0b11111111_00000000,
47529            a,
47530            b,
47531        );
47532        let e = _mm512_setr_ps(
47533            1., 1., 1., 1., 1., 1., 1., 1., 8., 8., 8., 8., 8., 8., 8., 8.,
47534        );
47535        assert_eq_m512(r, e);
47536    }
47537
47538    #[simd_test(enable = "avx512f")]
47539    unsafe fn test_mm512_maskz_scalef_round_ps() {
47540        let a = _mm512_set1_ps(1.);
47541        let b = _mm512_set1_ps(3.);
47542        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47543            0, a, b,
47544        );
47545        assert_eq_m512(r, _mm512_setzero_ps());
47546        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47547            0b11111111_00000000,
47548            a,
47549            b,
47550        );
47551        let e = _mm512_setr_ps(
47552            0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8., 8., 8., 8., 8.,
47553        );
47554        assert_eq_m512(r, e);
47555    }
47556
47557    #[simd_test(enable = "avx512f")]
47558    unsafe fn test_mm512_fixupimm_round_ps() {
47559        let a = _mm512_set1_ps(f32::NAN);
47560        let b = _mm512_set1_ps(f32::MAX);
47561        let c = _mm512_set1_epi32(i32::MAX);
47562        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47563        let e = _mm512_set1_ps(0.0);
47564        assert_eq_m512(r, e);
47565    }
47566
47567    #[simd_test(enable = "avx512f")]
47568    unsafe fn test_mm512_mask_fixupimm_round_ps() {
47569        #[rustfmt::skip]
47570        let a = _mm512_set_ps(
47571            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47572            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47573            1., 1., 1., 1.,
47574            1., 1., 1., 1.,
47575        );
47576        let b = _mm512_set1_ps(f32::MAX);
47577        let c = _mm512_set1_epi32(i32::MAX);
47578        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47579            a,
47580            0b11111111_00000000,
47581            b,
47582            c,
47583        );
47584        let e = _mm512_set_ps(
47585            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47586        );
47587        assert_eq_m512(r, e);
47588    }
47589
47590    #[simd_test(enable = "avx512f")]
47591    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47592        #[rustfmt::skip]
47593        let a = _mm512_set_ps(
47594            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47595            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47596            1., 1., 1., 1.,
47597            1., 1., 1., 1.,
47598        );
47599        let b = _mm512_set1_ps(f32::MAX);
47600        let c = _mm512_set1_epi32(i32::MAX);
47601        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47602            0b11111111_00000000,
47603            a,
47604            b,
47605            c,
47606        );
47607        let e = _mm512_set_ps(
47608            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47609        );
47610        assert_eq_m512(r, e);
47611    }
47612
47613    #[simd_test(enable = "avx512f")]
47614    unsafe fn test_mm512_getmant_round_ps() {
47615        let a = _mm512_set1_ps(10.);
47616        let r = _mm512_getmant_round_ps::<
47617            _MM_MANT_NORM_1_2,
47618            _MM_MANT_SIGN_SRC,
47619            _MM_FROUND_CUR_DIRECTION,
47620        >(a);
47621        let e = _mm512_set1_ps(1.25);
47622        assert_eq_m512(r, e);
47623    }
47624
47625    #[simd_test(enable = "avx512f")]
47626    unsafe fn test_mm512_mask_getmant_round_ps() {
47627        let a = _mm512_set1_ps(10.);
47628        let r = _mm512_mask_getmant_round_ps::<
47629            _MM_MANT_NORM_1_2,
47630            _MM_MANT_SIGN_SRC,
47631            _MM_FROUND_CUR_DIRECTION,
47632        >(a, 0, a);
47633        assert_eq_m512(r, a);
47634        let r = _mm512_mask_getmant_round_ps::<
47635            _MM_MANT_NORM_1_2,
47636            _MM_MANT_SIGN_SRC,
47637            _MM_FROUND_CUR_DIRECTION,
47638        >(a, 0b11111111_00000000, a);
47639        let e = _mm512_setr_ps(
47640            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47641        );
47642        assert_eq_m512(r, e);
47643    }
47644
47645    #[simd_test(enable = "avx512f")]
47646    unsafe fn test_mm512_maskz_getmant_round_ps() {
47647        let a = _mm512_set1_ps(10.);
47648        let r = _mm512_maskz_getmant_round_ps::<
47649            _MM_MANT_NORM_1_2,
47650            _MM_MANT_SIGN_SRC,
47651            _MM_FROUND_CUR_DIRECTION,
47652        >(0, a);
47653        assert_eq_m512(r, _mm512_setzero_ps());
47654        let r = _mm512_maskz_getmant_round_ps::<
47655            _MM_MANT_NORM_1_2,
47656            _MM_MANT_SIGN_SRC,
47657            _MM_FROUND_CUR_DIRECTION,
47658        >(0b11111111_00000000, a);
47659        let e = _mm512_setr_ps(
47660            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47661        );
47662        assert_eq_m512(r, e);
47663    }
47664
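    // cvtps_epi32 uses the current (round-to-nearest-even) mode, so ties go to the even
    // integer: -3.5 -> -4, -5.5 -> -6, 9.5 -> 10, 13.5 -> 14.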
47665    #[simd_test(enable = "avx512f")]
47666    unsafe fn test_mm512_cvtps_epi32() {
47667        let a = _mm512_setr_ps(
47668            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47669        );
47670        let r = _mm512_cvtps_epi32(a);
47671        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47672        assert_eq_m512i(r, e);
47673    }
47674
47675    #[simd_test(enable = "avx512f")]
47676    unsafe fn test_mm512_mask_cvtps_epi32() {
47677        let a = _mm512_setr_ps(
47678            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47679        );
47680        let src = _mm512_set1_epi32(0);
47681        let r = _mm512_mask_cvtps_epi32(src, 0, a);
47682        assert_eq_m512i(r, src);
47683        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47684        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47685        assert_eq_m512i(r, e);
47686    }
47687
47688    #[simd_test(enable = "avx512f")]
47689    unsafe fn test_mm512_maskz_cvtps_epi32() {
47690        let a = _mm512_setr_ps(
47691            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47692        );
47693        let r = _mm512_maskz_cvtps_epi32(0, a);
47694        assert_eq_m512i(r, _mm512_setzero_si512());
47695        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47696        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47697        assert_eq_m512i(r, e);
47698    }
47699
47700    #[simd_test(enable = "avx512f,avx512vl")]
47701    unsafe fn test_mm256_mask_cvtps_epi32() {
47702        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47703        let src = _mm256_set1_epi32(0);
47704        let r = _mm256_mask_cvtps_epi32(src, 0, a);
47705        assert_eq_m256i(r, src);
47706        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47707        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47708        assert_eq_m256i(r, e);
47709    }
47710
47711    #[simd_test(enable = "avx512f,avx512vl")]
47712    unsafe fn test_mm256_maskz_cvtps_epi32() {
47713        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47714        let r = _mm256_maskz_cvtps_epi32(0, a);
47715        assert_eq_m256i(r, _mm256_setzero_si256());
47716        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47717        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47718        assert_eq_m256i(r, e);
47719    }
47720
47721    #[simd_test(enable = "avx512f,avx512vl")]
47722    unsafe fn test_mm_mask_cvtps_epi32() {
47723        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47724        let src = _mm_set1_epi32(0);
47725        let r = _mm_mask_cvtps_epi32(src, 0, a);
47726        assert_eq_m128i(r, src);
47727        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47728        let e = _mm_set_epi32(12, 14, 14, 16);
47729        assert_eq_m128i(r, e);
47730    }
47731
47732    #[simd_test(enable = "avx512f,avx512vl")]
47733    unsafe fn test_mm_maskz_cvtps_epi32() {
47734        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47735        let r = _mm_maskz_cvtps_epi32(0, a);
47736        assert_eq_m128i(r, _mm_setzero_si128());
47737        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47738        let e = _mm_set_epi32(12, 14, 14, 16);
47739        assert_eq_m128i(r, e);
47740    }
47741
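    // cvtps_epu32: negative inputs are out of range for an unsigned result and produce the
    // integer-indefinite value 0xFFFF_FFFF, which shows up as -1 when the lanes are compared
    // as signed 32-bit integers.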
47742    #[simd_test(enable = "avx512f")]
47743    unsafe fn test_mm512_cvtps_epu32() {
47744        let a = _mm512_setr_ps(
47745            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47746        );
47747        let r = _mm512_cvtps_epu32(a);
47748        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47749        assert_eq_m512i(r, e);
47750    }
47751
47752    #[simd_test(enable = "avx512f")]
47753    unsafe fn test_mm512_mask_cvtps_epu32() {
47754        let a = _mm512_setr_ps(
47755            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47756        );
47757        let src = _mm512_set1_epi32(0);
47758        let r = _mm512_mask_cvtps_epu32(src, 0, a);
47759        assert_eq_m512i(r, src);
47760        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47761        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47762        assert_eq_m512i(r, e);
47763    }
47764
47765    #[simd_test(enable = "avx512f")]
47766    unsafe fn test_mm512_maskz_cvtps_epu32() {
47767        let a = _mm512_setr_ps(
47768            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47769        );
47770        let r = _mm512_maskz_cvtps_epu32(0, a);
47771        assert_eq_m512i(r, _mm512_setzero_si512());
47772        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47773        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47774        assert_eq_m512i(r, e);
47775    }
47776
47777    #[simd_test(enable = "avx512f,avx512vl")]
47778    unsafe fn test_mm256_cvtps_epu32() {
47779        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47780        let r = _mm256_cvtps_epu32(a);
47781        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47782        assert_eq_m256i(r, e);
47783    }
47784
47785    #[simd_test(enable = "avx512f,avx512vl")]
47786    unsafe fn test_mm256_mask_cvtps_epu32() {
47787        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47788        let src = _mm256_set1_epi32(0);
47789        let r = _mm256_mask_cvtps_epu32(src, 0, a);
47790        assert_eq_m256i(r, src);
47791        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47792        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47793        assert_eq_m256i(r, e);
47794    }
47795
47796    #[simd_test(enable = "avx512f,avx512vl")]
47797    unsafe fn test_mm256_maskz_cvtps_epu32() {
47798        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47799        let r = _mm256_maskz_cvtps_epu32(0, a);
47800        assert_eq_m256i(r, _mm256_setzero_si256());
47801        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47802        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47803        assert_eq_m256i(r, e);
47804    }
47805
47806    #[simd_test(enable = "avx512f,avx512vl")]
47807    unsafe fn test_mm_cvtps_epu32() {
47808        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47809        let r = _mm_cvtps_epu32(a);
47810        let e = _mm_set_epi32(12, 14, 14, 16);
47811        assert_eq_m128i(r, e);
47812    }
47813
47814    #[simd_test(enable = "avx512f,avx512vl")]
47815    unsafe fn test_mm_mask_cvtps_epu32() {
47816        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47817        let src = _mm_set1_epi32(0);
47818        let r = _mm_mask_cvtps_epu32(src, 0, a);
47819        assert_eq_m128i(r, src);
47820        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47821        let e = _mm_set_epi32(12, 14, 14, 16);
47822        assert_eq_m128i(r, e);
47823    }
47824
47825    #[simd_test(enable = "avx512f,avx512vl")]
47826    unsafe fn test_mm_maskz_cvtps_epu32() {
47827        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47828        let r = _mm_maskz_cvtps_epu32(0, a);
47829        assert_eq_m128i(r, _mm_setzero_si128());
47830        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47831        let e = _mm_set_epi32(12, 14, 14, 16);
47832        assert_eq_m128i(r, e);
47833    }
47834
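    // cvtepi8_epi32 sign-extends each byte while cvtepu8_epi32 zero-extends; the test
    // inputs 0..=15 are non-negative, so both conversions yield the same values.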
47835    #[simd_test(enable = "avx512f")]
47836    unsafe fn test_mm512_cvtepi8_epi32() {
47837        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47838        let r = _mm512_cvtepi8_epi32(a);
47839        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47840        assert_eq_m512i(r, e);
47841    }
47842
47843    #[simd_test(enable = "avx512f")]
47844    unsafe fn test_mm512_mask_cvtepi8_epi32() {
47845        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47846        let src = _mm512_set1_epi32(-1);
47847        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47848        assert_eq_m512i(r, src);
47849        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47850        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47851        assert_eq_m512i(r, e);
47852    }
47853
47854    #[simd_test(enable = "avx512f")]
47855    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47856        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47857        let r = _mm512_maskz_cvtepi8_epi32(0, a);
47858        assert_eq_m512i(r, _mm512_setzero_si512());
47859        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47860        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47861        assert_eq_m512i(r, e);
47862    }
47863
47864    #[simd_test(enable = "avx512f,avx512vl")]
47865    unsafe fn test_mm256_mask_cvtepi8_epi32() {
47866        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47867        let src = _mm256_set1_epi32(-1);
47868        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47869        assert_eq_m256i(r, src);
47870        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47871        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47872        assert_eq_m256i(r, e);
47873    }
47874
47875    #[simd_test(enable = "avx512f,avx512vl")]
47876    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47877        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47878        let r = _mm256_maskz_cvtepi8_epi32(0, a);
47879        assert_eq_m256i(r, _mm256_setzero_si256());
47880        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47881        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47882        assert_eq_m256i(r, e);
47883    }
47884
47885    #[simd_test(enable = "avx512f,avx512vl")]
47886    unsafe fn test_mm_mask_cvtepi8_epi32() {
47887        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47888        let src = _mm_set1_epi32(-1);
47889        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47890        assert_eq_m128i(r, src);
47891        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47892        let e = _mm_set_epi32(12, 13, 14, 15);
47893        assert_eq_m128i(r, e);
47894    }
47895
47896    #[simd_test(enable = "avx512f,avx512vl")]
47897    unsafe fn test_mm_maskz_cvtepi8_epi32() {
47898        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47899        let r = _mm_maskz_cvtepi8_epi32(0, a);
47900        assert_eq_m128i(r, _mm_setzero_si128());
47901        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47902        let e = _mm_set_epi32(12, 13, 14, 15);
47903        assert_eq_m128i(r, e);
47904    }
47905
47906    #[simd_test(enable = "avx512f")]
47907    unsafe fn test_mm512_cvtepu8_epi32() {
47908        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47909        let r = _mm512_cvtepu8_epi32(a);
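        // With non-negative inputs, zero extension produces the same lanes as the
        // signed variant tested above.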
47910        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47911        assert_eq_m512i(r, e);
47912    }
47913
47914    #[simd_test(enable = "avx512f")]
47915    unsafe fn test_mm512_mask_cvtepu8_epi32() {
47916        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47917        let src = _mm512_set1_epi32(-1);
47918        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47919        assert_eq_m512i(r, src);
47920        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47921        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47922        assert_eq_m512i(r, e);
47923    }
47924
47925    #[simd_test(enable = "avx512f")]
47926    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47927        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47928        let r = _mm512_maskz_cvtepu8_epi32(0, a);
47929        assert_eq_m512i(r, _mm512_setzero_si512());
47930        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47931        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47932        assert_eq_m512i(r, e);
47933    }
47934
47935    #[simd_test(enable = "avx512f,avx512vl")]
47936    unsafe fn test_mm256_mask_cvtepu8_epi32() {
47937        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47938        let src = _mm256_set1_epi32(-1);
47939        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47940        assert_eq_m256i(r, src);
47941        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47942        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47943        assert_eq_m256i(r, e);
47944    }
47945
47946    #[simd_test(enable = "avx512f,avx512vl")]
47947    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47948        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47949        let r = _mm256_maskz_cvtepu8_epi32(0, a);
47950        assert_eq_m256i(r, _mm256_setzero_si256());
47951        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47952        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47953        assert_eq_m256i(r, e);
47954    }
47955
47956    #[simd_test(enable = "avx512f,avx512vl")]
47957    unsafe fn test_mm_mask_cvtepu8_epi32() {
47958        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47959        let src = _mm_set1_epi32(-1);
47960        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47961        assert_eq_m128i(r, src);
47962        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47963        let e = _mm_set_epi32(12, 13, 14, 15);
47964        assert_eq_m128i(r, e);
47965    }
47966
47967    #[simd_test(enable = "avx512f,avx512vl")]
47968    unsafe fn test_mm_maskz_cvtepu8_epi32() {
47969        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47970        let r = _mm_maskz_cvtepu8_epi32(0, a);
47971        assert_eq_m128i(r, _mm_setzero_si128());
47972        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47973        let e = _mm_set_epi32(12, 13, 14, 15);
47974        assert_eq_m128i(r, e);
47975    }
47976
47977    #[simd_test(enable = "avx512f")]
47978    unsafe fn test_mm512_cvtepi16_epi32() {
47979        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47980        let r = _mm512_cvtepi16_epi32(a);
47981        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47982        assert_eq_m512i(r, e);
47983    }
47984
47985    #[simd_test(enable = "avx512f")]
47986    unsafe fn test_mm512_mask_cvtepi16_epi32() {
47987        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47988        let src = _mm512_set1_epi32(-1);
47989        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
47990        assert_eq_m512i(r, src);
47991        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
47992        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47993        assert_eq_m512i(r, e);
47994    }
47995
47996    #[simd_test(enable = "avx512f")]
47997    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
47998        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47999        let r = _mm512_maskz_cvtepi16_epi32(0, a);
48000        assert_eq_m512i(r, _mm512_setzero_si512());
48001        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48002        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48003        assert_eq_m512i(r, e);
48004    }
48005
48006    #[simd_test(enable = "avx512f,avx512vl")]
48007    unsafe fn test_mm256_mask_cvtepi16_epi32() {
48008        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48009        let src = _mm256_set1_epi32(-1);
48010        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48011        assert_eq_m256i(r, src);
48012        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48013        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48014        assert_eq_m256i(r, e);
48015    }
48016
48017    #[simd_test(enable = "avx512f,avx512vl")]
48018    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48019        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48020        let r = _mm256_maskz_cvtepi16_epi32(0, a);
48021        assert_eq_m256i(r, _mm256_setzero_si256());
48022        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48023        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48024        assert_eq_m256i(r, e);
48025    }
48026
48027    #[simd_test(enable = "avx512f,avx512vl")]
48028    unsafe fn test_mm_mask_cvtepi16_epi32() {
48029        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48030        let src = _mm_set1_epi32(-1);
48031        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48032        assert_eq_m128i(r, src);
48033        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48034        let e = _mm_set_epi32(4, 5, 6, 7);
48035        assert_eq_m128i(r, e);
48036    }
48037
48038    #[simd_test(enable = "avx512f,avx512vl")]
48039    unsafe fn test_mm_maskz_cvtepi16_epi32() {
48040        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48041        let r = _mm_maskz_cvtepi16_epi32(0, a);
48042        assert_eq_m128i(r, _mm_setzero_si128());
48043        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48044        let e = _mm_set_epi32(4, 5, 6, 7);
48045        assert_eq_m128i(r, e);
48046    }
48047
48048    #[simd_test(enable = "avx512f")]
48049    unsafe fn test_mm512_cvtepu16_epi32() {
48050        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48051        let r = _mm512_cvtepu16_epi32(a);
48052        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48053        assert_eq_m512i(r, e);
48054    }
48055
48056    #[simd_test(enable = "avx512f")]
48057    unsafe fn test_mm512_mask_cvtepu16_epi32() {
48058        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48059        let src = _mm512_set1_epi32(-1);
48060        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48061        assert_eq_m512i(r, src);
48062        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48063        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48064        assert_eq_m512i(r, e);
48065    }
48066
48067    #[simd_test(enable = "avx512f")]
48068    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48069        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48070        let r = _mm512_maskz_cvtepu16_epi32(0, a);
48071        assert_eq_m512i(r, _mm512_setzero_si512());
48072        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48073        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48074        assert_eq_m512i(r, e);
48075    }
48076
48077    #[simd_test(enable = "avx512f,avx512vl")]
48078    unsafe fn test_mm256_mask_cvtepu16_epi32() {
48079        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48080        let src = _mm256_set1_epi32(-1);
48081        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48082        assert_eq_m256i(r, src);
48083        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48084        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48085        assert_eq_m256i(r, e);
48086    }
48087
48088    #[simd_test(enable = "avx512f,avx512vl")]
48089    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48090        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48091        let r = _mm256_maskz_cvtepu16_epi32(0, a);
48092        assert_eq_m256i(r, _mm256_setzero_si256());
48093        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48094        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48095        assert_eq_m256i(r, e);
48096    }
48097
48098    #[simd_test(enable = "avx512f,avx512vl")]
48099    unsafe fn test_mm_mask_cvtepu16_epi32() {
48100        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48101        let src = _mm_set1_epi32(-1);
48102        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48103        assert_eq_m128i(r, src);
48104        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48105        let e = _mm_set_epi32(12, 13, 14, 15);
48106        assert_eq_m128i(r, e);
48107    }
48108
48109    #[simd_test(enable = "avx512f,avx512vl")]
48110    unsafe fn test_mm_maskz_cvtepu16_epi32() {
48111        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48112        let r = _mm_maskz_cvtepu16_epi32(0, a);
48113        assert_eq_m128i(r, _mm_setzero_si128());
48114        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48115        let e = _mm_set_epi32(12, 13, 14, 15);
48116        assert_eq_m128i(r, e);
48117    }
48118
48119    #[simd_test(enable = "avx512f")]
48120    unsafe fn test_mm512_cvtepi32_ps() {
48121        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48122        let r = _mm512_cvtepi32_ps(a);
48123        let e = _mm512_set_ps(
48124            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48125        );
48126        assert_eq_m512(r, e);
48127    }
48128
48129    #[simd_test(enable = "avx512f")]
48130    unsafe fn test_mm512_mask_cvtepi32_ps() {
48131        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48132        let src = _mm512_set1_ps(-1.);
48133        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48134        assert_eq_m512(r, src);
48135        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48136        let e = _mm512_set_ps(
48137            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48138        );
48139        assert_eq_m512(r, e);
48140    }
48141
48142    #[simd_test(enable = "avx512f")]
48143    unsafe fn test_mm512_maskz_cvtepi32_ps() {
48144        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48145        let r = _mm512_maskz_cvtepi32_ps(0, a);
48146        assert_eq_m512(r, _mm512_setzero_ps());
48147        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48148        let e = _mm512_set_ps(
48149            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48150        );
48151        assert_eq_m512(r, e);
48152    }
48153
48154    #[simd_test(enable = "avx512f,avx512vl")]
48155    unsafe fn test_mm256_mask_cvtepi32_ps() {
48156        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48157        let src = _mm256_set1_ps(-1.);
48158        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48159        assert_eq_m256(r, src);
48160        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48161        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48162        assert_eq_m256(r, e);
48163    }
48164
48165    #[simd_test(enable = "avx512f,avx512vl")]
48166    unsafe fn test_mm256_maskz_cvtepi32_ps() {
48167        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48168        let r = _mm256_maskz_cvtepi32_ps(0, a);
48169        assert_eq_m256(r, _mm256_setzero_ps());
48170        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48171        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48172        assert_eq_m256(r, e);
48173    }
48174
48175    #[simd_test(enable = "avx512f,avx512vl")]
48176    unsafe fn test_mm_mask_cvtepi32_ps() {
48177        let a = _mm_set_epi32(1, 2, 3, 4);
48178        let src = _mm_set1_ps(-1.);
48179        let r = _mm_mask_cvtepi32_ps(src, 0, a);
48180        assert_eq_m128(r, src);
48181        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48182        let e = _mm_set_ps(1., 2., 3., 4.);
48183        assert_eq_m128(r, e);
48184    }
48185
48186    #[simd_test(enable = "avx512f,avx512vl")]
48187    unsafe fn test_mm_maskz_cvtepi32_ps() {
48188        let a = _mm_set_epi32(1, 2, 3, 4);
48189        let r = _mm_maskz_cvtepi32_ps(0, a);
48190        assert_eq_m128(r, _mm_setzero_ps());
48191        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48192        let e = _mm_set_ps(1., 2., 3., 4.);
48193        assert_eq_m128(r, e);
48194    }
48195
48196    #[simd_test(enable = "avx512f")]
48197    unsafe fn test_mm512_cvtepu32_ps() {
48198        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48199        let r = _mm512_cvtepu32_ps(a);
48200        let e = _mm512_set_ps(
48201            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48202        );
48203        assert_eq_m512(r, e);
48204    }
48205
48206    #[simd_test(enable = "avx512f")]
48207    unsafe fn test_mm512_mask_cvtepu32_ps() {
48208        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48209        let src = _mm512_set1_ps(-1.);
48210        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48211        assert_eq_m512(r, src);
48212        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48213        let e = _mm512_set_ps(
48214            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48215        );
48216        assert_eq_m512(r, e);
48217    }
48218
48219    #[simd_test(enable = "avx512f")]
48220    unsafe fn test_mm512_maskz_cvtepu32_ps() {
48221        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48222        let r = _mm512_maskz_cvtepu32_ps(0, a);
48223        assert_eq_m512(r, _mm512_setzero_ps());
48224        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48225        let e = _mm512_set_ps(
48226            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48227        );
48228        assert_eq_m512(r, e);
48229    }
48230
48231    #[simd_test(enable = "avx512f")]
48232    unsafe fn test_mm512_cvtepi32_epi16() {
48233        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48234        let r = _mm512_cvtepi32_epi16(a);
48235        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48236        assert_eq_m256i(r, e);
48237    }
48238
48239    #[simd_test(enable = "avx512f")]
48240    unsafe fn test_mm512_mask_cvtepi32_epi16() {
48241        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48242        let src = _mm256_set1_epi16(-1);
48243        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48244        assert_eq_m256i(r, src);
48245        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48246        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48247        assert_eq_m256i(r, e);
48248    }
48249
48250    #[simd_test(enable = "avx512f")]
48251    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48252        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48253        let r = _mm512_maskz_cvtepi32_epi16(0, a);
48254        assert_eq_m256i(r, _mm256_setzero_si256());
48255        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48256        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48257        assert_eq_m256i(r, e);
48258    }
48259
48260    #[simd_test(enable = "avx512f,avx512vl")]
48261    unsafe fn test_mm256_cvtepi32_epi16() {
48262        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48263        let r = _mm256_cvtepi32_epi16(a);
48264        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48265        assert_eq_m128i(r, e);
48266    }
48267
48268    #[simd_test(enable = "avx512f,avx512vl")]
48269    unsafe fn test_mm256_mask_cvtepi32_epi16() {
48270        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48271        let src = _mm_set1_epi16(-1);
48272        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48273        assert_eq_m128i(r, src);
48274        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48275        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48276        assert_eq_m128i(r, e);
48277    }
48278
48279    #[simd_test(enable = "avx512f,avx512vl")]
48280    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48281        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48282        let r = _mm256_maskz_cvtepi32_epi16(0, a);
48283        assert_eq_m128i(r, _mm_setzero_si128());
48284        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48285        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48286        assert_eq_m128i(r, e);
48287    }
48288
48289    #[simd_test(enable = "avx512f,avx512vl")]
48290    unsafe fn test_mm_cvtepi32_epi16() {
48291        let a = _mm_set_epi32(4, 5, 6, 7);
48292        let r = _mm_cvtepi32_epi16(a);
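        // The four truncated words occupy the low 64 bits of the result; the upper
        // half is zeroed.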
48293        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48294        assert_eq_m128i(r, e);
48295    }
48296
48297    #[simd_test(enable = "avx512f,avx512vl")]
48298    unsafe fn test_mm_mask_cvtepi32_epi16() {
48299        let a = _mm_set_epi32(4, 5, 6, 7);
48300        let src = _mm_set1_epi16(0);
48301        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48302        assert_eq_m128i(r, src);
48303        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48304        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48305        assert_eq_m128i(r, e);
48306    }
48307
48308    #[simd_test(enable = "avx512f,avx512vl")]
48309    unsafe fn test_mm_maskz_cvtepi32_epi16() {
48310        let a = _mm_set_epi32(4, 5, 6, 7);
48311        let r = _mm_maskz_cvtepi32_epi16(0, a);
48312        assert_eq_m128i(r, _mm_setzero_si128());
48313        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48314        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48315        assert_eq_m128i(r, e);
48316    }
48317
48318    #[simd_test(enable = "avx512f")]
48319    unsafe fn test_mm512_cvtepi32_epi8() {
48320        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48321        let r = _mm512_cvtepi32_epi8(a);
48322        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48323        assert_eq_m128i(r, e);
48324    }
48325
48326    #[simd_test(enable = "avx512f")]
48327    unsafe fn test_mm512_mask_cvtepi32_epi8() {
48328        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48329        let src = _mm_set1_epi8(-1);
48330        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48331        assert_eq_m128i(r, src);
48332        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48333        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48334        assert_eq_m128i(r, e);
48335    }
48336
48337    #[simd_test(enable = "avx512f")]
48338    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48339        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48340        let r = _mm512_maskz_cvtepi32_epi8(0, a);
48341        assert_eq_m128i(r, _mm_setzero_si128());
48342        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48343        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48344        assert_eq_m128i(r, e);
48345    }
48346
48347    #[simd_test(enable = "avx512f,avx512vl")]
48348    unsafe fn test_mm256_cvtepi32_epi8() {
48349        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48350        let r = _mm256_cvtepi32_epi8(a);
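        // The eight truncated bytes occupy the low 64 bits of the result; the rest
        // of the vector is zeroed.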
48351        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48352        assert_eq_m128i(r, e);
48353    }
48354
48355    #[simd_test(enable = "avx512f,avx512vl")]
48356    unsafe fn test_mm256_mask_cvtepi32_epi8() {
48357        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48358        let src = _mm_set1_epi8(0);
48359        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48360        assert_eq_m128i(r, src);
48361        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48362        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48363        assert_eq_m128i(r, e);
48364    }
48365
48366    #[simd_test(enable = "avx512f,avx512vl")]
48367    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48368        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48369        let r = _mm256_maskz_cvtepi32_epi8(0, a);
48370        assert_eq_m128i(r, _mm_setzero_si128());
48371        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48372        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48373        assert_eq_m128i(r, e);
48374    }
48375
48376    #[simd_test(enable = "avx512f,avx512vl")]
48377    unsafe fn test_mm_cvtepi32_epi8() {
48378        let a = _mm_set_epi32(4, 5, 6, 7);
48379        let r = _mm_cvtepi32_epi8(a);
48380        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48381        assert_eq_m128i(r, e);
48382    }
48383
48384    #[simd_test(enable = "avx512f,avx512vl")]
48385    unsafe fn test_mm_mask_cvtepi32_epi8() {
48386        let a = _mm_set_epi32(4, 5, 6, 7);
48387        let src = _mm_set1_epi8(0);
48388        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48389        assert_eq_m128i(r, src);
48390        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48391        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48392        assert_eq_m128i(r, e);
48393    }
48394
48395    #[simd_test(enable = "avx512f,avx512vl")]
48396    unsafe fn test_mm_maskz_cvtepi32_epi8() {
48397        let a = _mm_set_epi32(4, 5, 6, 7);
48398        let r = _mm_maskz_cvtepi32_epi8(0, a);
48399        assert_eq_m128i(r, _mm_setzero_si128());
48400        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48401        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48402        assert_eq_m128i(r, e);
48403    }
48404
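    // Illustrative sketch (not part of the original tests): the saturating narrows
    // below clamp each signed lane to the destination range, so a scalar reference
    // for the i32 -> i16 case could look like
    //     fn sat_i32_to_i16(x: i32) -> i16 {
    //         x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
    //     }
    // which maps i32::MIN to i16::MIN and i32::MAX to i16::MAX, matching the
    // expectations in the next few tests.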
48405    #[simd_test(enable = "avx512f")]
48406    unsafe fn test_mm512_cvtsepi32_epi16() {
48407        #[rustfmt::skip]
48408        let a = _mm512_set_epi32(
48409            0, 1, 2, 3,
48410            4, 5, 6, 7,
48411            8, 9, 10, 11,
48412            12, 13, i32::MIN, i32::MAX,
48413        );
48414        let r = _mm512_cvtsepi32_epi16(a);
48415        #[rustfmt::skip]
48416        let e = _mm256_set_epi16(
48417            0, 1, 2, 3,
48418            4, 5, 6, 7,
48419            8, 9, 10, 11,
48420            12, 13, i16::MIN, i16::MAX,
48421        );
48422        assert_eq_m256i(r, e);
48423    }
48424
48425    #[simd_test(enable = "avx512f")]
48426    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48427        #[rustfmt::skip]
48428        let a = _mm512_set_epi32(
48429            0, 1, 2, 3,
48430            4, 5, 6, 7,
48431            8, 9, 10, 11,
48432            12, 13, i32::MIN, i32::MAX,
48433        );
48434        let src = _mm256_set1_epi16(-1);
48435        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48436        assert_eq_m256i(r, src);
48437        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48438        #[rustfmt::skip]
48439        let e = _mm256_set_epi16(
48440            -1, -1, -1, -1,
48441            -1, -1, -1, -1,
48442            8, 9, 10, 11,
48443            12, 13, i16::MIN, i16::MAX,
48444        );
48445        assert_eq_m256i(r, e);
48446    }
48447
48448    #[simd_test(enable = "avx512f")]
48449    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48450        #[rustfmt::skip]
48451        let a = _mm512_set_epi32(
48452            0, 1, 2, 3,
48453            4, 5, 6, 7,
48454            8, 9, 10, 11,
48455            12, 13, i32::MIN, i32::MAX,
48456        );
48457        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48458        assert_eq_m256i(r, _mm256_setzero_si256());
48459        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48460        #[rustfmt::skip]
48461        let e = _mm256_set_epi16(
48462            0, 0, 0, 0,
48463            0, 0, 0, 0,
48464            8, 9, 10, 11,
48465            12, 13, i16::MIN, i16::MAX,
48466        );
48467        assert_eq_m256i(r, e);
48468    }
48469
48470    #[simd_test(enable = "avx512f,avx512vl")]
48471    unsafe fn test_mm256_cvtsepi32_epi16() {
48472        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48473        let r = _mm256_cvtsepi32_epi16(a);
48474        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48475        assert_eq_m128i(r, e);
48476    }
48477
48478    #[simd_test(enable = "avx512f,avx512vl")]
48479    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48480        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48481        let src = _mm_set1_epi16(-1);
48482        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48483        assert_eq_m128i(r, src);
48484        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48485        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48486        assert_eq_m128i(r, e);
48487    }
48488
48489    #[simd_test(enable = "avx512f,avx512vl")]
48490    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48491        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48492        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48493        assert_eq_m128i(r, _mm_setzero_si128());
48494        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48495        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48496        assert_eq_m128i(r, e);
48497    }
48498
48499    #[simd_test(enable = "avx512f,avx512vl")]
48500    unsafe fn test_mm_cvtsepi32_epi16() {
48501        let a = _mm_set_epi32(4, 5, 6, 7);
48502        let r = _mm_cvtsepi32_epi16(a);
48503        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48504        assert_eq_m128i(r, e);
48505    }
48506
48507    #[simd_test(enable = "avx512f,avx512vl")]
48508    unsafe fn test_mm_mask_cvtsepi32_epi16() {
48509        let a = _mm_set_epi32(4, 5, 6, 7);
48510        let src = _mm_set1_epi16(0);
48511        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48512        assert_eq_m128i(r, src);
48513        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
48514        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48515        assert_eq_m128i(r, e);
48516    }
48517
48518    #[simd_test(enable = "avx512f,avx512vl")]
48519    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48520        let a = _mm_set_epi32(4, 5, 6, 7);
48521        let r = _mm_maskz_cvtsepi32_epi16(0, a);
48522        assert_eq_m128i(r, _mm_setzero_si128());
48523        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
48524        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48525        assert_eq_m128i(r, e);
48526    }
48527
48528    #[simd_test(enable = "avx512f")]
48529    unsafe fn test_mm512_cvtsepi32_epi8() {
48530        #[rustfmt::skip]
48531        let a = _mm512_set_epi32(
48532            0, 1, 2, 3,
48533            4, 5, 6, 7,
48534            8, 9, 10, 11,
48535            12, 13, i32::MIN, i32::MAX,
48536        );
48537        let r = _mm512_cvtsepi32_epi8(a);
48538        #[rustfmt::skip]
48539        let e = _mm_set_epi8(
48540            0, 1, 2, 3,
48541            4, 5, 6, 7,
48542            8, 9, 10, 11,
48543            12, 13, i8::MIN, i8::MAX,
48544        );
48545        assert_eq_m128i(r, e);
48546    }
48547
48548    #[simd_test(enable = "avx512f")]
48549    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48550        #[rustfmt::skip]
48551        let a = _mm512_set_epi32(
48552            0, 1, 2, 3,
48553            4, 5, 6, 7,
48554            8, 9, 10, 11,
48555            12, 13, i32::MIN, i32::MAX,
48556        );
48557        let src = _mm_set1_epi8(-1);
48558        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48559        assert_eq_m128i(r, src);
48560        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48561        #[rustfmt::skip]
48562        let e = _mm_set_epi8(
48563            -1, -1, -1, -1,
48564            -1, -1, -1, -1,
48565            8, 9, 10, 11,
48566            12, 13, i8::MIN, i8::MAX,
48567        );
48568        assert_eq_m128i(r, e);
48569    }
48570
48571    #[simd_test(enable = "avx512f")]
48572    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48573        #[rustfmt::skip]
48574        let a = _mm512_set_epi32(
48575            0, 1, 2, 3,
48576            4, 5, 6, 7,
48577            8, 9, 10, 11,
48578            12, 13, i32::MIN, i32::MAX,
48579        );
48580        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48581        assert_eq_m128i(r, _mm_setzero_si128());
48582        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48583        #[rustfmt::skip]
48584        let e = _mm_set_epi8(
48585            0, 0, 0, 0,
48586            0, 0, 0, 0,
48587            8, 9, 10, 11,
48588            12, 13, i8::MIN, i8::MAX,
48589        );
48590        assert_eq_m128i(r, e);
48591    }
48592
48593    #[simd_test(enable = "avx512f,avx512vl")]
48594    unsafe fn test_mm256_cvtsepi32_epi8() {
48595        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48596        let r = _mm256_cvtsepi32_epi8(a);
48597        #[rustfmt::skip]
48598        let e = _mm_set_epi8(
48599            0, 0, 0, 0,
48600            0, 0, 0, 0,
48601            9, 10, 11, 12,
48602            13, 14, 15, 16,
48603        );
48604        assert_eq_m128i(r, e);
48605    }
48606
48607    #[simd_test(enable = "avx512f,avx512vl")]
48608    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48609        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48610        let src = _mm_set1_epi8(0);
48611        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48612        assert_eq_m128i(r, src);
48613        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48614        #[rustfmt::skip]
48615        let e = _mm_set_epi8(
48616            0, 0, 0, 0,
48617            0, 0, 0, 0,
48618            9, 10, 11, 12,
48619            13, 14, 15, 16,
48620        );
48621        assert_eq_m128i(r, e);
48622    }
48623
48624    #[simd_test(enable = "avx512f,avx512vl")]
48625    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48626        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48627        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48628        assert_eq_m128i(r, _mm_setzero_si128());
48629        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48630        #[rustfmt::skip]
48631        let e = _mm_set_epi8(
48632            0, 0, 0, 0,
48633            0, 0, 0, 0,
48634            9, 10, 11, 12,
48635            13, 14, 15, 16,
48636        );
48637        assert_eq_m128i(r, e);
48638    }
48639
48640    #[simd_test(enable = "avx512f,avx512vl")]
48641    unsafe fn test_mm_cvtsepi32_epi8() {
48642        let a = _mm_set_epi32(13, 14, 15, 16);
48643        let r = _mm_cvtsepi32_epi8(a);
48644        #[rustfmt::skip]
48645        let e = _mm_set_epi8(
48646            0, 0, 0, 0,
48647            0, 0, 0, 0,
48648            0, 0, 0, 0,
48649            13, 14, 15, 16,
48650        );
48651        assert_eq_m128i(r, e);
48652    }
48653
48654    #[simd_test(enable = "avx512f,avx512vl")]
48655    unsafe fn test_mm_mask_cvtsepi32_epi8() {
48656        let a = _mm_set_epi32(13, 14, 15, 16);
48657        let src = _mm_set1_epi8(0);
48658        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48659        assert_eq_m128i(r, src);
48660        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48661        #[rustfmt::skip]
48662        let e = _mm_set_epi8(
48663            0, 0, 0, 0,
48664            0, 0, 0, 0,
48665            0, 0, 0, 0,
48666            13, 14, 15, 16,
48667        );
48668        assert_eq_m128i(r, e);
48669    }
48670
48671    #[simd_test(enable = "avx512f,avx512vl")]
48672    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48673        let a = _mm_set_epi32(13, 14, 15, 16);
48674        let r = _mm_maskz_cvtsepi32_epi8(0, a);
48675        assert_eq_m128i(r, _mm_setzero_si128());
48676        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48677        #[rustfmt::skip]
48678        let e = _mm_set_epi8(
48679            0, 0, 0, 0,
48680            0, 0, 0, 0,
48681            0, 0, 0, 0,
48682            13, 14, 15, 16,
48683        );
48684        assert_eq_m128i(r, e);
48685    }
48686
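    // Illustrative sketch (not part of the original tests): the unsigned saturating
    // narrows treat each lane as u32 before clamping, e.g. for i32 -> u16
    //     fn usat_i32_to_u16(x: i32) -> u16 {
    //         (x as u32).min(u16::MAX as u32) as u16
    //     }
    // so i32::MIN (0x8000_0000 as u32) saturates to u16::MAX, which prints as -1
    // in the signed expectations below.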
48687    #[simd_test(enable = "avx512f")]
48688    unsafe fn test_mm512_cvtusepi32_epi16() {
48689        #[rustfmt::skip]
48690        let a = _mm512_set_epi32(
48691            0, 1, 2, 3,
48692            4, 5, 6, 7,
48693            8, 9, 10, 11,
48694            12, 13, i32::MIN, i32::MIN,
48695        );
48696        let r = _mm512_cvtusepi32_epi16(a);
48697        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48698        assert_eq_m256i(r, e);
48699    }
48700
48701    #[simd_test(enable = "avx512f")]
48702    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48703        #[rustfmt::skip]
48704        let a = _mm512_set_epi32(
48705            0, 1, 2, 3,
48706            4, 5, 6, 7,
48707            8, 9, 10, 11,
48708            12, 13, i32::MIN, i32::MIN,
48709        );
48710        let src = _mm256_set1_epi16(-1);
48711        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48712        assert_eq_m256i(r, src);
48713        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48714        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48715        assert_eq_m256i(r, e);
48716    }
48717
48718    #[simd_test(enable = "avx512f")]
48719    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48720        #[rustfmt::skip]
48721        let a = _mm512_set_epi32(
48722            0, 1, 2, 3,
48723            4, 5, 6, 7,
48724            8, 9, 10, 11,
48725            12, 13, i32::MIN, i32::MIN,
48726        );
48727        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48728        assert_eq_m256i(r, _mm256_setzero_si256());
48729        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48730        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48731        assert_eq_m256i(r, e);
48732    }
48733
48734    #[simd_test(enable = "avx512f,avx512vl")]
48735    unsafe fn test_mm256_cvtusepi32_epi16() {
48736        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48737        let r = _mm256_cvtusepi32_epi16(a);
48738        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48739        assert_eq_m128i(r, e);
48740    }
48741
48742    #[simd_test(enable = "avx512f,avx512vl")]
48743    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48744        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48745        let src = _mm_set1_epi16(0);
48746        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48747        assert_eq_m128i(r, src);
48748        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48749        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48750        assert_eq_m128i(r, e);
48751    }
48752
48753    #[simd_test(enable = "avx512f,avx512vl")]
48754    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48755        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48756        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48757        assert_eq_m128i(r, _mm_setzero_si128());
48758        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48759        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48760        assert_eq_m128i(r, e);
48761    }
48762
48763    #[simd_test(enable = "avx512f,avx512vl")]
48764    unsafe fn test_mm_cvtusepi32_epi16() {
48765        let a = _mm_set_epi32(5, 6, 7, 8);
48766        let r = _mm_cvtusepi32_epi16(a);
48767        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48768        assert_eq_m128i(r, e);
48769    }
48770
48771    #[simd_test(enable = "avx512f,avx512vl")]
48772    unsafe fn test_mm_mask_cvtusepi32_epi16() {
48773        let a = _mm_set_epi32(5, 6, 7, 8);
48774        let src = _mm_set1_epi16(0);
48775        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48776        assert_eq_m128i(r, src);
48777        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48778        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48779        assert_eq_m128i(r, e);
48780    }
48781
48782    #[simd_test(enable = "avx512f,avx512vl")]
48783    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48784        let a = _mm_set_epi32(5, 6, 7, 8);
48785        let r = _mm_maskz_cvtusepi32_epi16(0, a);
48786        assert_eq_m128i(r, _mm_setzero_si128());
48787        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48788        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48789        assert_eq_m128i(r, e);
48790    }
48791
48792    #[simd_test(enable = "avx512f")]
48793    unsafe fn test_mm512_cvtusepi32_epi8() {
48794        #[rustfmt::skip]
48795        let a = _mm512_set_epi32(
48796            0, 1, 2, 3,
48797            4, 5, 6, 7,
48798            8, 9, 10, 11,
48799            12, 13, i32::MIN, i32::MIN,
48800        );
48801        let r = _mm512_cvtusepi32_epi8(a);
48802        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48803        assert_eq_m128i(r, e);
48804    }
48805
48806    #[simd_test(enable = "avx512f")]
48807    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48808        #[rustfmt::skip]
48809        let a = _mm512_set_epi32(
48810            0, 1, 2, 3,
48811            4, 5, 6, 7,
48812            8, 9, 10, 11,
48813            12, 13, i32::MIN, i32::MIN,
48814        );
48815        let src = _mm_set1_epi8(-1);
48816        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48817        assert_eq_m128i(r, src);
48818        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48819        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48820        assert_eq_m128i(r, e);
48821    }
48822
48823    #[simd_test(enable = "avx512f")]
48824    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48825        #[rustfmt::skip]
48826        let a = _mm512_set_epi32(
48827            0, 1, 2, 3,
48828            4, 5, 6, 7,
48829            8, 9, 10, 11,
48830            12, 13, i32::MIN, i32::MIN,
48831        );
48832        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48833        assert_eq_m128i(r, _mm_setzero_si128());
48834        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48835        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48836        assert_eq_m128i(r, e);
48837    }
48838
48839    #[simd_test(enable = "avx512f,avx512vl")]
48840    unsafe fn test_mm256_cvtusepi32_epi8() {
48841        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48842        let r = _mm256_cvtusepi32_epi8(a);
48843        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48844        assert_eq_m128i(r, e);
48845    }
48846
48847    #[simd_test(enable = "avx512f,avx512vl")]
48848    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48849        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48850        let src = _mm_set1_epi8(0);
48851        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48852        assert_eq_m128i(r, src);
48853        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48854        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48855        assert_eq_m128i(r, e);
48856    }
48857
48858    #[simd_test(enable = "avx512f,avx512vl")]
48859    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48860        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48861        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48862        assert_eq_m128i(r, _mm_setzero_si128());
48863        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48864        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48865        assert_eq_m128i(r, e);
48866    }
48867
48868    #[simd_test(enable = "avx512f,avx512vl")]
48869    unsafe fn test_mm_cvtusepi32_epi8() {
48870        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48871        let r = _mm_cvtusepi32_epi8(a);
48872        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48873        assert_eq_m128i(r, e);
48874    }
48875
48876    #[simd_test(enable = "avx512f,avx512vl")]
48877    unsafe fn test_mm_mask_cvtusepi32_epi8() {
48878        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48879        let src = _mm_set1_epi8(0);
48880        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48881        assert_eq_m128i(r, src);
48882        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48883        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48884        assert_eq_m128i(r, e);
48885    }
48886
48887    #[simd_test(enable = "avx512f,avx512vl")]
48888    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48889        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48890        let r = _mm_maskz_cvtusepi32_epi8(0, a);
48891        assert_eq_m128i(r, _mm_setzero_si128());
48892        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48893        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48894        assert_eq_m128i(r, e);
48895    }
48896
48897    #[simd_test(enable = "avx512f")]
48898    unsafe fn test_mm512_cvt_roundps_epi32() {
48899        let a = _mm512_setr_ps(
48900            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48901        );
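        // Two static rounding modes are exercised: nearest-even turns -1.5 into -2,
        // while rounding toward negative infinity turns 9.5 into 9.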
48902        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48903        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48904        assert_eq_m512i(r, e);
48905        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48906        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48907        assert_eq_m512i(r, e);
48908    }
48909
48910    #[simd_test(enable = "avx512f")]
48911    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48912        let a = _mm512_setr_ps(
48913            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48914        );
48915        let src = _mm512_set1_epi32(0);
48916        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48917            src, 0, a,
48918        );
48919        assert_eq_m512i(r, src);
48920        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48921            src,
48922            0b00000000_11111111,
48923            a,
48924        );
48925        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48926        assert_eq_m512i(r, e);
48927    }
48928
48929    #[simd_test(enable = "avx512f")]
48930    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48931        let a = _mm512_setr_ps(
48932            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48933        );
48934        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48935            0, a,
48936        );
48937        assert_eq_m512i(r, _mm512_setzero_si512());
48938        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48939            0b00000000_11111111,
48940            a,
48941        );
48942        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48943        assert_eq_m512i(r, e);
48944    }
48945
48946    #[simd_test(enable = "avx512f")]
48947    unsafe fn test_mm512_cvt_roundps_epu32() {
48948        let a = _mm512_setr_ps(
48949            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48950        );
48951        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
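        // Negative inputs are out of range for an unsigned destination, so the
        // conversion returns 0xFFFF_FFFF, which shows up as -1 in the signed lanes.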
48952        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48953        assert_eq_m512i(r, e);
48954        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48955        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48956        assert_eq_m512i(r, e);
48957    }
48958
48959    #[simd_test(enable = "avx512f")]
48960    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48961        let a = _mm512_setr_ps(
48962            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48963        );
48964        let src = _mm512_set1_epi32(0);
48965        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48966            src, 0, a,
48967        );
48968        assert_eq_m512i(r, src);
48969        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48970            src,
48971            0b00000000_11111111,
48972            a,
48973        );
48974        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48975        assert_eq_m512i(r, e);
48976    }
48977
48978    #[simd_test(enable = "avx512f")]
48979    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48980        let a = _mm512_setr_ps(
48981            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48982        );
48983        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48984            0, a,
48985        );
48986        assert_eq_m512i(r, _mm512_setzero_si512());
48987        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988            0b00000000_11111111,
48989            a,
48990        );
48991        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48992        assert_eq_m512i(r, e);
48993    }
48994
48995    #[simd_test(enable = "avx512f")]
48996    unsafe fn test_mm512_cvt_roundepi32_ps() {
48997        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48998        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48999        let e = _mm512_setr_ps(
49000            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49001        );
49002        assert_eq_m512(r, e);
49003    }
49004
49005    #[simd_test(enable = "avx512f")]
49006    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49007        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49008        let src = _mm512_set1_ps(0.);
49009        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49010            src, 0, a,
49011        );
49012        assert_eq_m512(r, src);
49013        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49014            src,
49015            0b00000000_11111111,
49016            a,
49017        );
49018        let e = _mm512_setr_ps(
49019            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49020        );
49021        assert_eq_m512(r, e);
49022    }
49023
49024    #[simd_test(enable = "avx512f")]
49025    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49026        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49027        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49028            0, a,
49029        );
49030        assert_eq_m512(r, _mm512_setzero_ps());
49031        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49032            0b00000000_11111111,
49033            a,
49034        );
49035        let e = _mm512_setr_ps(
49036            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49037        );
49038        assert_eq_m512(r, e);
49039    }
49040
49041    #[simd_test(enable = "avx512f")]
49042    unsafe fn test_mm512_cvt_roundepu32_ps() {
49043        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49044        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
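        // Lanes such as -2 reinterpret as u32 (4294967294), which rounds to
        // 2^32 = 4294967296.0 in f32; the 4294967300. literal denotes that same f32 value.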
49045        #[rustfmt::skip]
49046        let e = _mm512_setr_ps(
49047            0., 4294967300., 2., 4294967300.,
49048            4., 4294967300., 6., 4294967300.,
49049            8., 10., 10., 12.,
49050            12., 14., 14., 16.,
49051        );
49052        assert_eq_m512(r, e);
49053    }
49054
49055    #[simd_test(enable = "avx512f")]
49056    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49057        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49058        let src = _mm512_set1_ps(0.);
49059        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49060            src, 0, a,
49061        );
49062        assert_eq_m512(r, src);
49063        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49064            src,
49065            0b00000000_11111111,
49066            a,
49067        );
49068        #[rustfmt::skip]
49069        let e = _mm512_setr_ps(
49070            0., 4294967300., 2., 4294967300.,
49071            4., 4294967300., 6., 4294967300.,
49072            0., 0., 0., 0.,
49073            0., 0., 0., 0.,
49074        );
49075        assert_eq_m512(r, e);
49076    }
49077
49078    #[simd_test(enable = "avx512f")]
49079    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49080        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49081        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49082            0, a,
49083        );
49084        assert_eq_m512(r, _mm512_setzero_ps());
49085        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49086            0b00000000_11111111,
49087            a,
49088        );
49089        #[rustfmt::skip]
49090        let e = _mm512_setr_ps(
49091            0., 4294967300., 2., 4294967300.,
49092            4., 4294967300., 6., 4294967300.,
49093            0., 0., 0., 0.,
49094            0., 0., 0., 0.,
49095        );
49096        assert_eq_m512(r, e);
49097    }
49098
49099    #[simd_test(enable = "avx512f")]
49100    unsafe fn test_mm512_cvt_roundps_ph() {
49101        let a = _mm512_set1_ps(1.);
49102        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
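        // 1.0 in IEEE half precision is 0x3C00; four packed halves per 64-bit lane
        // give 0x3C00_3C00_3C00_3C00 = 4323521613979991040.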
49103        let e = _mm256_setr_epi64x(
49104            4323521613979991040,
49105            4323521613979991040,
49106            4323521613979991040,
49107            4323521613979991040,
49108        );
49109        assert_eq_m256i(r, e);
49110    }
49111
49112    #[simd_test(enable = "avx512f")]
49113    unsafe fn test_mm512_mask_cvt_roundps_ph() {
49114        let a = _mm512_set1_ps(1.);
49115        let src = _mm256_set1_epi16(0);
49116        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49117        assert_eq_m256i(r, src);
49118        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49119        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49120        assert_eq_m256i(r, e);
49121    }
49122
49123    #[simd_test(enable = "avx512f")]
49124    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49125        let a = _mm512_set1_ps(1.);
49126        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49127        assert_eq_m256i(r, _mm256_setzero_si256());
49128        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49129        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49130        assert_eq_m256i(r, e);
49131    }
49132
49133    #[simd_test(enable = "avx512f,avx512vl")]
49134    unsafe fn test_mm256_mask_cvt_roundps_ph() {
49135        let a = _mm256_set1_ps(1.);
49136        let src = _mm_set1_epi16(0);
49137        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49138        assert_eq_m128i(r, src);
49139        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49140        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49141        assert_eq_m128i(r, e);
49142    }
49143
49144    #[simd_test(enable = "avx512f,avx512vl")]
49145    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49146        let a = _mm256_set1_ps(1.);
49147        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49148        assert_eq_m128i(r, _mm_setzero_si128());
49149        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49150        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49151        assert_eq_m128i(r, e);
49152    }
49153
49154    #[simd_test(enable = "avx512f,avx512vl")]
49155    unsafe fn test_mm_mask_cvt_roundps_ph() {
49156        let a = _mm_set1_ps(1.);
49157        let src = _mm_set1_epi16(0);
49158        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49159        assert_eq_m128i(r, src);
49160        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49161        let e = _mm_setr_epi64x(4323521613979991040, 0);
49162        assert_eq_m128i(r, e);
49163    }
49164
49165    #[simd_test(enable = "avx512f,avx512vl")]
49166    unsafe fn test_mm_maskz_cvt_roundps_ph() {
49167        let a = _mm_set1_ps(1.);
49168        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49169        assert_eq_m128i(r, _mm_setzero_si128());
49170        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49171        let e = _mm_setr_epi64x(4323521613979991040, 0);
49172        assert_eq_m128i(r, e);
49173    }
49174
49175    #[simd_test(enable = "avx512f")]
49176    unsafe fn test_mm512_cvtps_ph() {
49177        let a = _mm512_set1_ps(1.);
49178        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49179        let e = _mm256_setr_epi64x(
49180            4323521613979991040,
49181            4323521613979991040,
49182            4323521613979991040,
49183            4323521613979991040,
49184        );
49185        assert_eq_m256i(r, e);
49186    }
49187
49188    #[simd_test(enable = "avx512f")]
49189    unsafe fn test_mm512_mask_cvtps_ph() {
49190        let a = _mm512_set1_ps(1.);
49191        let src = _mm256_set1_epi16(0);
49192        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49193        assert_eq_m256i(r, src);
49194        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49195        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49196        assert_eq_m256i(r, e);
49197    }
49198
49199    #[simd_test(enable = "avx512f")]
49200    unsafe fn test_mm512_maskz_cvtps_ph() {
49201        let a = _mm512_set1_ps(1.);
49202        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49203        assert_eq_m256i(r, _mm256_setzero_si256());
49204        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49205        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49206        assert_eq_m256i(r, e);
49207    }
49208
49209    #[simd_test(enable = "avx512f,avx512vl")]
49210    unsafe fn test_mm256_mask_cvtps_ph() {
49211        let a = _mm256_set1_ps(1.);
49212        let src = _mm_set1_epi16(0);
49213        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49214        assert_eq_m128i(r, src);
49215        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49216        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49217        assert_eq_m128i(r, e);
49218    }
49219
49220    #[simd_test(enable = "avx512f,avx512vl")]
49221    unsafe fn test_mm256_maskz_cvtps_ph() {
49222        let a = _mm256_set1_ps(1.);
49223        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49224        assert_eq_m128i(r, _mm_setzero_si128());
49225        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49226        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49227        assert_eq_m128i(r, e);
49228    }
49229
49230    #[simd_test(enable = "avx512f,avx512vl")]
49231    unsafe fn test_mm_mask_cvtps_ph() {
49232        let a = _mm_set1_ps(1.);
49233        let src = _mm_set1_epi16(0);
49234        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49235        assert_eq_m128i(r, src);
49236        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49237        let e = _mm_setr_epi64x(4323521613979991040, 0);
49238        assert_eq_m128i(r, e);
49239    }
49240
49241    #[simd_test(enable = "avx512f,avx512vl")]
49242    unsafe fn test_mm_maskz_cvtps_ph() {
49243        let a = _mm_set1_ps(1.);
49244        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49245        assert_eq_m128i(r, _mm_setzero_si128());
49246        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49247        let e = _mm_setr_epi64x(4323521613979991040, 0);
49248        assert_eq_m128i(r, e);
49249    }
49250
49251    #[simd_test(enable = "avx512f")]
49252    unsafe fn test_mm512_cvt_roundph_ps() {
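        // Each i64 lane is 0x3C00_3C00_3C00_3C00, i.e. four packed half-precision 1.0 values.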
49253        let a = _mm256_setr_epi64x(
49254            4323521613979991040,
49255            4323521613979991040,
49256            4323521613979991040,
49257            4323521613979991040,
49258        );
49259        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49260        let e = _mm512_set1_ps(1.);
49261        assert_eq_m512(r, e);
49262    }
49263
49264    #[simd_test(enable = "avx512f")]
49265    unsafe fn test_mm512_mask_cvt_roundph_ps() {
49266        let a = _mm256_setr_epi64x(
49267            4323521613979991040,
49268            4323521613979991040,
49269            4323521613979991040,
49270            4323521613979991040,
49271        );
49272        let src = _mm512_set1_ps(0.);
49273        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49274        assert_eq_m512(r, src);
49275        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49276        let e = _mm512_setr_ps(
49277            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49278        );
49279        assert_eq_m512(r, e);
49280    }
49281
49282    #[simd_test(enable = "avx512f")]
49283    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49284        let a = _mm256_setr_epi64x(
49285            4323521613979991040,
49286            4323521613979991040,
49287            4323521613979991040,
49288            4323521613979991040,
49289        );
49290        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49291        assert_eq_m512(r, _mm512_setzero_ps());
49292        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49293        let e = _mm512_setr_ps(
49294            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49295        );
49296        assert_eq_m512(r, e);
49297    }
49298
49299    #[simd_test(enable = "avx512f")]
49300    unsafe fn test_mm512_cvtph_ps() {
49301        let a = _mm256_setr_epi64x(
49302            4323521613979991040,
49303            4323521613979991040,
49304            4323521613979991040,
49305            4323521613979991040,
49306        );
49307        let r = _mm512_cvtph_ps(a);
49308        let e = _mm512_set1_ps(1.);
49309        assert_eq_m512(r, e);
49310    }
49311
49312    #[simd_test(enable = "avx512f")]
49313    unsafe fn test_mm512_mask_cvtph_ps() {
49314        let a = _mm256_setr_epi64x(
49315            4323521613979991040,
49316            4323521613979991040,
49317            4323521613979991040,
49318            4323521613979991040,
49319        );
49320        let src = _mm512_set1_ps(0.);
49321        let r = _mm512_mask_cvtph_ps(src, 0, a);
49322        assert_eq_m512(r, src);
49323        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49324        let e = _mm512_setr_ps(
49325            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49326        );
49327        assert_eq_m512(r, e);
49328    }
49329
49330    #[simd_test(enable = "avx512f")]
49331    unsafe fn test_mm512_maskz_cvtph_ps() {
49332        let a = _mm256_setr_epi64x(
49333            4323521613979991040,
49334            4323521613979991040,
49335            4323521613979991040,
49336            4323521613979991040,
49337        );
49338        let r = _mm512_maskz_cvtph_ps(0, a);
49339        assert_eq_m512(r, _mm512_setzero_ps());
49340        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49341        let e = _mm512_setr_ps(
49342            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49343        );
49344        assert_eq_m512(r, e);
49345    }
49346
49347    #[simd_test(enable = "avx512f,avx512vl")]
49348    unsafe fn test_mm256_mask_cvtph_ps() {
49349        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49350        let src = _mm256_set1_ps(0.);
49351        let r = _mm256_mask_cvtph_ps(src, 0, a);
49352        assert_eq_m256(r, src);
49353        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49354        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49355        assert_eq_m256(r, e);
49356    }
49357
49358    #[simd_test(enable = "avx512f,avx512vl")]
49359    unsafe fn test_mm256_maskz_cvtph_ps() {
49360        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49361        let r = _mm256_maskz_cvtph_ps(0, a);
49362        assert_eq_m256(r, _mm256_setzero_ps());
49363        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49364        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49365        assert_eq_m256(r, e);
49366    }
49367
49368    #[simd_test(enable = "avx512f,avx512vl")]
49369    unsafe fn test_mm_mask_cvtph_ps() {
49370        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49371        let src = _mm_set1_ps(0.);
49372        let r = _mm_mask_cvtph_ps(src, 0, a);
49373        assert_eq_m128(r, src);
49374        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49375        let e = _mm_setr_ps(1., 1., 1., 1.);
49376        assert_eq_m128(r, e);
49377    }
49378
49379    #[simd_test(enable = "avx512f,avx512vl")]
49380    unsafe fn test_mm_maskz_cvtph_ps() {
49381        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49382        let r = _mm_maskz_cvtph_ps(0, a);
49383        assert_eq_m128(r, _mm_setzero_ps());
49384        let r = _mm_maskz_cvtph_ps(0b00001111, a);
49385        let e = _mm_setr_ps(1., 1., 1., 1.);
49386        assert_eq_m128(r, e);
49387    }
49388
49389    #[simd_test(enable = "avx512f")]
49390    unsafe fn test_mm512_cvtt_roundps_epi32() {
49391        let a = _mm512_setr_ps(
49392            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49393        );
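        // `cvtt` truncates toward zero, so -1.5 becomes -1 and 9.5 becomes 9.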
49394        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49395        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49396        assert_eq_m512i(r, e);
49397    }
49398
49399    #[simd_test(enable = "avx512f")]
49400    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49401        let a = _mm512_setr_ps(
49402            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49403        );
49404        let src = _mm512_set1_epi32(0);
49405        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49406        assert_eq_m512i(r, src);
49407        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49408        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49409        assert_eq_m512i(r, e);
49410    }
49411
49412    #[simd_test(enable = "avx512f")]
49413    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49414        let a = _mm512_setr_ps(
49415            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49416        );
49417        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49418        assert_eq_m512i(r, _mm512_setzero_si512());
49419        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49420        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49421        assert_eq_m512i(r, e);
49422    }
49423
49424    #[simd_test(enable = "avx512f")]
49425    unsafe fn test_mm512_cvtt_roundps_epu32() {
49426        let a = _mm512_setr_ps(
49427            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49428        );
49429        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
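        // Negative inputs cannot be represented as u32; those lanes come back as u32::MAX
        // (shown below as -1).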
49430        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49431        assert_eq_m512i(r, e);
49432    }
49433
49434    #[simd_test(enable = "avx512f")]
49435    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49436        let a = _mm512_setr_ps(
49437            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49438        );
49439        let src = _mm512_set1_epi32(0);
49440        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49441        assert_eq_m512i(r, src);
49442        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49443        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49444        assert_eq_m512i(r, e);
49445    }
49446
49447    #[simd_test(enable = "avx512f")]
49448    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49449        let a = _mm512_setr_ps(
49450            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49451        );
49452        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49453        assert_eq_m512i(r, _mm512_setzero_si512());
49454        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49455        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49456        assert_eq_m512i(r, e);
49457    }
49458
49459    #[simd_test(enable = "avx512f")]
49460    unsafe fn test_mm512_cvttps_epi32() {
49461        let a = _mm512_setr_ps(
49462            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49463        );
49464        let r = _mm512_cvttps_epi32(a);
49465        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49466        assert_eq_m512i(r, e);
49467    }
49468
49469    #[simd_test(enable = "avx512f")]
49470    unsafe fn test_mm512_mask_cvttps_epi32() {
49471        let a = _mm512_setr_ps(
49472            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49473        );
49474        let src = _mm512_set1_epi32(0);
49475        let r = _mm512_mask_cvttps_epi32(src, 0, a);
49476        assert_eq_m512i(r, src);
49477        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49478        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49479        assert_eq_m512i(r, e);
49480    }
49481
49482    #[simd_test(enable = "avx512f")]
49483    unsafe fn test_mm512_maskz_cvttps_epi32() {
49484        let a = _mm512_setr_ps(
49485            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49486        );
49487        let r = _mm512_maskz_cvttps_epi32(0, a);
49488        assert_eq_m512i(r, _mm512_setzero_si512());
49489        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49490        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49491        assert_eq_m512i(r, e);
49492    }
49493
49494    #[simd_test(enable = "avx512f,avx512vl")]
49495    unsafe fn test_mm256_mask_cvttps_epi32() {
49496        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49497        let src = _mm256_set1_epi32(0);
49498        let r = _mm256_mask_cvttps_epi32(src, 0, a);
49499        assert_eq_m256i(r, src);
49500        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49501        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49502        assert_eq_m256i(r, e);
49503    }
49504
49505    #[simd_test(enable = "avx512f,avx512vl")]
49506    unsafe fn test_mm256_maskz_cvttps_epi32() {
49507        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49508        let r = _mm256_maskz_cvttps_epi32(0, a);
49509        assert_eq_m256i(r, _mm256_setzero_si256());
49510        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49511        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49512        assert_eq_m256i(r, e);
49513    }
49514
49515    #[simd_test(enable = "avx512f,avx512vl")]
49516    unsafe fn test_mm_mask_cvttps_epi32() {
49517        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49518        let src = _mm_set1_epi32(0);
49519        let r = _mm_mask_cvttps_epi32(src, 0, a);
49520        assert_eq_m128i(r, src);
49521        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49522        let e = _mm_set_epi32(12, 13, 14, 15);
49523        assert_eq_m128i(r, e);
49524    }
49525
49526    #[simd_test(enable = "avx512f,avx512vl")]
49527    unsafe fn test_mm_maskz_cvttps_epi32() {
49528        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49529        let r = _mm_maskz_cvttps_epi32(0, a);
49530        assert_eq_m128i(r, _mm_setzero_si128());
49531        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49532        let e = _mm_set_epi32(12, 13, 14, 15);
49533        assert_eq_m128i(r, e);
49534    }
49535
49536    #[simd_test(enable = "avx512f")]
49537    unsafe fn test_mm512_cvttps_epu32() {
49538        let a = _mm512_setr_ps(
49539            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49540        );
49541        let r = _mm512_cvttps_epu32(a);
49542        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49543        assert_eq_m512i(r, e);
49544    }
49545
49546    #[simd_test(enable = "avx512f")]
49547    unsafe fn test_mm512_mask_cvttps_epu32() {
49548        let a = _mm512_setr_ps(
49549            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49550        );
49551        let src = _mm512_set1_epi32(0);
49552        let r = _mm512_mask_cvttps_epu32(src, 0, a);
49553        assert_eq_m512i(r, src);
49554        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49555        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49556        assert_eq_m512i(r, e);
49557    }
49558
49559    #[simd_test(enable = "avx512f")]
49560    unsafe fn test_mm512_maskz_cvttps_epu32() {
49561        let a = _mm512_setr_ps(
49562            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49563        );
49564        let r = _mm512_maskz_cvttps_epu32(0, a);
49565        assert_eq_m512i(r, _mm512_setzero_si512());
49566        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49567        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49568        assert_eq_m512i(r, e);
49569    }
49570
49571    #[simd_test(enable = "avx512f,avx512vl")]
49572    unsafe fn test_mm256_cvttps_epu32() {
49573        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49574        let r = _mm256_cvttps_epu32(a);
49575        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49576        assert_eq_m256i(r, e);
49577    }
49578
49579    #[simd_test(enable = "avx512f,avx512vl")]
49580    unsafe fn test_mm256_mask_cvttps_epu32() {
49581        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49582        let src = _mm256_set1_epi32(0);
49583        let r = _mm256_mask_cvttps_epu32(src, 0, a);
49584        assert_eq_m256i(r, src);
49585        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49586        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49587        assert_eq_m256i(r, e);
49588    }
49589
49590    #[simd_test(enable = "avx512f,avx512vl")]
49591    unsafe fn test_mm256_maskz_cvttps_epu32() {
49592        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49593        let r = _mm256_maskz_cvttps_epu32(0, a);
49594        assert_eq_m256i(r, _mm256_setzero_si256());
49595        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49596        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49597        assert_eq_m256i(r, e);
49598    }
49599
49600    #[simd_test(enable = "avx512f,avx512vl")]
49601    unsafe fn test_mm_cvttps_epu32() {
49602        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49603        let r = _mm_cvttps_epu32(a);
49604        let e = _mm_set_epi32(12, 13, 14, 15);
49605        assert_eq_m128i(r, e);
49606    }
49607
49608    #[simd_test(enable = "avx512f,avx512vl")]
49609    unsafe fn test_mm_mask_cvttps_epu32() {
49610        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49611        let src = _mm_set1_epi32(0);
49612        let r = _mm_mask_cvttps_epu32(src, 0, a);
49613        assert_eq_m128i(r, src);
49614        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49615        let e = _mm_set_epi32(12, 13, 14, 15);
49616        assert_eq_m128i(r, e);
49617    }
49618
49619    #[simd_test(enable = "avx512f,avx512vl")]
49620    unsafe fn test_mm_maskz_cvttps_epu32() {
49621        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49622        let r = _mm_maskz_cvttps_epu32(0, a);
49623        assert_eq_m128i(r, _mm_setzero_si128());
49624        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49625        let e = _mm_set_epi32(12, 13, 14, 15);
49626        assert_eq_m128i(r, e);
49627    }
49628
49629    #[simd_test(enable = "avx512f")]
49630    unsafe fn test_mm512_i32gather_ps() {
49631        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49632        // A SCALE of 4 means the indices address whole 32-bit elements
49633        #[rustfmt::skip]
49634        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49635                                      120, 128, 136, 144, 152, 160, 168, 176);
49636        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
49637        #[rustfmt::skip]
49638        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49639                                         120., 128., 136., 144., 152., 160., 168., 176.));
49640    }
49641
49642    #[simd_test(enable = "avx512f")]
49643    unsafe fn test_mm512_mask_i32gather_ps() {
49644        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49645        let src = _mm512_set1_ps(2.);
49646        let mask = 0b10101010_10101010;
49647        #[rustfmt::skip]
49648        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49649                                      120, 128, 136, 144, 152, 160, 168, 176);
49650        // A SCALE of 4 means the indices address whole 32-bit elements
49651        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
49652        #[rustfmt::skip]
49653        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49654                                         2., 128., 2., 144., 2., 160., 2., 176.));
49655    }
49656
49657    #[simd_test(enable = "avx512f")]
49658    unsafe fn test_mm512_i32gather_epi32() {
49659        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49660        // A SCALE of 4 means the indices address whole 32-bit elements
49661        #[rustfmt::skip]
49662        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49663                                      120, 128, 136, 144, 152, 160, 168, 176);
49664        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
49665        #[rustfmt::skip]
49666        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49667                                             120, 128, 136, 144, 152, 160, 168, 176));
49668    }
49669
49670    #[simd_test(enable = "avx512f")]
49671    unsafe fn test_mm512_mask_i32gather_epi32() {
49672        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49673        let src = _mm512_set1_epi32(2);
49674        let mask = 0b10101010_10101010;
49675        let index = _mm512_setr_epi32(
49676            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49677        );
49678        // A SCALE of 4 means the indices address whole 32-bit elements
49679        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
49680        assert_eq_m512i(
49681            r,
49682            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49683        );
49684    }
49685
49686    #[simd_test(enable = "avx512f")]
49687    unsafe fn test_mm512_i32scatter_ps() {
49688        let mut arr = [0f32; 256];
49689        #[rustfmt::skip]
49690        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49691                                      128, 144, 160, 176, 192, 208, 224, 240);
49692        let src = _mm512_setr_ps(
49693            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49694        );
49695        // A SCALE of 4 means the indices address whole 32-bit elements
49696        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49697        let mut expected = [0f32; 256];
49698        for i in 0..16 {
49699            expected[i * 16] = (i + 1) as f32;
49700        }
49701        assert_eq!(&arr[..], &expected[..]);
49702    }
49703
49704    #[simd_test(enable = "avx512f")]
49705    unsafe fn test_mm512_mask_i32scatter_ps() {
49706        let mut arr = [0f32; 256];
49707        let mask = 0b10101010_10101010;
49708        #[rustfmt::skip]
49709        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49710                                      128, 144, 160, 176, 192, 208, 224, 240);
49711        let src = _mm512_setr_ps(
49712            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49713        );
49714        // A SCALE of 4 means the indices address whole 32-bit elements
49715        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49716        let mut expected = [0f32; 256];
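        // The mask 0b10101010_10101010 selects the odd elements, whose indices are
        // 16, 48, ..., 240 and whose `src` values are 2., 4., ..., 16.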
49717        for i in 0..8 {
49718            expected[i * 32 + 16] = 2. * (i + 1) as f32;
49719        }
49720        assert_eq!(&arr[..], &expected[..]);
49721    }
49722
49723    #[simd_test(enable = "avx512f")]
49724    unsafe fn test_mm512_i32scatter_epi32() {
49725        let mut arr = [0i32; 256];
49726        #[rustfmt::skip]
49727
49728        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49729                                      128, 144, 160, 176, 192, 208, 224, 240);
49730        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49731        // A SCALE of 4 means the indices address whole 32-bit elements
49732        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49733        let mut expected = [0i32; 256];
49734        for i in 0..16 {
49735            expected[i * 16] = (i + 1) as i32;
49736        }
49737        assert_eq!(&arr[..], &expected[..]);
49738    }
49739
49740    #[simd_test(enable = "avx512f")]
49741    unsafe fn test_mm512_mask_i32scatter_epi32() {
49742        let mut arr = [0i32; 256];
49743        let mask = 0b10101010_10101010;
49744        #[rustfmt::skip]
49745        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49746                                      128, 144, 160, 176, 192, 208, 224, 240);
49747        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49748        // A SCALE of 4 means the indices address whole 32-bit elements
49749        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49750        let mut expected = [0i32; 256];
49751        for i in 0..8 {
49752            expected[i * 32 + 16] = 2 * (i + 1) as i32;
49753        }
49754        assert_eq!(&arr[..], &expected[..]);
49755    }
49756
49757    #[simd_test(enable = "avx512f")]
49758    unsafe fn test_mm512_cmplt_ps_mask() {
49759        #[rustfmt::skip]
49760        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49761                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49762        let b = _mm512_set1_ps(-1.);
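        // `_mm512_set_ps` lists elements from index 15 down to 0 and mask bit i reports element i;
        // only -100. (element 0) and f32::MIN (element 2) compare below -1., giving 0b00000101 in each byte.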
49763        let m = _mm512_cmplt_ps_mask(a, b);
49764        assert_eq!(m, 0b00000101_00000101);
49765    }
49766
49767    #[simd_test(enable = "avx512f")]
49768    unsafe fn test_mm512_mask_cmplt_ps_mask() {
49769        #[rustfmt::skip]
49770        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49771                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49772        let b = _mm512_set1_ps(-1.);
49773        let mask = 0b01100110_01100110;
49774        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49775        assert_eq!(r, 0b00000100_00000100);
49776    }
49777
49778    #[simd_test(enable = "avx512f")]
49779    unsafe fn test_mm512_cmpnlt_ps_mask() {
49780        #[rustfmt::skip]
49781        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49782                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49783        let b = _mm512_set1_ps(-1.);
49784        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49785    }
49786
49787    #[simd_test(enable = "avx512f")]
49788    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49789        #[rustfmt::skip]
49790        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49791                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49792        let b = _mm512_set1_ps(-1.);
49793        let mask = 0b01111010_01111010;
49794        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49795    }
49796
49797    #[simd_test(enable = "avx512f")]
49798    unsafe fn test_mm512_cmpnle_ps_mask() {
49799        #[rustfmt::skip]
49800        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49801                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49802        let b = _mm512_set1_ps(-1.);
49803        let m = _mm512_cmpnle_ps_mask(b, a);
49804        assert_eq!(m, 0b00001101_00001101);
49805    }
49806
49807    #[simd_test(enable = "avx512f")]
49808    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49809        #[rustfmt::skip]
49810        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49811                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49812        let b = _mm512_set1_ps(-1.);
49813        let mask = 0b01100110_01100110;
49814        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49815        assert_eq!(r, 0b00000100_00000100);
49816    }
49817
49818    #[simd_test(enable = "avx512f")]
49819    unsafe fn test_mm512_cmple_ps_mask() {
49820        #[rustfmt::skip]
49821        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49822                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49823        let b = _mm512_set1_ps(-1.);
49824        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49825    }
49826
49827    #[simd_test(enable = "avx512f")]
49828    unsafe fn test_mm512_mask_cmple_ps_mask() {
49829        #[rustfmt::skip]
49830        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49831                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49832        let b = _mm512_set1_ps(-1.);
49833        let mask = 0b01111010_01111010;
49834        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49835    }
49836
49837    #[simd_test(enable = "avx512f")]
49838    unsafe fn test_mm512_cmpeq_ps_mask() {
49839        #[rustfmt::skip]
49840        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49841                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49842        #[rustfmt::skip]
49843        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49844                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49845        let m = _mm512_cmpeq_ps_mask(b, a);
49846        assert_eq!(m, 0b11001101_11001101);
49847    }
49848
49849    #[simd_test(enable = "avx512f")]
49850    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49851        #[rustfmt::skip]
49852        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49853                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49854        #[rustfmt::skip]
49855        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49856                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49857        let mask = 0b01111010_01111010;
49858        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49859        assert_eq!(r, 0b01001000_01001000);
49860    }
49861
49862    #[simd_test(enable = "avx512f")]
49863    unsafe fn test_mm512_cmpneq_ps_mask() {
49864        #[rustfmt::skip]
49865        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49866                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49867        #[rustfmt::skip]
49868        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49869                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49870        let m = _mm512_cmpneq_ps_mask(b, a);
49871        assert_eq!(m, 0b00110010_00110010);
49872    }
49873
49874    #[simd_test(enable = "avx512f")]
49875    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49876        #[rustfmt::skip]
49877        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49878                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49879        #[rustfmt::skip]
49880        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49881                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49882        let mask = 0b01111010_01111010;
49883        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49884        assert_eq!(r, 0b00110010_00110010)
49885    }
49886
49887    #[simd_test(enable = "avx512f")]
49888    unsafe fn test_mm512_cmp_ps_mask() {
49889        #[rustfmt::skip]
49890        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49891                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49892        let b = _mm512_set1_ps(-1.);
49893        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49894        assert_eq!(m, 0b00000101_00000101);
49895    }
49896
49897    #[simd_test(enable = "avx512f")]
49898    unsafe fn test_mm512_mask_cmp_ps_mask() {
49899        #[rustfmt::skip]
49900        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49901                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49902        let b = _mm512_set1_ps(-1.);
49903        let mask = 0b01100110_01100110;
49904        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49905        assert_eq!(r, 0b00000100_00000100);
49906    }
49907
49908    #[simd_test(enable = "avx512f,avx512vl")]
49909    unsafe fn test_mm256_cmp_ps_mask() {
49910        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49911        let b = _mm256_set1_ps(-1.);
49912        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49913        assert_eq!(m, 0b00000101);
49914    }
49915
49916    #[simd_test(enable = "avx512f,avx512vl")]
49917    unsafe fn test_mm256_mask_cmp_ps_mask() {
49918        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49919        let b = _mm256_set1_ps(-1.);
49920        let mask = 0b01100110;
49921        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49922        assert_eq!(r, 0b00000100);
49923    }
49924
49925    #[simd_test(enable = "avx512f,avx512vl")]
49926    unsafe fn test_mm_cmp_ps_mask() {
49927        let a = _mm_set_ps(0., 1., -1., 13.);
49928        let b = _mm_set1_ps(1.);
49929        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49930        assert_eq!(m, 0b00001010);
49931    }
49932
49933    #[simd_test(enable = "avx512f,avx512vl")]
49934    unsafe fn test_mm_mask_cmp_ps_mask() {
49935        let a = _mm_set_ps(0., 1., -1., 13.);
49936        let b = _mm_set1_ps(1.);
49937        let mask = 0b11111111;
49938        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49939        assert_eq!(r, 0b00001010);
49940    }
49941
49942    #[simd_test(enable = "avx512f")]
49943    unsafe fn test_mm512_cmp_round_ps_mask() {
49944        #[rustfmt::skip]
49945        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49946                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49947        let b = _mm512_set1_ps(-1.);
49948        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49949        assert_eq!(m, 0b00000101_00000101);
49950    }
49951
49952    #[simd_test(enable = "avx512f")]
49953    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49954        #[rustfmt::skip]
49955        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49956                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49957        let b = _mm512_set1_ps(-1.);
49958        let mask = 0b01100110_01100110;
49959        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49960        assert_eq!(r, 0b00000100_00000100);
49961    }
49962
49963    #[simd_test(enable = "avx512f")]
49964    unsafe fn test_mm512_cmpord_ps_mask() {
49965        #[rustfmt::skip]
49966        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49967                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49968        #[rustfmt::skip]
49969        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49970                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49971        let m = _mm512_cmpord_ps_mask(a, b);
49972        assert_eq!(m, 0b00000101_00000101);
49973    }
49974
49975    #[simd_test(enable = "avx512f")]
49976    unsafe fn test_mm512_mask_cmpord_ps_mask() {
49977        #[rustfmt::skip]
49978        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49979                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49980        #[rustfmt::skip]
49981        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49982                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49983        let mask = 0b11000011_11000011;
49984        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
49985        assert_eq!(m, 0b00000001_00000001);
49986    }
49987
49988    #[simd_test(enable = "avx512f")]
49989    unsafe fn test_mm512_cmpunord_ps_mask() {
49990        #[rustfmt::skip]
49991        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49992                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49993        #[rustfmt::skip]
49994        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49995                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49996        let m = _mm512_cmpunord_ps_mask(a, b);
49997
49998        assert_eq!(m, 0b11111010_11111010);
49999    }
50000
50001    #[simd_test(enable = "avx512f")]
50002    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50003        #[rustfmt::skip]
50004        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50005                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50006        #[rustfmt::skip]
50007        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50008                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50009        let mask = 0b00001111_00001111;
50010        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
50011        assert_eq!(m, 0b00001010_00001010);
50012    }
50013
50014    #[simd_test(enable = "avx512f")]
50015    unsafe fn test_mm_cmp_ss_mask() {
50016        let a = _mm_setr_ps(2., 1., 1., 1.);
50017        let b = _mm_setr_ps(1., 2., 2., 2.);
50018        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50019        assert_eq!(m, 1);
50020    }
50021
50022    #[simd_test(enable = "avx512f")]
50023    unsafe fn test_mm_mask_cmp_ss_mask() {
50024        let a = _mm_setr_ps(2., 1., 1., 1.);
50025        let b = _mm_setr_ps(1., 2., 2., 2.);
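        // Only bit 0 of the mask applies to a scalar comparison, so a mask of 0b10 zeroes the result.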
50026        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50027        assert_eq!(m, 0);
50028        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50029        assert_eq!(m, 1);
50030    }
50031
50032    #[simd_test(enable = "avx512f")]
50033    unsafe fn test_mm_cmp_round_ss_mask() {
50034        let a = _mm_setr_ps(2., 1., 1., 1.);
50035        let b = _mm_setr_ps(1., 2., 2., 2.);
50036        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50037        assert_eq!(m, 1);
50038    }
50039
50040    #[simd_test(enable = "avx512f")]
50041    unsafe fn test_mm_mask_cmp_round_ss_mask() {
50042        let a = _mm_setr_ps(2., 1., 1., 1.);
50043        let b = _mm_setr_ps(1., 2., 2., 2.);
50044        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50045        assert_eq!(m, 0);
50046        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50047        assert_eq!(m, 1);
50048    }
50049
50050    #[simd_test(enable = "avx512f")]
50051    unsafe fn test_mm_cmp_sd_mask() {
50052        let a = _mm_setr_pd(2., 1.);
50053        let b = _mm_setr_pd(1., 2.);
50054        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50055        assert_eq!(m, 1);
50056    }
50057
50058    #[simd_test(enable = "avx512f")]
50059    unsafe fn test_mm_mask_cmp_sd_mask() {
50060        let a = _mm_setr_pd(2., 1.);
50061        let b = _mm_setr_pd(1., 2.);
50062        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50063        assert_eq!(m, 0);
50064        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50065        assert_eq!(m, 1);
50066    }
50067
50068    #[simd_test(enable = "avx512f")]
50069    unsafe fn test_mm_cmp_round_sd_mask() {
50070        let a = _mm_setr_pd(2., 1.);
50071        let b = _mm_setr_pd(1., 2.);
50072        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50073        assert_eq!(m, 1);
50074    }
50075
50076    #[simd_test(enable = "avx512f")]
50077    unsafe fn test_mm_mask_cmp_round_sd_mask() {
50078        let a = _mm_setr_pd(2., 1.);
50079        let b = _mm_setr_pd(1., 2.);
50080        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50081        assert_eq!(m, 0);
50082        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50083        assert_eq!(m, 1);
50084    }
50085
50086    #[simd_test(enable = "avx512f")]
50087    unsafe fn test_mm512_cmplt_epu32_mask() {
50088        #[rustfmt::skip]
50089        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50090                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50091        let b = _mm512_set1_epi32(-1);
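        // Interpreted as unsigned, b is u32::MAX; every lane of a except the -1 and u32::MAX lanes is smaller.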
50092        let m = _mm512_cmplt_epu32_mask(a, b);
50093        assert_eq!(m, 0b11001111_11001111);
50094    }
50095
50096    #[simd_test(enable = "avx512f")]
50097    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50098        #[rustfmt::skip]
50099        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50100                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50101        let b = _mm512_set1_epi32(-1);
50102        let mask = 0b01111010_01111010;
50103        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50104        assert_eq!(r, 0b01001010_01001010);
50105    }
50106
50107    #[simd_test(enable = "avx512f,avx512vl")]
50108    unsafe fn test_mm256_cmplt_epu32_mask() {
50109        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50110        let b = _mm256_set1_epi32(1);
50111        let r = _mm256_cmplt_epu32_mask(a, b);
50112        assert_eq!(r, 0b10000000);
50113    }
50114
50115    #[simd_test(enable = "avx512f,avx512vl")]
50116    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50117        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50118        let b = _mm256_set1_epi32(1);
50119        let mask = 0b11111111;
50120        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50121        assert_eq!(r, 0b10000000);
50122    }
50123
50124    #[simd_test(enable = "avx512f,avx512vl")]
50125    unsafe fn test_mm_cmplt_epu32_mask() {
50126        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50127        let b = _mm_set1_epi32(1);
50128        let r = _mm_cmplt_epu32_mask(a, b);
50129        assert_eq!(r, 0b00001000);
50130    }
50131
50132    #[simd_test(enable = "avx512f,avx512vl")]
50133    unsafe fn test_mm_mask_cmplt_epu32_mask() {
50134        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50135        let b = _mm_set1_epi32(1);
50136        let mask = 0b11111111;
50137        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50138        assert_eq!(r, 0b00001000);
50139    }
50140
50141    #[simd_test(enable = "avx512f")]
50142    unsafe fn test_mm512_cmpgt_epu32_mask() {
50143        #[rustfmt::skip]
50144        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50145                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50146        let b = _mm512_set1_epi32(-1);
50147        let m = _mm512_cmpgt_epu32_mask(b, a);
50148        assert_eq!(m, 0b11001111_11001111);
50149    }
50150
50151    #[simd_test(enable = "avx512f")]
50152    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50153        #[rustfmt::skip]
50154        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50155                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50156        let b = _mm512_set1_epi32(-1);
50157        let mask = 0b01111010_01111010;
50158        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50159        assert_eq!(r, 0b01001010_01001010);
50160    }
50161
50162    #[simd_test(enable = "avx512f,avx512vl")]
50163    unsafe fn test_mm256_cmpgt_epu32_mask() {
50164        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50165        let b = _mm256_set1_epi32(1);
50166        let r = _mm256_cmpgt_epu32_mask(a, b);
50167        assert_eq!(r, 0b00111111);
50168    }
50169
50170    #[simd_test(enable = "avx512f,avx512vl")]
50171    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50172        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50173        let b = _mm256_set1_epi32(1);
50174        let mask = 0b11111111;
50175        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50176        assert_eq!(r, 0b00111111);
50177    }
50178
50179    #[simd_test(enable = "avx512f,avx512vl")]
50180    unsafe fn test_mm_cmpgt_epu32_mask() {
50181        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50182        let b = _mm_set1_epi32(1);
50183        let r = _mm_cmpgt_epu32_mask(a, b);
50184        assert_eq!(r, 0b00000011);
50185    }
50186
50187    #[simd_test(enable = "avx512f,avx512vl")]
50188    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50189        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50190        let b = _mm_set1_epi32(1);
50191        let mask = 0b11111111;
50192        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50193        assert_eq!(r, 0b00000011);
50194    }
50195
50196    #[simd_test(enable = "avx512f")]
50197    unsafe fn test_mm512_cmple_epu32_mask() {
50198        #[rustfmt::skip]
50199        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50200                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50201        let b = _mm512_set1_epi32(-1);
50202        assert_eq!(
50203            _mm512_cmple_epu32_mask(a, b),
50204            !_mm512_cmpgt_epu32_mask(a, b)
50205        )
50206    }
50207
50208    #[simd_test(enable = "avx512f")]
50209    unsafe fn test_mm512_mask_cmple_epu32_mask() {
50210        #[rustfmt::skip]
50211        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50212                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50213        let b = _mm512_set1_epi32(-1);
50214        let mask = 0b01111010_01111010;
50215        assert_eq!(
50216            _mm512_mask_cmple_epu32_mask(mask, a, b),
50217            0b01111010_01111010
50218        );
50219    }
50220
50221    #[simd_test(enable = "avx512f,avx512vl")]
50222    unsafe fn test_mm256_cmple_epu32_mask() {
50223        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50224        let b = _mm256_set1_epi32(1);
50225        let r = _mm256_cmple_epu32_mask(a, b);
50226        assert_eq!(r, 0b11000000)
50227    }
50228
50229    #[simd_test(enable = "avx512f,avx512vl")]
50230    unsafe fn test_mm256_mask_cmple_epu32_mask() {
50231        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50232        let b = _mm256_set1_epi32(1);
50233        let mask = 0b11111111;
50234        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50235        assert_eq!(r, 0b11000000)
50236    }
50237
50238    #[simd_test(enable = "avx512f,avx512vl")]
50239    unsafe fn test_mm_cmple_epu32_mask() {
50240        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50241        let b = _mm_set1_epi32(1);
50242        let r = _mm_cmple_epu32_mask(a, b);
50243        assert_eq!(r, 0b00001100)
50244    }
50245
50246    #[simd_test(enable = "avx512f,avx512vl")]
50247    unsafe fn test_mm_mask_cmple_epu32_mask() {
50248        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50249        let b = _mm_set1_epi32(1);
50250        let mask = 0b11111111;
50251        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50252        assert_eq!(r, 0b00001100)
50253    }
50254
50255    #[simd_test(enable = "avx512f")]
50256    unsafe fn test_mm512_cmpge_epu32_mask() {
50257        #[rustfmt::skip]
50258        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50259                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50260        let b = _mm512_set1_epi32(-1);
50261        assert_eq!(
50262            _mm512_cmpge_epu32_mask(a, b),
50263            !_mm512_cmplt_epu32_mask(a, b)
50264        )
50265    }
50266
50267    #[simd_test(enable = "avx512f")]
50268    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50269        #[rustfmt::skip]
50270        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50271                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50272        let b = _mm512_set1_epi32(-1);
50273        let mask = 0b01111010_01111010;
50274        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50275    }
50276
50277    #[simd_test(enable = "avx512f,avx512vl")]
50278    unsafe fn test_mm256_cmpge_epu32_mask() {
50279        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50280        let b = _mm256_set1_epi32(1);
50281        let r = _mm256_cmpge_epu32_mask(a, b);
50282        assert_eq!(r, 0b01111111)
50283    }
50284
50285    #[simd_test(enable = "avx512f,avx512vl")]
50286    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50287        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50288        let b = _mm256_set1_epi32(1);
50289        let mask = 0b11111111;
50290        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50291        assert_eq!(r, 0b01111111)
50292    }
50293
50294    #[simd_test(enable = "avx512f,avx512vl")]
50295    unsafe fn test_mm_cmpge_epu32_mask() {
50296        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50297        let b = _mm_set1_epi32(1);
50298        let r = _mm_cmpge_epu32_mask(a, b);
50299        assert_eq!(r, 0b00000111)
50300    }
50301
50302    #[simd_test(enable = "avx512f,avx512vl")]
50303    unsafe fn test_mm_mask_cmpge_epu32_mask() {
50304        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50305        let b = _mm_set1_epi32(1);
50306        let mask = 0b11111111;
50307        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50308        assert_eq!(r, 0b00000111)
50309    }
50310
50311    #[simd_test(enable = "avx512f")]
50312    unsafe fn test_mm512_cmpeq_epu32_mask() {
50313        #[rustfmt::skip]
50314        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50315                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50316        #[rustfmt::skip]
50317        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50318                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50319        let m = _mm512_cmpeq_epu32_mask(b, a);
50320        assert_eq!(m, 0b11001111_11001111);
50321    }
50322
50323    #[simd_test(enable = "avx512f")]
50324    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50325        #[rustfmt::skip]
50326        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50327                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50328        #[rustfmt::skip]
50329        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50330                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50331        let mask = 0b01111010_01111010;
50332        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50333        assert_eq!(r, 0b01001010_01001010);
50334    }
50335
50336    #[simd_test(enable = "avx512f,avx512vl")]
50337    unsafe fn test_mm256_cmpeq_epu32_mask() {
50338        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50339        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50340        let m = _mm256_cmpeq_epu32_mask(b, a);
50341        assert_eq!(m, 0b11001111);
50342    }
50343
50344    #[simd_test(enable = "avx512f,avx512vl")]
50345    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50346        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50347        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50348        let mask = 0b01111010;
50349        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50350        assert_eq!(r, 0b01001010);
50351    }
50352
50353    #[simd_test(enable = "avx512f,avx512vl")]
50354    unsafe fn test_mm_cmpeq_epu32_mask() {
50355        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50356        let b = _mm_set_epi32(0, 1, 13, 42);
50357        let m = _mm_cmpeq_epu32_mask(b, a);
50358        assert_eq!(m, 0b00001100);
50359    }
50360
50361    #[simd_test(enable = "avx512f,avx512vl")]
50362    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50363        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50364        let b = _mm_set_epi32(0, 1, 13, 42);
50365        let mask = 0b11111111;
50366        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50367        assert_eq!(r, 0b00001100);
50368    }
50369
50370    #[simd_test(enable = "avx512f")]
50371    unsafe fn test_mm512_cmpneq_epu32_mask() {
50372        #[rustfmt::skip]
50373        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50374                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50375        #[rustfmt::skip]
50376        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50377                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50378        let m = _mm512_cmpneq_epu32_mask(b, a);
50379        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50380    }
50381
50382    #[simd_test(enable = "avx512f")]
50383    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50384        #[rustfmt::skip]
50385        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50386                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50387        #[rustfmt::skip]
50388        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50389                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50390        let mask = 0b01111010_01111010;
50391        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50392        assert_eq!(r, 0b00110010_00110010);
50393    }
50394
50395    #[simd_test(enable = "avx512f,avx512vl")]
50396    unsafe fn test_mm256_cmpneq_epu32_mask() {
50397        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50398        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50399        let r = _mm256_cmpneq_epu32_mask(b, a);
50400        assert_eq!(r, 0b00110000);
50401    }
50402
50403    #[simd_test(enable = "avx512f,avx512vl")]
50404    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50405        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50406        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50407        let mask = 0b11111111;
50408        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50409        assert_eq!(r, 0b00110000);
50410    }
50411
50412    #[simd_test(enable = "avx512f,avx512vl")]
50413    unsafe fn test_mm_cmpneq_epu32_mask() {
50414        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50415        let b = _mm_set_epi32(0, 1, 13, 42);
50416        let r = _mm_cmpneq_epu32_mask(b, a);
50417        assert_eq!(r, 0b00000011);
50418    }
50419
50420    #[simd_test(enable = "avx512f,avx512vl")]
50421    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50422        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50423        let b = _mm_set_epi32(0, 1, 13, 42);
50424        let mask = 0b11111111;
50425        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50426        assert_eq!(r, 0b00000011);
50427    }
50428
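    // `_MM_CMPINT_LT` selects the "less than" predicate for the generic `_mm512_cmp_*`
    // intrinsics; the `_epu32` forms compare the lanes as unsigned 32-bit integers.
    // In the `mask_` variants, lanes whose bit is clear in `k` always report 0, which
    // is equivalent to AND-ing `k` with the full comparison mask.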
50429    #[simd_test(enable = "avx512f")]
50430    unsafe fn test_mm512_cmp_epu32_mask() {
50431        #[rustfmt::skip]
50432        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50433                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50434        let b = _mm512_set1_epi32(-1);
50435        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50436        assert_eq!(m, 0b11001111_11001111);
50437    }
50438
50439    #[simd_test(enable = "avx512f")]
50440    unsafe fn test_mm512_mask_cmp_epu32_mask() {
50441        #[rustfmt::skip]
50442        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50443                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50444        let b = _mm512_set1_epi32(-1);
50445        let mask = 0b01111010_01111010;
50446        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50447        assert_eq!(r, 0b01001010_01001010);
50448    }
50449
50450    #[simd_test(enable = "avx512f,avx512vl")]
50451    unsafe fn test_mm256_cmp_epu32_mask() {
50452        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453        let b = _mm256_set1_epi32(-1);
50454        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50455        assert_eq!(m, 0b11001111);
50456    }
50457
50458    #[simd_test(enable = "avx512f,avx512vl")]
50459    unsafe fn test_mm256_mask_cmp_epu32_mask() {
50460        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50461        let b = _mm256_set1_epi32(-1);
50462        let mask = 0b11111111;
50463        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50464        assert_eq!(r, 0b11001111);
50465    }
50466
50467    #[simd_test(enable = "avx512f,avx512vl")]
50468    unsafe fn test_mm_cmp_epu32_mask() {
50469        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50470        let b = _mm_set1_epi32(1);
50471        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50472        assert_eq!(m, 0b00001000);
50473    }
50474
50475    #[simd_test(enable = "avx512f,avx512vl")]
50476    unsafe fn test_mm_mask_cmp_epu32_mask() {
50477        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50478        let b = _mm_set1_epi32(1);
50479        let mask = 0b11111111;
50480        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50481        assert_eq!(r, 0b00001000);
50482    }
50483
50484    #[simd_test(enable = "avx512f")]
50485    unsafe fn test_mm512_cmplt_epi32_mask() {
50486        #[rustfmt::skip]
50487        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50488                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50489        let b = _mm512_set1_epi32(-1);
50490        let m = _mm512_cmplt_epi32_mask(a, b);
50491        assert_eq!(m, 0b00000101_00000101);
50492    }
50493
50494    #[simd_test(enable = "avx512f")]
50495    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50496        #[rustfmt::skip]
50497        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50498                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50499        let b = _mm512_set1_epi32(-1);
50500        let mask = 0b01100110_01100110;
50501        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50502        assert_eq!(r, 0b00000100_00000100);
50503    }
50504
50505    #[simd_test(enable = "avx512f,avx512vl")]
50506    unsafe fn test_mm256_cmplt_epi32_mask() {
50507        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50508        let b = _mm256_set1_epi32(-1);
50509        let r = _mm256_cmplt_epi32_mask(a, b);
50510        assert_eq!(r, 0b00000101);
50511    }
50512
50513    #[simd_test(enable = "avx512f,avx512vl")]
50514    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50515        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50516        let b = _mm256_set1_epi32(-1);
50517        let mask = 0b11111111;
50518        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50519        assert_eq!(r, 0b00000101);
50520    }
50521
50522    #[simd_test(enable = "avx512f,avx512vl")]
50523    unsafe fn test_mm_cmplt_epi32_mask() {
50524        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50525        let b = _mm_set1_epi32(-1);
50526        let r = _mm_cmplt_epi32_mask(a, b);
50527        assert_eq!(r, 0b00000101);
50528    }
50529
50530    #[simd_test(enable = "avx512f,avx512vl")]
50531    unsafe fn test_mm_mask_cmplt_epi32_mask() {
50532        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50533        let b = _mm_set1_epi32(-1);
50534        let mask = 0b11111111;
50535        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50536        assert_eq!(r, 0b00000101);
50537    }
50538
50539    #[simd_test(enable = "avx512f")]
50540    unsafe fn test_mm512_cmpgt_epi32_mask() {
50541        #[rustfmt::skip]
50542        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50543                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50544        let b = _mm512_set1_epi32(-1);
50545        let m = _mm512_cmpgt_epi32_mask(b, a);
50546        assert_eq!(m, 0b00000101_00000101);
50547    }
50548
50549    #[simd_test(enable = "avx512f")]
50550    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50551        #[rustfmt::skip]
50552        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50553                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50554        let b = _mm512_set1_epi32(-1);
50555        let mask = 0b01100110_01100110;
50556        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50557        assert_eq!(r, 0b00000100_00000100);
50558    }
50559
50560    #[simd_test(enable = "avx512f,avx512vl")]
50561    unsafe fn test_mm256_cmpgt_epi32_mask() {
50562        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50563        let b = _mm256_set1_epi32(-1);
50564        let r = _mm256_cmpgt_epi32_mask(a, b);
50565        assert_eq!(r, 0b11011010);
50566    }
50567
50568    #[simd_test(enable = "avx512f,avx512vl")]
50569    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50570        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50571        let b = _mm256_set1_epi32(-1);
50572        let mask = 0b11111111;
50573        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50574        assert_eq!(r, 0b11011010);
50575    }
50576
50577    #[simd_test(enable = "avx512f,avx512vl")]
50578    unsafe fn test_mm_cmpgt_epi32_mask() {
50579        let a = _mm_set_epi32(0, 1, -1, 13);
50580        let b = _mm_set1_epi32(-1);
50581        let r = _mm_cmpgt_epi32_mask(a, b);
50582        assert_eq!(r, 0b00001101);
50583    }
50584
50585    #[simd_test(enable = "avx512f,avx512vl")]
50586    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50587        let a = _mm_set_epi32(0, 1, -1, 13);
50588        let b = _mm_set1_epi32(-1);
50589        let mask = 0b11111111;
50590        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50591        assert_eq!(r, 0b00001101);
50592    }
50593
50594    #[simd_test(enable = "avx512f")]
50595    unsafe fn test_mm512_cmple_epi32_mask() {
50596        #[rustfmt::skip]
50597        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50598                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50599        let b = _mm512_set1_epi32(-1);
50600        assert_eq!(
50601            _mm512_cmple_epi32_mask(a, b),
50602            !_mm512_cmpgt_epi32_mask(a, b)
50603        )
50604    }
50605
50606    #[simd_test(enable = "avx512f")]
50607    unsafe fn test_mm512_mask_cmple_epi32_mask() {
50608        #[rustfmt::skip]
50609        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50610                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50611        let b = _mm512_set1_epi32(-1);
50612        let mask = 0b01111010_01111010;
50613        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50614    }
50615
50616    #[simd_test(enable = "avx512f,avx512vl")]
50617    unsafe fn test_mm256_cmple_epi32_mask() {
50618        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50619        let b = _mm256_set1_epi32(-1);
50620        let r = _mm256_cmple_epi32_mask(a, b);
50621        assert_eq!(r, 0b00100101)
50622    }
50623
50624    #[simd_test(enable = "avx512f,avx512vl")]
50625    unsafe fn test_mm256_mask_cmple_epi32_mask() {
50626        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50627        let b = _mm256_set1_epi32(-1);
50628        let mask = 0b11111111;
50629        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50630        assert_eq!(r, 0b00100101)
50631    }
50632
50633    #[simd_test(enable = "avx512f,avx512vl")]
50634    unsafe fn test_mm_cmple_epi32_mask() {
50635        let a = _mm_set_epi32(0, 1, -1, 200);
50636        let b = _mm_set1_epi32(-1);
50637        let r = _mm_cmple_epi32_mask(a, b);
50638        assert_eq!(r, 0b00000010)
50639    }
50640
50641    #[simd_test(enable = "avx512f,avx512vl")]
50642    unsafe fn test_mm_mask_cmple_epi32_mask() {
50643        let a = _mm_set_epi32(0, 1, -1, 200);
50644        let b = _mm_set1_epi32(-1);
50645        let mask = 0b11111111;
50646        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50647        assert_eq!(r, 0b00000010)
50648    }
50649
50650    #[simd_test(enable = "avx512f")]
50651    unsafe fn test_mm512_cmpge_epi32_mask() {
50652        #[rustfmt::skip]
50653        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50654                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50655        let b = _mm512_set1_epi32(-1);
50656        assert_eq!(
50657            _mm512_cmpge_epi32_mask(a, b),
50658            !_mm512_cmplt_epi32_mask(a, b)
50659        )
50660    }
50661
50662    #[simd_test(enable = "avx512f")]
50663    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50664        #[rustfmt::skip]
50665        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50666                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50667        let b = _mm512_set1_epi32(-1);
50668        let mask = 0b01111010_01111010;
50669        assert_eq!(
50670            _mm512_mask_cmpge_epi32_mask(mask, a, b),
50671            0b01111010_01111010
50672        );
50673    }
50674
50675    #[simd_test(enable = "avx512f,avx512vl")]
50676    unsafe fn test_mm256_cmpge_epi32_mask() {
50677        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50678        let b = _mm256_set1_epi32(-1);
50679        let r = _mm256_cmpge_epi32_mask(a, b);
50680        assert_eq!(r, 0b11111010)
50681    }
50682
50683    #[simd_test(enable = "avx512f,avx512vl")]
50684    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50685        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50686        let b = _mm256_set1_epi32(-1);
50687        let mask = 0b11111111;
50688        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50689        assert_eq!(r, 0b11111010)
50690    }
50691
50692    #[simd_test(enable = "avx512f,avx512vl")]
50693    unsafe fn test_mm_cmpge_epi32_mask() {
50694        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50695        let b = _mm_set1_epi32(-1);
50696        let r = _mm_cmpge_epi32_mask(a, b);
50697        assert_eq!(r, 0b00001111)
50698    }
50699
50700    #[simd_test(enable = "avx512f,avx512vl")]
50701    unsafe fn test_mm_mask_cmpge_epi32_mask() {
50702        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50703        let b = _mm_set1_epi32(-1);
50704        let mask = 0b11111111;
50705        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50706        assert_eq!(r, 0b00001111)
50707    }
50708
50709    #[simd_test(enable = "avx512f")]
50710    unsafe fn test_mm512_cmpeq_epi32_mask() {
50711        #[rustfmt::skip]
50712        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50713                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50714        #[rustfmt::skip]
50715        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50716                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50717        let m = _mm512_cmpeq_epi32_mask(b, a);
50718        assert_eq!(m, 0b11001111_11001111);
50719    }
50720
50721    #[simd_test(enable = "avx512f")]
50722    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50723        #[rustfmt::skip]
50724        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50725                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50726        #[rustfmt::skip]
50727        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50728                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50729        let mask = 0b01111010_01111010;
50730        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50731        assert_eq!(r, 0b01001010_01001010);
50732    }
50733
50734    #[simd_test(enable = "avx512f,avx512vl")]
50735    unsafe fn test_mm256_cmpeq_epi32_mask() {
50736        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50737        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50738        let m = _mm256_cmpeq_epi32_mask(b, a);
50739        assert_eq!(m, 0b11001111);
50740    }
50741
50742    #[simd_test(enable = "avx512f,avx512vl")]
50743    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50744        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50745        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50746        let mask = 0b01111010;
50747        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50748        assert_eq!(r, 0b01001010);
50749    }
50750
50751    #[simd_test(enable = "avx512f,avx512vl")]
50752    unsafe fn test_mm_cmpeq_epi32_mask() {
50753        let a = _mm_set_epi32(0, 1, -1, 13);
50754        let b = _mm_set_epi32(0, 1, 13, 42);
50755        let m = _mm_cmpeq_epi32_mask(b, a);
50756        assert_eq!(m, 0b00001100);
50757    }
50758
50759    #[simd_test(enable = "avx512f,avx512vl")]
50760    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50761        let a = _mm_set_epi32(0, 1, -1, 13);
50762        let b = _mm_set_epi32(0, 1, 13, 42);
50763        let mask = 0b11111111;
50764        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50765        assert_eq!(r, 0b00001100);
50766    }
50767
50768    #[simd_test(enable = "avx512f")]
50769    unsafe fn test_mm512_cmpneq_epi32_mask() {
50770        #[rustfmt::skip]
50771        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50772                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50773        #[rustfmt::skip]
50774        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50775                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50776        let m = _mm512_cmpneq_epi32_mask(b, a);
50777        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50778    }
50779
50780    #[simd_test(enable = "avx512f")]
50781    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50782        #[rustfmt::skip]
50783        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50784                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50785        #[rustfmt::skip]
50786        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50787                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50788        let mask = 0b01111010_01111010;
50789        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50790        assert_eq!(r, 0b00110010_00110010)
50791    }
50792
50793    #[simd_test(enable = "avx512f,avx512vl")]
50794    unsafe fn test_mm256_cmpneq_epi32_mask() {
50795        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50796        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50797        let m = _mm256_cmpneq_epi32_mask(b, a);
50798        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50799    }
50800
50801    #[simd_test(enable = "avx512f,avx512vl")]
50802    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50803        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50804        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50805        let mask = 0b11111111;
50806        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50807        assert_eq!(r, 0b00110011)
50808    }
50809
50810    #[simd_test(enable = "avx512f,avx512vl")]
50811    unsafe fn test_mm_cmpneq_epi32_mask() {
50812        let a = _mm_set_epi32(0, 1, -1, 13);
50813        let b = _mm_set_epi32(0, 1, 13, 42);
50814        let r = _mm_cmpneq_epi32_mask(b, a);
50815        assert_eq!(r, 0b00000011)
50816    }
50817
50818    #[simd_test(enable = "avx512f,avx512vl")]
50819    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50820        let a = _mm_set_epi32(0, 1, -1, 13);
50821        let b = _mm_set_epi32(0, 1, 13, 42);
50822        let mask = 0b11111111;
50823        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50824        assert_eq!(r, 0b00000011)
50825    }
50826
50827    #[simd_test(enable = "avx512f")]
50828    unsafe fn test_mm512_cmp_epi32_mask() {
50829        #[rustfmt::skip]
50830        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50831                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50832        let b = _mm512_set1_epi32(-1);
50833        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50834        assert_eq!(m, 0b00000101_00000101);
50835    }
50836
50837    #[simd_test(enable = "avx512f")]
50838    unsafe fn test_mm512_mask_cmp_epi32_mask() {
50839        #[rustfmt::skip]
50840        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50841                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50842        let b = _mm512_set1_epi32(-1);
50843        let mask = 0b01100110_01100110;
50844        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50845        assert_eq!(r, 0b00000100_00000100);
50846    }
50847
50848    #[simd_test(enable = "avx512f,avx512vl")]
50849    unsafe fn test_mm256_cmp_epi32_mask() {
50850        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851        let b = _mm256_set1_epi32(-1);
50852        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50853        assert_eq!(m, 0b00000101);
50854    }
50855
50856    #[simd_test(enable = "avx512f,avx512vl")]
50857    unsafe fn test_mm256_mask_cmp_epi32_mask() {
50858        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50859        let b = _mm256_set1_epi32(-1);
50860        let mask = 0b01100110;
50861        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50862        assert_eq!(r, 0b00000100);
50863    }
50864
50865    #[simd_test(enable = "avx512f,avx512vl")]
50866    unsafe fn test_mm_cmp_epi32_mask() {
50867        let a = _mm_set_epi32(0, 1, -1, 13);
50868        let b = _mm_set1_epi32(1);
50869        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50870        assert_eq!(m, 0b00001010);
50871    }
50872
50873    #[simd_test(enable = "avx512f,avx512vl")]
50874    unsafe fn test_mm_mask_cmp_epi32_mask() {
50875        let a = _mm_set_epi32(0, 1, -1, 13);
50876        let b = _mm_set1_epi32(1);
50877        let mask = 0b11111111;
50878        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50879        assert_eq!(r, 0b00001010);
50880    }
50881
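    // The `set`/`setr` tests below rely on the argument-order convention: `_mm512_set_*`
    // takes elements from the highest index first, while `_mm512_setr_*` takes them in
    // memory order (element 0 first), so reversing the argument list yields the same vector.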
50882    #[simd_test(enable = "avx512f")]
50883    unsafe fn test_mm512_set_epi8() {
50884        let r = _mm512_set1_epi8(2);
50885        assert_eq_m512i(
50886            r,
50887            _mm512_set_epi8(
50888                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50889                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50890                2, 2, 2, 2, 2, 2, 2, 2,
50891            ),
50892        )
50893    }
50894
50895    #[simd_test(enable = "avx512f")]
50896    unsafe fn test_mm512_set_epi16() {
50897        let r = _mm512_set1_epi16(2);
50898        assert_eq_m512i(
50899            r,
50900            _mm512_set_epi16(
50901                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50902                2, 2, 2, 2,
50903            ),
50904        )
50905    }
50906
50907    #[simd_test(enable = "avx512f")]
50908    unsafe fn test_mm512_set_epi32() {
50909        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50910        assert_eq_m512i(
50911            r,
50912            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50913        )
50914    }
50915
50916    #[simd_test(enable = "avx512f")]
50917    unsafe fn test_mm512_setr_epi32() {
50918        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50919        assert_eq_m512i(
50920            r,
50921            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50922        )
50923    }
50924
50925    #[simd_test(enable = "avx512f")]
50926    unsafe fn test_mm512_set1_epi8() {
50927        let r = _mm512_set_epi8(
50928            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50929            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50930            2, 2, 2, 2, 2, 2,
50931        );
50932        assert_eq_m512i(r, _mm512_set1_epi8(2));
50933    }
50934
50935    #[simd_test(enable = "avx512f")]
50936    unsafe fn test_mm512_set1_epi16() {
50937        let r = _mm512_set_epi16(
50938            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50939            2, 2, 2,
50940        );
50941        assert_eq_m512i(r, _mm512_set1_epi16(2));
50942    }
50943
50944    #[simd_test(enable = "avx512f")]
50945    unsafe fn test_mm512_set1_epi32() {
50946        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50947        assert_eq_m512i(r, _mm512_set1_epi32(2));
50948    }
50949
50950    #[simd_test(enable = "avx512f")]
50951    unsafe fn test_mm512_setzero_si512() {
50952        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
50953    }
50954
50955    #[simd_test(enable = "avx512f")]
50956    unsafe fn test_mm512_setzero_epi32() {
50957        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
50958    }
50959
50960    #[simd_test(enable = "avx512f")]
50961    unsafe fn test_mm512_set_ps() {
50962        let r = _mm512_setr_ps(
50963            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50964        );
50965        assert_eq_m512(
50966            r,
50967            _mm512_set_ps(
50968                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50969            ),
50970        )
50971    }
50972
50973    #[simd_test(enable = "avx512f")]
50974    unsafe fn test_mm512_setr_ps() {
50975        let r = _mm512_set_ps(
50976            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50977        );
50978        assert_eq_m512(
50979            r,
50980            _mm512_setr_ps(
50981                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50982            ),
50983        )
50984    }
50985
50986    #[simd_test(enable = "avx512f")]
50987    unsafe fn test_mm512_set1_ps() {
50988        #[rustfmt::skip]
50989        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
50990                                     2., 2., 2., 2., 2., 2., 2., 2.);
50991        assert_eq_m512(expected, _mm512_set1_ps(2.));
50992    }
50993
50994    #[simd_test(enable = "avx512f")]
50995    unsafe fn test_mm512_set4_epi32() {
50996        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
50997        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
50998    }
50999
51000    #[simd_test(enable = "avx512f")]
51001    unsafe fn test_mm512_set4_ps() {
51002        let r = _mm512_set_ps(
51003            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51004        );
51005        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51006    }
51007
51008    #[simd_test(enable = "avx512f")]
51009    unsafe fn test_mm512_setr4_epi32() {
51010        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51011        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51012    }
51013
51014    #[simd_test(enable = "avx512f")]
51015    unsafe fn test_mm512_setr4_ps() {
51016        let r = _mm512_set_ps(
51017            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51018        );
51019        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51020    }
51021
51022    #[simd_test(enable = "avx512f")]
51023    unsafe fn test_mm512_setzero_ps() {
51024        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51025    }
51026
51027    #[simd_test(enable = "avx512f")]
51028    unsafe fn test_mm512_setzero() {
51029        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51030    }
51031
51032    #[simd_test(enable = "avx512f")]
51033    unsafe fn test_mm512_loadu_pd() {
51034        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51035        let p = a.as_ptr();
51036        let r = _mm512_loadu_pd(black_box(p));
51037        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51038        assert_eq_m512d(r, e);
51039    }
51040
51041    #[simd_test(enable = "avx512f")]
51042    unsafe fn test_mm512_storeu_pd() {
51043        let a = _mm512_set1_pd(9.);
51044        let mut r = _mm512_undefined_pd();
51045        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51046        assert_eq_m512d(r, a);
51047    }
51048
51049    #[simd_test(enable = "avx512f")]
51050    unsafe fn test_mm512_loadu_ps() {
51051        let a = &[
51052            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51053        ];
51054        let p = a.as_ptr();
51055        let r = _mm512_loadu_ps(black_box(p));
51056        let e = _mm512_setr_ps(
51057            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51058        );
51059        assert_eq_m512(r, e);
51060    }
51061
51062    #[simd_test(enable = "avx512f")]
51063    unsafe fn test_mm512_storeu_ps() {
51064        let a = _mm512_set1_ps(9.);
51065        let mut r = _mm512_undefined_ps();
51066        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51067        assert_eq_m512(r, a);
51068    }
51069
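    // Masked loads read only the lanes whose bit is set in the mask; the remaining lanes
    // are taken from `src` for the `mask_` variants and zeroed for the `maskz_` variants.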
51070    #[simd_test(enable = "avx512f")]
51071    unsafe fn test_mm512_mask_loadu_epi32() {
51072        let src = _mm512_set1_epi32(42);
51073        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51074        let p = a.as_ptr();
51075        let m = 0b11101000_11001010;
51076        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51077        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51078        assert_eq_m512i(r, e);
51079    }
51080
51081    #[simd_test(enable = "avx512f")]
51082    unsafe fn test_mm512_maskz_loadu_epi32() {
51083        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51084        let p = a.as_ptr();
51085        let m = 0b11101000_11001010;
51086        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51087        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51088        assert_eq_m512i(r, e);
51089    }
51090
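    // Unlike the `loadu`/`storeu` forms above, the aligned `load`/`store` intrinsics
    // require the pointer to be aligned to the full vector width, hence the
    // `#[repr(align(64))]` wrappers here (and `align(32)`/`align(16)` for the 256- and
    // 128-bit tests further below).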
51091    #[simd_test(enable = "avx512f")]
51092    unsafe fn test_mm512_mask_load_epi32() {
51093        #[repr(align(64))]
51094        struct Align {
51095            data: [i32; 16], // 64 bytes
51096        }
51097        let src = _mm512_set1_epi32(42);
51098        let a = Align {
51099            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51100        };
51101        let p = a.data.as_ptr();
51102        let m = 0b11101000_11001010;
51103        let r = _mm512_mask_load_epi32(src, m, black_box(p));
51104        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51105        assert_eq_m512i(r, e);
51106    }
51107
51108    #[simd_test(enable = "avx512f")]
51109    unsafe fn test_mm512_maskz_load_epi32() {
51110        #[repr(align(64))]
51111        struct Align {
51112            data: [i32; 16], // 64 bytes
51113        }
51114        let a = Align {
51115            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51116        };
51117        let p = a.data.as_ptr();
51118        let m = 0b11101000_11001010;
51119        let r = _mm512_maskz_load_epi32(m, black_box(p));
51120        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51121        assert_eq_m512i(r, e);
51122    }
51123
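    // Masked stores write only the selected lanes and leave the rest of the destination
    // untouched, which is why the buffer is pre-filled with the `42` sentinel.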
51124    #[simd_test(enable = "avx512f")]
51125    unsafe fn test_mm512_mask_storeu_epi32() {
51126        let mut r = [42_i32; 16];
51127        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51128        let m = 0b11101000_11001010;
51129        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51130        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51131        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51132    }
51133
51134    #[simd_test(enable = "avx512f")]
51135    unsafe fn test_mm512_mask_store_epi32() {
51136        #[repr(align(64))]
51137        struct Align {
51138            data: [i32; 16],
51139        }
51140        let mut r = Align { data: [42; 16] };
51141        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51142        let m = 0b11101000_11001010;
51143        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51144        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51145        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51146    }
51147
51148    #[simd_test(enable = "avx512f")]
51149    unsafe fn test_mm512_mask_loadu_epi64() {
51150        let src = _mm512_set1_epi64(42);
51151        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51152        let p = a.as_ptr();
51153        let m = 0b11001010;
51154        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51155        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51156        assert_eq_m512i(r, e);
51157    }
51158
51159    #[simd_test(enable = "avx512f")]
51160    unsafe fn test_mm512_maskz_loadu_epi64() {
51161        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51162        let p = a.as_ptr();
51163        let m = 0b11001010;
51164        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51165        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51166        assert_eq_m512i(r, e);
51167    }
51168
51169    #[simd_test(enable = "avx512f")]
51170    unsafe fn test_mm512_mask_load_epi64() {
51171        #[repr(align(64))]
51172        struct Align {
51173            data: [i64; 8], // 64 bytes
51174        }
51175        let src = _mm512_set1_epi64(42);
51176        let a = Align {
51177            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51178        };
51179        let p = a.data.as_ptr();
51180        let m = 0b11001010;
51181        let r = _mm512_mask_load_epi64(src, m, black_box(p));
51182        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51183        assert_eq_m512i(r, e);
51184    }
51185
51186    #[simd_test(enable = "avx512f")]
51187    unsafe fn test_mm512_maskz_load_epi64() {
51188        #[repr(align(64))]
51189        struct Align {
51190            data: [i64; 8], // 64 bytes
51191        }
51192        let a = Align {
51193            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51194        };
51195        let p = a.data.as_ptr();
51196        let m = 0b11001010;
51197        let r = _mm512_maskz_load_epi64(m, black_box(p));
51198        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51199        assert_eq_m512i(r, e);
51200    }
51201
51202    #[simd_test(enable = "avx512f")]
51203    unsafe fn test_mm512_mask_storeu_epi64() {
51204        let mut r = [42_i64; 8];
51205        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51206        let m = 0b11001010;
51207        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51208        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51209        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51210    }
51211
51212    #[simd_test(enable = "avx512f")]
51213    unsafe fn test_mm512_mask_store_epi64() {
51214        #[repr(align(64))]
51215        struct Align {
51216            data: [i64; 8],
51217        }
51218        let mut r = Align { data: [42; 8] };
51219        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51220        let m = 0b11001010;
51221        let p = r.data.as_mut_ptr();
51222        _mm512_mask_store_epi64(p, m, a);
51223        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51224        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51225    }
51226
51227    #[simd_test(enable = "avx512f")]
51228    unsafe fn test_mm512_mask_loadu_ps() {
51229        let src = _mm512_set1_ps(42.0);
51230        let a = &[
51231            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51232            16.0,
51233        ];
51234        let p = a.as_ptr();
51235        let m = 0b11101000_11001010;
51236        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51237        let e = _mm512_setr_ps(
51238            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51239            16.0,
51240        );
51241        assert_eq_m512(r, e);
51242    }
51243
51244    #[simd_test(enable = "avx512f")]
51245    unsafe fn test_mm512_maskz_loadu_ps() {
51246        let a = &[
51247            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51248            16.0,
51249        ];
51250        let p = a.as_ptr();
51251        let m = 0b11101000_11001010;
51252        let r = _mm512_maskz_loadu_ps(m, black_box(p));
51253        let e = _mm512_setr_ps(
51254            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51255        );
51256        assert_eq_m512(r, e);
51257    }
51258
51259    #[simd_test(enable = "avx512f")]
51260    unsafe fn test_mm512_mask_load_ps() {
51261        #[repr(align(64))]
51262        struct Align {
51263            data: [f32; 16], // 64 bytes
51264        }
51265        let src = _mm512_set1_ps(42.0);
51266        let a = Align {
51267            data: [
51268                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51269                15.0, 16.0,
51270            ],
51271        };
51272        let p = a.data.as_ptr();
51273        let m = 0b11101000_11001010;
51274        let r = _mm512_mask_load_ps(src, m, black_box(p));
51275        let e = _mm512_setr_ps(
51276            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51277            16.0,
51278        );
51279        assert_eq_m512(r, e);
51280    }
51281
51282    #[simd_test(enable = "avx512f")]
51283    unsafe fn test_mm512_maskz_load_ps() {
51284        #[repr(align(64))]
51285        struct Align {
51286            data: [f32; 16], // 64 bytes
51287        }
51288        let a = Align {
51289            data: [
51290                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51291                15.0, 16.0,
51292            ],
51293        };
51294        let p = a.data.as_ptr();
51295        let m = 0b11101000_11001010;
51296        let r = _mm512_maskz_load_ps(m, black_box(p));
51297        let e = _mm512_setr_ps(
51298            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51299        );
51300        assert_eq_m512(r, e);
51301    }
51302
51303    #[simd_test(enable = "avx512f")]
51304    unsafe fn test_mm512_mask_storeu_ps() {
51305        let mut r = [42_f32; 16];
51306        let a = _mm512_setr_ps(
51307            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51308        );
51309        let m = 0b11101000_11001010;
51310        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51311        let e = _mm512_setr_ps(
51312            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51313            16.0,
51314        );
51315        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51316    }
51317
51318    #[simd_test(enable = "avx512f")]
51319    unsafe fn test_mm512_mask_store_ps() {
51320        #[repr(align(64))]
51321        struct Align {
51322            data: [f32; 16],
51323        }
51324        let mut r = Align { data: [42.0; 16] };
51325        let a = _mm512_setr_ps(
51326            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51327        );
51328        let m = 0b11101000_11001010;
51329        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51330        let e = _mm512_setr_ps(
51331            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51332            16.0,
51333        );
51334        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51335    }
51336
51337    #[simd_test(enable = "avx512f")]
51338    unsafe fn test_mm512_mask_loadu_pd() {
51339        let src = _mm512_set1_pd(42.0);
51340        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51341        let p = a.as_ptr();
51342        let m = 0b11001010;
51343        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51344        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51345        assert_eq_m512d(r, e);
51346    }
51347
51348    #[simd_test(enable = "avx512f")]
51349    unsafe fn test_mm512_maskz_loadu_pd() {
51350        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51351        let p = a.as_ptr();
51352        let m = 0b11001010;
51353        let r = _mm512_maskz_loadu_pd(m, black_box(p));
51354        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51355        assert_eq_m512d(r, e);
51356    }
51357
51358    #[simd_test(enable = "avx512f")]
51359    unsafe fn test_mm512_mask_load_pd() {
51360        #[repr(align(64))]
51361        struct Align {
51362            data: [f64; 8], // 64 bytes
51363        }
51364        let src = _mm512_set1_pd(42.0);
51365        let a = Align {
51366            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51367        };
51368        let p = a.data.as_ptr();
51369        let m = 0b11001010;
51370        let r = _mm512_mask_load_pd(src, m, black_box(p));
51371        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51372        assert_eq_m512d(r, e);
51373    }
51374
51375    #[simd_test(enable = "avx512f")]
51376    unsafe fn test_mm512_maskz_load_pd() {
51377        #[repr(align(64))]
51378        struct Align {
51379            data: [f64; 8], // 64 bytes
51380        }
51381        let a = Align {
51382            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51383        };
51384        let p = a.data.as_ptr();
51385        let m = 0b11001010;
51386        let r = _mm512_maskz_load_pd(m, black_box(p));
51387        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51388        assert_eq_m512d(r, e);
51389    }
51390
51391    #[simd_test(enable = "avx512f")]
51392    unsafe fn test_mm512_mask_storeu_pd() {
51393        let mut r = [42_f64; 8];
51394        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51395        let m = 0b11001010;
51396        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51397        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51398        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51399    }
51400
51401    #[simd_test(enable = "avx512f")]
51402    unsafe fn test_mm512_mask_store_pd() {
51403        #[repr(align(64))]
51404        struct Align {
51405            data: [f64; 8],
51406        }
51407        let mut r = Align { data: [42.0; 8] };
51408        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51409        let m = 0b11001010;
51410        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51411        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51412        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51413    }
51414
51415    #[simd_test(enable = "avx512f,avx512vl")]
51416    unsafe fn test_mm256_mask_loadu_epi32() {
51417        let src = _mm256_set1_epi32(42);
51418        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51419        let p = a.as_ptr();
51420        let m = 0b11001010;
51421        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51422        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51423        assert_eq_m256i(r, e);
51424    }
51425
51426    #[simd_test(enable = "avx512f,avx512vl")]
51427    unsafe fn test_mm256_maskz_loadu_epi32() {
51428        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51429        let p = a.as_ptr();
51430        let m = 0b11001010;
51431        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51432        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51433        assert_eq_m256i(r, e);
51434    }
51435
51436    #[simd_test(enable = "avx512f,avx512vl")]
51437    unsafe fn test_mm256_mask_load_epi32() {
51438        #[repr(align(32))]
51439        struct Align {
51440            data: [i32; 8], // 32 bytes
51441        }
51442        let src = _mm256_set1_epi32(42);
51443        let a = Align {
51444            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51445        };
51446        let p = a.data.as_ptr();
51447        let m = 0b11001010;
51448        let r = _mm256_mask_load_epi32(src, m, black_box(p));
51449        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51450        assert_eq_m256i(r, e);
51451    }
51452
51453    #[simd_test(enable = "avx512f,avx512vl")]
51454    unsafe fn test_mm256_maskz_load_epi32() {
51455        #[repr(align(32))]
51456        struct Align {
51457            data: [i32; 8], // 32 bytes
51458        }
51459        let a = Align {
51460            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51461        };
51462        let p = a.data.as_ptr();
51463        let m = 0b11001010;
51464        let r = _mm256_maskz_load_epi32(m, black_box(p));
51465        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51466        assert_eq_m256i(r, e);
51467    }
51468
51469    #[simd_test(enable = "avx512f,avx512vl")]
51470    unsafe fn test_mm256_mask_storeu_epi32() {
51471        let mut r = [42_i32; 8];
51472        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51473        let m = 0b11001010;
51474        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51475        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51476        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51477    }
51478
51479    #[simd_test(enable = "avx512f,avx512vl")]
51480    unsafe fn test_mm256_mask_store_epi32() {
51481        #[repr(align(64))]
51482        struct Align {
51483            data: [i32; 8],
51484        }
51485        let mut r = Align { data: [42; 8] };
51486        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51487        let m = 0b11001010;
51488        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51489        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51490        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51491    }
51492
51493    #[simd_test(enable = "avx512f,avx512vl")]
51494    unsafe fn test_mm256_mask_loadu_epi64() {
51495        let src = _mm256_set1_epi64x(42);
51496        let a = &[1_i64, 2, 3, 4];
51497        let p = a.as_ptr();
51498        let m = 0b1010;
51499        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51500        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51501        assert_eq_m256i(r, e);
51502    }
51503
51504    #[simd_test(enable = "avx512f,avx512vl")]
51505    unsafe fn test_mm256_maskz_loadu_epi64() {
51506        let a = &[1_i64, 2, 3, 4];
51507        let p = a.as_ptr();
51508        let m = 0b1010;
51509        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51510        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51511        assert_eq_m256i(r, e);
51512    }
51513
51514    #[simd_test(enable = "avx512f,avx512vl")]
51515    unsafe fn test_mm256_mask_load_epi64() {
51516        #[repr(align(32))]
51517        struct Align {
51518            data: [i64; 4], // 32 bytes
51519        }
51520        let src = _mm256_set1_epi64x(42);
51521        let a = Align {
51522            data: [1_i64, 2, 3, 4],
51523        };
51524        let p = a.data.as_ptr();
51525        let m = 0b1010;
51526        let r = _mm256_mask_load_epi64(src, m, black_box(p));
51527        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51528        assert_eq_m256i(r, e);
51529    }
51530
51531    #[simd_test(enable = "avx512f,avx512vl")]
51532    unsafe fn test_mm256_maskz_load_epi64() {
51533        #[repr(align(32))]
51534        struct Align {
51535            data: [i64; 4], // 32 bytes
51536        }
51537        let a = Align {
51538            data: [1_i64, 2, 3, 4],
51539        };
51540        let p = a.data.as_ptr();
51541        let m = 0b1010;
51542        let r = _mm256_maskz_load_epi64(m, black_box(p));
51543        let e = _mm256_setr_epi64x(0, 2, 0, 4);
51544        assert_eq_m256i(r, e);
51545    }
51546
51547    #[simd_test(enable = "avx512f,avx512vl")]
51548    unsafe fn test_mm256_mask_storeu_epi64() {
51549        let mut r = [42_i64; 4];
51550        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51551        let m = 0b1010;
51552        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51553        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51554        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51555    }
51556
51557    #[simd_test(enable = "avx512f,avx512vl")]
51558    unsafe fn test_mm256_mask_store_epi64() {
51559        #[repr(align(32))]
51560        struct Align {
51561            data: [i64; 4],
51562        }
51563        let mut r = Align { data: [42; 4] };
51564        let a = _mm256_setr_epi64x(1, 2, 3, 4);
51565        let m = 0b1010;
51566        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51567        let e = _mm256_setr_epi64x(42, 2, 42, 4);
51568        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51569    }
51570
51571    #[simd_test(enable = "avx512f,avx512vl")]
51572    unsafe fn test_mm256_mask_loadu_ps() {
51573        let src = _mm256_set1_ps(42.0);
51574        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51575        let p = a.as_ptr();
51576        let m = 0b11001010;
51577        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51578        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51579        assert_eq_m256(r, e);
51580    }
51581
51582    #[simd_test(enable = "avx512f,avx512vl")]
51583    unsafe fn test_mm256_maskz_loadu_ps() {
51584        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51585        let p = a.as_ptr();
51586        let m = 0b11001010;
51587        let r = _mm256_maskz_loadu_ps(m, black_box(p));
51588        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51589        assert_eq_m256(r, e);
51590    }
51591
51592    #[simd_test(enable = "avx512f,avx512vl")]
51593    unsafe fn test_mm256_mask_load_ps() {
51594        #[repr(align(32))]
51595        struct Align {
51596            data: [f32; 8], // 32 bytes
51597        }
51598        let src = _mm256_set1_ps(42.0);
51599        let a = Align {
51600            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51601        };
51602        let p = a.data.as_ptr();
51603        let m = 0b11001010;
51604        let r = _mm256_mask_load_ps(src, m, black_box(p));
51605        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51606        assert_eq_m256(r, e);
51607    }
51608
51609    #[simd_test(enable = "avx512f,avx512vl")]
51610    unsafe fn test_mm256_maskz_load_ps() {
51611        #[repr(align(32))]
51612        struct Align {
51613            data: [f32; 8], // 32 bytes
51614        }
51615        let a = Align {
51616            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51617        };
51618        let p = a.data.as_ptr();
51619        let m = 0b11001010;
51620        let r = _mm256_maskz_load_ps(m, black_box(p));
51621        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51622        assert_eq_m256(r, e);
51623    }
51624
51625    #[simd_test(enable = "avx512f,avx512vl")]
51626    unsafe fn test_mm256_mask_storeu_ps() {
51627        let mut r = [42_f32; 8];
51628        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51629        let m = 0b11001010;
51630        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51631        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51632        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51633    }
51634
51635    #[simd_test(enable = "avx512f,avx512vl")]
51636    unsafe fn test_mm256_mask_store_ps() {
51637        #[repr(align(32))]
51638        struct Align {
51639            data: [f32; 8],
51640        }
51641        let mut r = Align { data: [42.0; 8] };
51642        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51643        let m = 0b11001010;
51644        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51645        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51646        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51647    }
51648
51649    #[simd_test(enable = "avx512f,avx512vl")]
51650    unsafe fn test_mm256_mask_loadu_pd() {
51651        let src = _mm256_set1_pd(42.0);
51652        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51653        let p = a.as_ptr();
51654        let m = 0b1010;
51655        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51656        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51657        assert_eq_m256d(r, e);
51658    }
51659
51660    #[simd_test(enable = "avx512f,avx512vl")]
51661    unsafe fn test_mm256_maskz_loadu_pd() {
51662        let a = &[1.0_f64, 2.0, 3.0, 4.0];
51663        let p = a.as_ptr();
51664        let m = 0b1010;
51665        let r = _mm256_maskz_loadu_pd(m, black_box(p));
51666        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51667        assert_eq_m256d(r, e);
51668    }
51669
51670    #[simd_test(enable = "avx512f,avx512vl")]
51671    unsafe fn test_mm256_mask_load_pd() {
51672        #[repr(align(32))]
51673        struct Align {
51674            data: [f64; 4], // 32 bytes
51675        }
51676        let src = _mm256_set1_pd(42.0);
51677        let a = Align {
51678            data: [1.0_f64, 2.0, 3.0, 4.0],
51679        };
51680        let p = a.data.as_ptr();
51681        let m = 0b1010;
51682        let r = _mm256_mask_load_pd(src, m, black_box(p));
51683        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51684        assert_eq_m256d(r, e);
51685    }
51686
51687    #[simd_test(enable = "avx512f,avx512vl")]
51688    unsafe fn test_mm256_maskz_load_pd() {
51689        #[repr(align(32))]
51690        struct Align {
51691            data: [f64; 4], // 32 bytes
51692        }
51693        let a = Align {
51694            data: [1.0_f64, 2.0, 3.0, 4.0],
51695        };
51696        let p = a.data.as_ptr();
51697        let m = 0b1010;
51698        let r = _mm256_maskz_load_pd(m, black_box(p));
51699        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51700        assert_eq_m256d(r, e);
51701    }
51702
51703    #[simd_test(enable = "avx512f,avx512vl")]
51704    unsafe fn test_mm256_mask_storeu_pd() {
51705        let mut r = [42_f64; 4];
51706        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51707        let m = 0b1010;
51708        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51709        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51710        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51711    }
51712
51713    #[simd_test(enable = "avx512f,avx512vl")]
51714    unsafe fn test_mm256_mask_store_pd() {
51715        #[repr(align(32))]
51716        struct Align {
51717            data: [f64; 4],
51718        }
51719        let mut r = Align { data: [42.0; 4] };
51720        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51721        let m = 0b1010;
51722        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51723        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51724        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51725    }
51726
51727    #[simd_test(enable = "avx512f,avx512vl")]
51728    unsafe fn test_mm_mask_loadu_epi32() {
51729        let src = _mm_set1_epi32(42);
51730        let a = &[1_i32, 2, 3, 4];
51731        let p = a.as_ptr();
51732        let m = 0b1010;
51733        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51734        let e = _mm_setr_epi32(42, 2, 42, 4);
51735        assert_eq_m128i(r, e);
51736    }
51737
51738    #[simd_test(enable = "avx512f,avx512vl")]
51739    unsafe fn test_mm_maskz_loadu_epi32() {
51740        let a = &[1_i32, 2, 3, 4];
51741        let p = a.as_ptr();
51742        let m = 0b1010;
51743        let r = _mm_maskz_loadu_epi32(m, black_box(p));
51744        let e = _mm_setr_epi32(0, 2, 0, 4);
51745        assert_eq_m128i(r, e);
51746    }
51747
51748    #[simd_test(enable = "avx512f,avx512vl")]
51749    unsafe fn test_mm_mask_load_epi32() {
51750        #[repr(align(16))]
51751        struct Align {
51752            data: [i32; 4], // 16 bytes
51753        }
51754        let src = _mm_set1_epi32(42);
51755        let a = Align {
51756            data: [1_i32, 2, 3, 4],
51757        };
51758        let p = a.data.as_ptr();
51759        let m = 0b1010;
51760        let r = _mm_mask_load_epi32(src, m, black_box(p));
51761        let e = _mm_setr_epi32(42, 2, 42, 4);
51762        assert_eq_m128i(r, e);
51763    }
51764
51765    #[simd_test(enable = "avx512f,avx512vl")]
51766    unsafe fn test_mm_maskz_load_epi32() {
51767        #[repr(align(16))]
51768        struct Align {
51769            data: [i32; 4], // 16 bytes
51770        }
51771        let a = Align {
51772            data: [1_i32, 2, 3, 4],
51773        };
51774        let p = a.data.as_ptr();
51775        let m = 0b1010;
51776        let r = _mm_maskz_load_epi32(m, black_box(p));
51777        let e = _mm_setr_epi32(0, 2, 0, 4);
51778        assert_eq_m128i(r, e);
51779    }
51780
51781    #[simd_test(enable = "avx512f,avx512vl")]
51782    unsafe fn test_mm_mask_storeu_epi32() {
51783        let mut r = [42_i32; 4];
51784        let a = _mm_setr_epi32(1, 2, 3, 4);
51785        let m = 0b1010;
51786        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51787        let e = _mm_setr_epi32(42, 2, 42, 4);
51788        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51789    }
51790
51791    #[simd_test(enable = "avx512f,avx512vl")]
51792    unsafe fn test_mm_mask_store_epi32() {
51793        #[repr(align(16))]
51794        struct Align {
51795            data: [i32; 4], // 16 bytes
51796        }
51797        let mut r = Align { data: [42; 4] };
51798        let a = _mm_setr_epi32(1, 2, 3, 4);
51799        let m = 0b1010;
51800        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51801        let e = _mm_setr_epi32(42, 2, 42, 4);
51802        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51803    }
51804
51805    #[simd_test(enable = "avx512f,avx512vl")]
51806    unsafe fn test_mm_mask_loadu_epi64() {
51807        let src = _mm_set1_epi64x(42);
51808        let a = &[1_i64, 2];
51809        let p = a.as_ptr();
51810        let m = 0b10;
51811        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51812        let e = _mm_setr_epi64x(42, 2);
51813        assert_eq_m128i(r, e);
51814    }
51815
51816    #[simd_test(enable = "avx512f,avx512vl")]
51817    unsafe fn test_mm_maskz_loadu_epi64() {
51818        let a = &[1_i64, 2];
51819        let p = a.as_ptr();
51820        let m = 0b10;
51821        let r = _mm_maskz_loadu_epi64(m, black_box(p));
51822        let e = _mm_setr_epi64x(0, 2);
51823        assert_eq_m128i(r, e);
51824    }
51825
51826    #[simd_test(enable = "avx512f,avx512vl")]
51827    unsafe fn test_mm_mask_load_epi64() {
51828        #[repr(align(16))]
51829        struct Align {
51830            data: [i64; 2], // 16 bytes
51831        }
51832        let src = _mm_set1_epi64x(42);
51833        let a = Align { data: [1_i64, 2] };
51834        let p = a.data.as_ptr();
51835        let m = 0b10;
51836        let r = _mm_mask_load_epi64(src, m, black_box(p));
51837        let e = _mm_setr_epi64x(42, 2);
51838        assert_eq_m128i(r, e);
51839    }
51840
51841    #[simd_test(enable = "avx512f,avx512vl")]
51842    unsafe fn test_mm_maskz_load_epi64() {
51843        #[repr(align(16))]
51844        struct Align {
51845            data: [i64; 2], // 16 bytes
51846        }
51847        let a = Align { data: [1_i64, 2] };
51848        let p = a.data.as_ptr();
51849        let m = 0b10;
51850        let r = _mm_maskz_load_epi64(m, black_box(p));
51851        let e = _mm_setr_epi64x(0, 2);
51852        assert_eq_m128i(r, e);
51853    }
51854
51855    #[simd_test(enable = "avx512f,avx512vl")]
51856    unsafe fn test_mm_mask_storeu_epi64() {
51857        let mut r = [42_i64; 2];
51858        let a = _mm_setr_epi64x(1, 2);
51859        let m = 0b10;
51860        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51861        let e = _mm_setr_epi64x(42, 2);
51862        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51863    }
51864
51865    #[simd_test(enable = "avx512f,avx512vl")]
51866    unsafe fn test_mm_mask_store_epi64() {
51867        #[repr(align(16))]
51868        struct Align {
51869            data: [i64; 2], // 16 bytes
51870        }
51871        let mut r = Align { data: [42; 2] };
51872        let a = _mm_setr_epi64x(1, 2);
51873        let m = 0b10;
51874        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51875        let e = _mm_setr_epi64x(42, 2);
51876        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51877    }
51878
51879    #[simd_test(enable = "avx512f,avx512vl")]
51880    unsafe fn test_mm_mask_loadu_ps() {
51881        let src = _mm_set1_ps(42.0);
51882        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51883        let p = a.as_ptr();
51884        let m = 0b1010;
51885        let r = _mm_mask_loadu_ps(src, m, black_box(p));
51886        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51887        assert_eq_m128(r, e);
51888    }
51889
51890    #[simd_test(enable = "avx512f,avx512vl")]
51891    unsafe fn test_mm_maskz_loadu_ps() {
51892        let a = &[1.0_f32, 2.0, 3.0, 4.0];
51893        let p = a.as_ptr();
51894        let m = 0b1010;
51895        let r = _mm_maskz_loadu_ps(m, black_box(p));
51896        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51897        assert_eq_m128(r, e);
51898    }
51899
51900    #[simd_test(enable = "avx512f,avx512vl")]
51901    unsafe fn test_mm_mask_load_ps() {
51902        #[repr(align(16))]
51903        struct Align {
51904            data: [f32; 4], // 16 bytes
51905        }
51906        let src = _mm_set1_ps(42.0);
51907        let a = Align {
51908            data: [1.0_f32, 2.0, 3.0, 4.0],
51909        };
51910        let p = a.data.as_ptr();
51911        let m = 0b1010;
51912        let r = _mm_mask_load_ps(src, m, black_box(p));
51913        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51914        assert_eq_m128(r, e);
51915    }
51916
51917    #[simd_test(enable = "avx512f,avx512vl")]
51918    unsafe fn test_mm_maskz_load_ps() {
51919        #[repr(align(16))]
51920        struct Align {
51921            data: [f32; 4], // 16 bytes
51922        }
51923        let a = Align {
51924            data: [1.0_f32, 2.0, 3.0, 4.0],
51925        };
51926        let p = a.data.as_ptr();
51927        let m = 0b1010;
51928        let r = _mm_maskz_load_ps(m, black_box(p));
51929        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51930        assert_eq_m128(r, e);
51931    }
51932
51933    #[simd_test(enable = "avx512f,avx512vl")]
51934    unsafe fn test_mm_mask_storeu_ps() {
51935        let mut r = [42_f32; 4];
51936        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51937        let m = 0b1010;
51938        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
51939        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51940        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
51941    }
51942
51943    #[simd_test(enable = "avx512f,avx512vl")]
51944    unsafe fn test_mm_mask_store_ps() {
51945        #[repr(align(16))]
51946        struct Align {
51947            data: [f32; 4], // 16 bytes
51948        }
51949        let mut r = Align { data: [42.0; 4] };
51950        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51951        let m = 0b1010;
51952        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
51953        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51954        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
51955    }
51956
51957    #[simd_test(enable = "avx512f,avx512vl")]
51958    unsafe fn test_mm_mask_loadu_pd() {
51959        let src = _mm_set1_pd(42.0);
51960        let a = &[1.0_f64, 2.0];
51961        let p = a.as_ptr();
51962        let m = 0b10;
51963        let r = _mm_mask_loadu_pd(src, m, black_box(p));
51964        let e = _mm_setr_pd(42.0, 2.0);
51965        assert_eq_m128d(r, e);
51966    }
51967
51968    #[simd_test(enable = "avx512f,avx512vl")]
51969    unsafe fn test_mm_maskz_loadu_pd() {
51970        let a = &[1.0_f64, 2.0];
51971        let p = a.as_ptr();
51972        let m = 0b10;
51973        let r = _mm_maskz_loadu_pd(m, black_box(p));
51974        let e = _mm_setr_pd(0.0, 2.0);
51975        assert_eq_m128d(r, e);
51976    }
51977
51978    #[simd_test(enable = "avx512f,avx512vl")]
51979    unsafe fn test_mm_mask_load_pd() {
51980        #[repr(align(16))]
51981        struct Align {
51982            data: [f64; 2], // 16 bytes
51983        }
51984        let src = _mm_set1_pd(42.0);
51985        let a = Align {
51986            data: [1.0_f64, 2.0],
51987        };
51988        let p = a.data.as_ptr();
51989        let m = 0b10;
51990        let r = _mm_mask_load_pd(src, m, black_box(p));
51991        let e = _mm_setr_pd(42.0, 2.0);
51992        assert_eq_m128d(r, e);
51993    }
51994
51995    #[simd_test(enable = "avx512f,avx512vl")]
51996    unsafe fn test_mm_maskz_load_pd() {
51997        #[repr(align(16))]
51998        struct Align {
51999            data: [f64; 2], // 16 bytes
52000        }
52001        let a = Align {
52002            data: [1.0_f64, 2.0],
52003        };
52004        let p = a.data.as_ptr();
52005        let m = 0b10;
52006        let r = _mm_maskz_load_pd(m, black_box(p));
52007        let e = _mm_setr_pd(0.0, 2.0);
52008        assert_eq_m128d(r, e);
52009    }
52010
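    // Scalar masked load/store tests (`_ss`/`_sd`): only bit 0 of the mask is consulted,
    // selecting whether the low lane comes from memory or from `src` (zero for `maskz_`).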
52011    #[simd_test(enable = "avx512f")]
52012    unsafe fn test_mm_mask_load_ss() {
52013        #[repr(align(16))]
52014        struct Align {
52015            data: f32,
52016        }
52017        let src = _mm_set_ss(2.0);
52018        let mem = Align { data: 1.0 };
52019        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52020        assert_eq_m128(r, _mm_set_ss(1.0));
52021        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52022        assert_eq_m128(r, _mm_set_ss(2.0));
52023    }
52024
52025    #[simd_test(enable = "avx512f")]
52026    unsafe fn test_mm_maskz_load_ss() {
52027        #[repr(align(16))]
52028        struct Align {
52029            data: f32,
52030        }
52031        let mem = Align { data: 1.0 };
52032        let r = _mm_maskz_load_ss(0b1, &mem.data);
52033        assert_eq_m128(r, _mm_set_ss(1.0));
52034        let r = _mm_maskz_load_ss(0b0, &mem.data);
52035        assert_eq_m128(r, _mm_set_ss(0.0));
52036    }
52037
52038    #[simd_test(enable = "avx512f")]
52039    unsafe fn test_mm_mask_load_sd() {
52040        #[repr(align(16))]
52041        struct Align {
52042            data: f64,
52043        }
52044        let src = _mm_set_sd(2.0);
52045        let mem = Align { data: 1.0 };
52046        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52047        assert_eq_m128d(r, _mm_set_sd(1.0));
52048        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52049        assert_eq_m128d(r, _mm_set_sd(2.0));
52050    }
52051
52052    #[simd_test(enable = "avx512f")]
52053    unsafe fn test_mm_maskz_load_sd() {
52054        #[repr(align(16))]
52055        struct Align {
52056            data: f64,
52057        }
52058        let mem = Align { data: 1.0 };
52059        let r = _mm_maskz_load_sd(0b1, &mem.data);
52060        assert_eq_m128d(r, _mm_set_sd(1.0));
52061        let r = _mm_maskz_load_sd(0b0, &mem.data);
52062        assert_eq_m128d(r, _mm_set_sd(0.0));
52063    }
52064
52065    #[simd_test(enable = "avx512f,avx512vl")]
52066    unsafe fn test_mm_mask_storeu_pd() {
52067        let mut r = [42_f64; 2];
52068        let a = _mm_setr_pd(1.0, 2.0);
52069        let m = 0b10;
52070        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52071        let e = _mm_setr_pd(42.0, 2.0);
52072        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52073    }
52074
52075    #[simd_test(enable = "avx512f,avx512vl")]
52076    unsafe fn test_mm_mask_store_pd() {
52077        #[repr(align(16))]
52078        struct Align {
52079            data: [f64; 2], // 16 bytes
52080        }
52081        let mut r = Align { data: [42.0; 2] };
52082        let a = _mm_setr_pd(1.0, 2.0);
52083        let m = 0b10;
52084        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52085        let e = _mm_setr_pd(42.0, 2.0);
52086        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52087    }
52088
52089    #[simd_test(enable = "avx512f")]
52090    unsafe fn test_mm_mask_store_ss() {
52091        #[repr(align(16))]
52092        struct Align {
52093            data: f32,
52094        }
52095        let a = _mm_set_ss(2.0);
52096        let mut mem = Align { data: 1.0 };
52097        _mm_mask_store_ss(&mut mem.data, 0b1, a);
52098        assert_eq!(mem.data, 2.0);
52099        _mm_mask_store_ss(&mut mem.data, 0b0, a);
52100        assert_eq!(mem.data, 2.0);
52101    }
52102
52103    #[simd_test(enable = "avx512f")]
52104    unsafe fn test_mm_mask_store_sd() {
52105        #[repr(align(16))]
52106        struct Align {
52107            data: f64,
52108        }
52109        let a = _mm_set_sd(2.0);
52110        let mut mem = Align { data: 1.0 };
52111        _mm_mask_store_sd(&mut mem.data, 0b1, a);
52112        assert_eq!(mem.data, 2.0);
52113        _mm_mask_store_sd(&mut mem.data, 0b0, a);
52114        assert_eq!(mem.data, 2.0);
52115    }
52116
52117    #[simd_test(enable = "avx512f")]
52118    unsafe fn test_mm512_setr_pd() {
52119        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52120        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52121    }
52122
52123    #[simd_test(enable = "avx512f")]
52124    unsafe fn test_mm512_set_pd() {
52125        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52126        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52127    }
52128
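    // Rotate-by-immediate tests: `rol_epi32::<N>` rotates each 32-bit lane left by N and
    // `ror_epi32::<N>` rotates right, so `1 << 31` rotated left by one wraps to `1 << 0`.
    // Masked variants keep unselected lanes from `src`; `maskz_` variants zero them.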
52129    #[simd_test(enable = "avx512f")]
52130    unsafe fn test_mm512_rol_epi32() {
52131        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52132        let r = _mm512_rol_epi32::<1>(a);
52133        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52134        assert_eq_m512i(r, e);
52135    }
52136
52137    #[simd_test(enable = "avx512f")]
52138    unsafe fn test_mm512_mask_rol_epi32() {
52139        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52140        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52141        assert_eq_m512i(r, a);
52142        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52143        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52144        assert_eq_m512i(r, e);
52145    }
52146
52147    #[simd_test(enable = "avx512f")]
52148    unsafe fn test_mm512_maskz_rol_epi32() {
52149        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52150        let r = _mm512_maskz_rol_epi32::<1>(0, a);
52151        assert_eq_m512i(r, _mm512_setzero_si512());
52152        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52153        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52154        assert_eq_m512i(r, e);
52155    }
52156
52157    #[simd_test(enable = "avx512f,avx512vl")]
52158    unsafe fn test_mm256_rol_epi32() {
52159        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52160        let r = _mm256_rol_epi32::<1>(a);
52161        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52162        assert_eq_m256i(r, e);
52163    }
52164
52165    #[simd_test(enable = "avx512f,avx512vl")]
52166    unsafe fn test_mm256_mask_rol_epi32() {
52167        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52168        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52169        assert_eq_m256i(r, a);
52170        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52171        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52172        assert_eq_m256i(r, e);
52173    }
52174
52175    #[simd_test(enable = "avx512f,avx512vl")]
52176    unsafe fn test_mm256_maskz_rol_epi32() {
52177        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52178        let r = _mm256_maskz_rol_epi32::<1>(0, a);
52179        assert_eq_m256i(r, _mm256_setzero_si256());
52180        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52181        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52182        assert_eq_m256i(r, e);
52183    }
52184
52185    #[simd_test(enable = "avx512f,avx512vl")]
52186    unsafe fn test_mm_rol_epi32() {
52187        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52188        let r = _mm_rol_epi32::<1>(a);
52189        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52190        assert_eq_m128i(r, e);
52191    }
52192
52193    #[simd_test(enable = "avx512f,avx512vl")]
52194    unsafe fn test_mm_mask_rol_epi32() {
52195        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52196        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52197        assert_eq_m128i(r, a);
52198        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52199        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52200        assert_eq_m128i(r, e);
52201    }
52202
52203    #[simd_test(enable = "avx512f,avx512vl")]
52204    unsafe fn test_mm_maskz_rol_epi32() {
52205        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52206        let r = _mm_maskz_rol_epi32::<1>(0, a);
52207        assert_eq_m128i(r, _mm_setzero_si128());
52208        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52209        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52210        assert_eq_m128i(r, e);
52211    }
52212
52213    #[simd_test(enable = "avx512f")]
52214    unsafe fn test_mm512_ror_epi32() {
52215        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52216        let r = _mm512_ror_epi32::<1>(a);
52217        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218        assert_eq_m512i(r, e);
52219    }
52220
52221    #[simd_test(enable = "avx512f")]
52222    unsafe fn test_mm512_mask_ror_epi32() {
52223        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52224        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52225        assert_eq_m512i(r, a);
52226        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52227        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52228        assert_eq_m512i(r, e);
52229    }
52230
52231    #[simd_test(enable = "avx512f")]
52232    unsafe fn test_mm512_maskz_ror_epi32() {
52233        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52234        let r = _mm512_maskz_ror_epi32::<1>(0, a);
52235        assert_eq_m512i(r, _mm512_setzero_si512());
52236        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52237        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52238        assert_eq_m512i(r, e);
52239    }
52240
52241    #[simd_test(enable = "avx512f,avx512vl")]
52242    unsafe fn test_mm256_ror_epi32() {
52243        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52244        let r = _mm256_ror_epi32::<1>(a);
52245        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246        assert_eq_m256i(r, e);
52247    }
52248
52249    #[simd_test(enable = "avx512f,avx512vl")]
52250    unsafe fn test_mm256_mask_ror_epi32() {
52251        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52252        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52253        assert_eq_m256i(r, a);
52254        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52255        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256        assert_eq_m256i(r, e);
52257    }
52258
52259    #[simd_test(enable = "avx512f,avx512vl")]
52260    unsafe fn test_mm256_maskz_ror_epi32() {
52261        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52262        let r = _mm256_maskz_ror_epi32::<1>(0, a);
52263        assert_eq_m256i(r, _mm256_setzero_si256());
52264        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52265        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52266        assert_eq_m256i(r, e);
52267    }
52268
52269    #[simd_test(enable = "avx512f,avx512vl")]
52270    unsafe fn test_mm_ror_epi32() {
52271        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52272        let r = _mm_ror_epi32::<1>(a);
52273        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52274        assert_eq_m128i(r, e);
52275    }
52276
52277    #[simd_test(enable = "avx512f,avx512vl")]
52278    unsafe fn test_mm_mask_ror_epi32() {
52279        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52280        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52281        assert_eq_m128i(r, a);
52282        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52283        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52284        assert_eq_m128i(r, e);
52285    }
52286
52287    #[simd_test(enable = "avx512f,avx512vl")]
52288    unsafe fn test_mm_maskz_ror_epi32() {
52289        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52290        let r = _mm_maskz_ror_epi32::<1>(0, a);
52291        assert_eq_m128i(r, _mm_setzero_si128());
52292        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52293        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52294        assert_eq_m128i(r, e);
52295    }
52296
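    // Shift-by-immediate tests: `slli_epi32::<N>` shifts each lane left and
    // `srli_epi32::<N>` shifts right, filling with zeros, so the bit shifted out of
    // `1 << 31` (or `1 << 0`) is lost rather than wrapping around.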
52297    #[simd_test(enable = "avx512f")]
52298    unsafe fn test_mm512_slli_epi32() {
52299        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52300        let r = _mm512_slli_epi32::<1>(a);
52301        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302        assert_eq_m512i(r, e);
52303    }
52304
52305    #[simd_test(enable = "avx512f")]
52306    unsafe fn test_mm512_mask_slli_epi32() {
52307        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52308        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52309        assert_eq_m512i(r, a);
52310        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52311        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52312        assert_eq_m512i(r, e);
52313    }
52314
52315    #[simd_test(enable = "avx512f")]
52316    unsafe fn test_mm512_maskz_slli_epi32() {
52317        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52318        let r = _mm512_maskz_slli_epi32::<1>(0, a);
52319        assert_eq_m512i(r, _mm512_setzero_si512());
52320        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52321        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52322        assert_eq_m512i(r, e);
52323    }
52324
52325    #[simd_test(enable = "avx512f,avx512vl")]
52326    unsafe fn test_mm256_mask_slli_epi32() {
52327        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52328        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52329        assert_eq_m256i(r, a);
52330        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52331        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52332        assert_eq_m256i(r, e);
52333    }
52334
52335    #[simd_test(enable = "avx512f,avx512vl")]
52336    unsafe fn test_mm256_maskz_slli_epi32() {
52337        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52338        let r = _mm256_maskz_slli_epi32::<1>(0, a);
52339        assert_eq_m256i(r, _mm256_setzero_si256());
52340        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52341        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52342        assert_eq_m256i(r, e);
52343    }
52344
52345    #[simd_test(enable = "avx512f,avx512vl")]
52346    unsafe fn test_mm_mask_slli_epi32() {
52347        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52348        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52349        assert_eq_m128i(r, a);
52350        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52351        let e = _mm_set_epi32(0, 2, 2, 2);
52352        assert_eq_m128i(r, e);
52353    }
52354
52355    #[simd_test(enable = "avx512f,avx512vl")]
52356    unsafe fn test_mm_maskz_slli_epi32() {
52357        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52358        let r = _mm_maskz_slli_epi32::<1>(0, a);
52359        assert_eq_m128i(r, _mm_setzero_si128());
52360        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52361        let e = _mm_set_epi32(0, 2, 2, 2);
52362        assert_eq_m128i(r, e);
52363    }
52364
52365    #[simd_test(enable = "avx512f")]
52366    unsafe fn test_mm512_srli_epi32() {
52367        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52368        let r = _mm512_srli_epi32::<1>(a);
52369        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52370        assert_eq_m512i(r, e);
52371    }
52372
52373    #[simd_test(enable = "avx512f")]
52374    unsafe fn test_mm512_mask_srli_epi32() {
52375        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52376        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52377        assert_eq_m512i(r, a);
52378        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52379        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52380        assert_eq_m512i(r, e);
52381    }
52382
52383    #[simd_test(enable = "avx512f")]
52384    unsafe fn test_mm512_maskz_srli_epi32() {
52385        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52386        let r = _mm512_maskz_srli_epi32::<1>(0, a);
52387        assert_eq_m512i(r, _mm512_setzero_si512());
52388        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52389        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52390        assert_eq_m512i(r, e);
52391    }
52392
52393    #[simd_test(enable = "avx512f,avx512vl")]
52394    unsafe fn test_mm256_mask_srli_epi32() {
52395        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52396        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52397        assert_eq_m256i(r, a);
52398        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52399        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52400        assert_eq_m256i(r, e);
52401    }
52402
52403    #[simd_test(enable = "avx512f,avx512vl")]
52404    unsafe fn test_mm256_maskz_srli_epi32() {
52405        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52406        let r = _mm256_maskz_srli_epi32::<1>(0, a);
52407        assert_eq_m256i(r, _mm256_setzero_si256());
52408        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52409        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52410        assert_eq_m256i(r, e);
52411    }
52412
52413    #[simd_test(enable = "avx512f,avx512vl")]
52414    unsafe fn test_mm_mask_srli_epi32() {
52415        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52416        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52417        assert_eq_m128i(r, a);
52418        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52419        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52420        assert_eq_m128i(r, e);
52421    }
52422
52423    #[simd_test(enable = "avx512f,avx512vl")]
52424    unsafe fn test_mm_maskz_srli_epi32() {
52425        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52426        let r = _mm_maskz_srli_epi32::<1>(0, a);
52427        assert_eq_m128i(r, _mm_setzero_si128());
52428        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52429        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52430        assert_eq_m128i(r, e);
52431    }
52432
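    // Variable-rotate tests: `rolv_epi32`/`rorv_epi32` rotate each lane by the count held
    // in the corresponding lane of `b`, here a broadcast of 1.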
52433    #[simd_test(enable = "avx512f")]
52434    unsafe fn test_mm512_rolv_epi32() {
52435        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52436        let b = _mm512_set1_epi32(1);
52437        let r = _mm512_rolv_epi32(a, b);
52438        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52439        assert_eq_m512i(r, e);
52440    }
52441
52442    #[simd_test(enable = "avx512f")]
52443    unsafe fn test_mm512_mask_rolv_epi32() {
52444        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52445        let b = _mm512_set1_epi32(1);
52446        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52447        assert_eq_m512i(r, a);
52448        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52449        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52450        assert_eq_m512i(r, e);
52451    }
52452
52453    #[simd_test(enable = "avx512f")]
52454    unsafe fn test_mm512_maskz_rolv_epi32() {
52455        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52456        let b = _mm512_set1_epi32(1);
52457        let r = _mm512_maskz_rolv_epi32(0, a, b);
52458        assert_eq_m512i(r, _mm512_setzero_si512());
52459        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52460        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52461        assert_eq_m512i(r, e);
52462    }
52463
52464    #[simd_test(enable = "avx512f,avx512vl")]
52465    unsafe fn test_mm256_rolv_epi32() {
52466        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52467        let b = _mm256_set1_epi32(1);
52468        let r = _mm256_rolv_epi32(a, b);
52469        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52470        assert_eq_m256i(r, e);
52471    }
52472
52473    #[simd_test(enable = "avx512f,avx512vl")]
52474    unsafe fn test_mm256_mask_rolv_epi32() {
52475        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52476        let b = _mm256_set1_epi32(1);
52477        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52478        assert_eq_m256i(r, a);
52479        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52480        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52481        assert_eq_m256i(r, e);
52482    }
52483
52484    #[simd_test(enable = "avx512f,avx512vl")]
52485    unsafe fn test_mm256_maskz_rolv_epi32() {
52486        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52487        let b = _mm256_set1_epi32(1);
52488        let r = _mm256_maskz_rolv_epi32(0, a, b);
52489        assert_eq_m256i(r, _mm256_setzero_si256());
52490        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52491        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52492        assert_eq_m256i(r, e);
52493    }
52494
52495    #[simd_test(enable = "avx512f,avx512vl")]
52496    unsafe fn test_mm_rolv_epi32() {
52497        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52498        let b = _mm_set1_epi32(1);
52499        let r = _mm_rolv_epi32(a, b);
52500        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52501        assert_eq_m128i(r, e);
52502    }
52503
52504    #[simd_test(enable = "avx512f,avx512vl")]
52505    unsafe fn test_mm_mask_rolv_epi32() {
52506        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52507        let b = _mm_set1_epi32(1);
52508        let r = _mm_mask_rolv_epi32(a, 0, a, b);
52509        assert_eq_m128i(r, a);
52510        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52511        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52512        assert_eq_m128i(r, e);
52513    }
52514
52515    #[simd_test(enable = "avx512f,avx512vl")]
52516    unsafe fn test_mm_maskz_rolv_epi32() {
52517        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52518        let b = _mm_set1_epi32(1);
52519        let r = _mm_maskz_rolv_epi32(0, a, b);
52520        assert_eq_m128i(r, _mm_setzero_si128());
52521        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52522        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52523        assert_eq_m128i(r, e);
52524    }
52525
52526    #[simd_test(enable = "avx512f")]
52527    unsafe fn test_mm512_rorv_epi32() {
52528        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52529        let b = _mm512_set1_epi32(1);
52530        let r = _mm512_rorv_epi32(a, b);
52531        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52532        assert_eq_m512i(r, e);
52533    }
52534
52535    #[simd_test(enable = "avx512f")]
52536    unsafe fn test_mm512_mask_rorv_epi32() {
52537        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52538        let b = _mm512_set1_epi32(1);
52539        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52540        assert_eq_m512i(r, a);
52541        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52542        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52543        assert_eq_m512i(r, e);
52544    }
52545
52546    #[simd_test(enable = "avx512f")]
52547    unsafe fn test_mm512_maskz_rorv_epi32() {
52548        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52549        let b = _mm512_set1_epi32(1);
52550        let r = _mm512_maskz_rorv_epi32(0, a, b);
52551        assert_eq_m512i(r, _mm512_setzero_si512());
52552        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52553        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52554        assert_eq_m512i(r, e);
52555    }
52556
52557    #[simd_test(enable = "avx512f,avx512vl")]
52558    unsafe fn test_mm256_rorv_epi32() {
52559        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52560        let b = _mm256_set1_epi32(1);
52561        let r = _mm256_rorv_epi32(a, b);
52562        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52563        assert_eq_m256i(r, e);
52564    }
52565
52566    #[simd_test(enable = "avx512f,avx512vl")]
52567    unsafe fn test_mm256_mask_rorv_epi32() {
52568        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52569        let b = _mm256_set1_epi32(1);
52570        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52571        assert_eq_m256i(r, a);
52572        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52573        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52574        assert_eq_m256i(r, e);
52575    }
52576
52577    #[simd_test(enable = "avx512f,avx512vl")]
52578    unsafe fn test_mm256_maskz_rorv_epi32() {
52579        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52580        let b = _mm256_set1_epi32(1);
52581        let r = _mm256_maskz_rorv_epi32(0, a, b);
52582        assert_eq_m256i(r, _mm256_setzero_si256());
52583        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52584        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52585        assert_eq_m256i(r, e);
52586    }
52587
52588    #[simd_test(enable = "avx512f,avx512vl")]
52589    unsafe fn test_mm_rorv_epi32() {
52590        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52591        let b = _mm_set1_epi32(1);
52592        let r = _mm_rorv_epi32(a, b);
52593        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52594        assert_eq_m128i(r, e);
52595    }
52596
52597    #[simd_test(enable = "avx512f,avx512vl")]
52598    unsafe fn test_mm_mask_rorv_epi32() {
52599        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52600        let b = _mm_set1_epi32(1);
52601        let r = _mm_mask_rorv_epi32(a, 0, a, b);
52602        assert_eq_m128i(r, a);
52603        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52604        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52605        assert_eq_m128i(r, e);
52606    }
52607
52608    #[simd_test(enable = "avx512f,avx512vl")]
52609    unsafe fn test_mm_maskz_rorv_epi32() {
52610        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52611        let b = _mm_set1_epi32(1);
52612        let r = _mm_maskz_rorv_epi32(0, a, b);
52613        assert_eq_m128i(r, _mm_setzero_si128());
52614        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52615        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52616        assert_eq_m128i(r, e);
52617    }
52618
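    // Per-lane variable-shift tests: `sllv_epi32`/`srlv_epi32` shift each lane by the
    // count in the matching lane of `count`; the bit shifted out of `1 << 31` is dropped.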
52619    #[simd_test(enable = "avx512f")]
52620    unsafe fn test_mm512_sllv_epi32() {
52621        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52622        let count = _mm512_set1_epi32(1);
52623        let r = _mm512_sllv_epi32(a, count);
52624        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52625        assert_eq_m512i(r, e);
52626    }
52627
52628    #[simd_test(enable = "avx512f")]
52629    unsafe fn test_mm512_mask_sllv_epi32() {
52630        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52631        let count = _mm512_set1_epi32(1);
52632        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52633        assert_eq_m512i(r, a);
52634        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52635        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52636        assert_eq_m512i(r, e);
52637    }
52638
52639    #[simd_test(enable = "avx512f")]
52640    unsafe fn test_mm512_maskz_sllv_epi32() {
52641        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52642        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52643        let r = _mm512_maskz_sllv_epi32(0, a, count);
52644        assert_eq_m512i(r, _mm512_setzero_si512());
52645        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52646        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52647        assert_eq_m512i(r, e);
52648    }
52649
52650    #[simd_test(enable = "avx512f,avx512vl")]
52651    unsafe fn test_mm256_mask_sllv_epi32() {
52652        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52653        let count = _mm256_set1_epi32(1);
52654        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52655        assert_eq_m256i(r, a);
52656        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52657        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52658        assert_eq_m256i(r, e);
52659    }
52660
52661    #[simd_test(enable = "avx512f,avx512vl")]
52662    unsafe fn test_mm256_maskz_sllv_epi32() {
52663        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52664        let count = _mm256_set1_epi32(1);
52665        let r = _mm256_maskz_sllv_epi32(0, a, count);
52666        assert_eq_m256i(r, _mm256_setzero_si256());
52667        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52668        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52669        assert_eq_m256i(r, e);
52670    }
52671
52672    #[simd_test(enable = "avx512f,avx512vl")]
52673    unsafe fn test_mm_mask_sllv_epi32() {
52674        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52675        let count = _mm_set1_epi32(1);
52676        let r = _mm_mask_sllv_epi32(a, 0, a, count);
52677        assert_eq_m128i(r, a);
52678        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52679        let e = _mm_set_epi32(0, 2, 2, 2);
52680        assert_eq_m128i(r, e);
52681    }
52682
52683    #[simd_test(enable = "avx512f,avx512vl")]
52684    unsafe fn test_mm_maskz_sllv_epi32() {
52685        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52686        let count = _mm_set1_epi32(1);
52687        let r = _mm_maskz_sllv_epi32(0, a, count);
52688        assert_eq_m128i(r, _mm_setzero_si128());
52689        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52690        let e = _mm_set_epi32(0, 2, 2, 2);
52691        assert_eq_m128i(r, e);
52692    }
52693
52694    #[simd_test(enable = "avx512f")]
52695    unsafe fn test_mm512_srlv_epi32() {
52696        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52697        let count = _mm512_set1_epi32(1);
52698        let r = _mm512_srlv_epi32(a, count);
52699        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700        assert_eq_m512i(r, e);
52701    }
52702
52703    #[simd_test(enable = "avx512f")]
52704    unsafe fn test_mm512_mask_srlv_epi32() {
52705        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52706        let count = _mm512_set1_epi32(1);
52707        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52708        assert_eq_m512i(r, a);
52709        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52710        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52711        assert_eq_m512i(r, e);
52712    }
52713
52714    #[simd_test(enable = "avx512f")]
52715    unsafe fn test_mm512_maskz_srlv_epi32() {
52716        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52717        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52718        let r = _mm512_maskz_srlv_epi32(0, a, count);
52719        assert_eq_m512i(r, _mm512_setzero_si512());
52720        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52721        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52722        assert_eq_m512i(r, e);
52723    }
52724
52725    #[simd_test(enable = "avx512f,avx512vl")]
52726    unsafe fn test_mm256_mask_srlv_epi32() {
52727        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52728        let count = _mm256_set1_epi32(1);
52729        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52730        assert_eq_m256i(r, a);
52731        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52732        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52733        assert_eq_m256i(r, e);
52734    }
52735
52736    #[simd_test(enable = "avx512f,avx512vl")]
52737    unsafe fn test_mm256_maskz_srlv_epi32() {
52738        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52739        let count = _mm256_set1_epi32(1);
52740        let r = _mm256_maskz_srlv_epi32(0, a, count);
52741        assert_eq_m256i(r, _mm256_setzero_si256());
52742        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52743        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52744        assert_eq_m256i(r, e);
52745    }
52746
52747    #[simd_test(enable = "avx512f,avx512vl")]
52748    unsafe fn test_mm_mask_srlv_epi32() {
52749        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52750        let count = _mm_set1_epi32(1);
52751        let r = _mm_mask_srlv_epi32(a, 0, a, count);
52752        assert_eq_m128i(r, a);
52753        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52754        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52755        assert_eq_m128i(r, e);
52756    }
52757
52758    #[simd_test(enable = "avx512f,avx512vl")]
52759    unsafe fn test_mm_maskz_srlv_epi32() {
52760        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52761        let count = _mm_set1_epi32(1);
52762        let r = _mm_maskz_srlv_epi32(0, a, count);
52763        assert_eq_m128i(r, _mm_setzero_si128());
52764        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52765        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52766        assert_eq_m128i(r, e);
52767    }
52768
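    // Shift-by-vector-count tests: `sll`/`srl`/`sra` shift every lane by the same amount,
    // read from the low 64 bits of the `__m128i` count operand, not per lane.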
52769    #[simd_test(enable = "avx512f")]
52770    unsafe fn test_mm512_sll_epi32() {
52771        #[rustfmt::skip]
52772        let a = _mm512_set_epi32(
52773            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52774            0, 0, 0, 0,
52775            0, 0, 0, 0,
52776            0, 0, 0, 0,
52777        );
52778        let count = _mm_set_epi32(0, 0, 0, 2);
52779        let r = _mm512_sll_epi32(a, count);
52780        #[rustfmt::skip]
52781        let e = _mm512_set_epi32(
52782            0, 1 << 2, 1 << 3, 1 << 4,
52783            0, 0, 0, 0,
52784            0, 0, 0, 0,
52785            0, 0, 0, 0,
52786        );
52787        assert_eq_m512i(r, e);
52788    }
52789
52790    #[simd_test(enable = "avx512f")]
52791    unsafe fn test_mm512_mask_sll_epi32() {
52792        #[rustfmt::skip]
52793        let a = _mm512_set_epi32(
52794            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52795            0, 0, 0, 0,
52796            0, 0, 0, 0,
52797            0, 0, 0, 0,
52798        );
52799        let count = _mm_set_epi32(0, 0, 0, 2);
52800        let r = _mm512_mask_sll_epi32(a, 0, a, count);
52801        assert_eq_m512i(r, a);
52802        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52803        #[rustfmt::skip]
52804        let e = _mm512_set_epi32(
52805            0, 1 << 2, 1 << 3, 1 << 4,
52806            0, 0, 0, 0,
52807            0, 0, 0, 0,
52808            0, 0, 0, 0,
52809        );
52810        assert_eq_m512i(r, e);
52811    }
52812
52813    #[simd_test(enable = "avx512f")]
52814    unsafe fn test_mm512_maskz_sll_epi32() {
52815        #[rustfmt::skip]
52816        let a = _mm512_set_epi32(
52817            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52818            0, 0, 0, 0,
52819            0, 0, 0, 0,
52820            0, 0, 0, 1 << 31,
52821        );
52822        let count = _mm_set_epi32(2, 0, 0, 2);
52823        let r = _mm512_maskz_sll_epi32(0, a, count);
52824        assert_eq_m512i(r, _mm512_setzero_si512());
52825        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52826        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52827        assert_eq_m512i(r, e);
52828    }
52829
52830    #[simd_test(enable = "avx512f,avx512vl")]
52831    unsafe fn test_mm256_mask_sll_epi32() {
52832        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52833        let count = _mm_set_epi32(0, 0, 0, 1);
52834        let r = _mm256_mask_sll_epi32(a, 0, a, count);
52835        assert_eq_m256i(r, a);
52836        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52837        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52838        assert_eq_m256i(r, e);
52839    }
52840
52841    #[simd_test(enable = "avx512f,avx512vl")]
52842    unsafe fn test_mm256_maskz_sll_epi32() {
52843        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52844        let count = _mm_set_epi32(0, 0, 0, 1);
52845        let r = _mm256_maskz_sll_epi32(0, a, count);
52846        assert_eq_m256i(r, _mm256_setzero_si256());
52847        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52848        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52849        assert_eq_m256i(r, e);
52850    }
52851
52852    #[simd_test(enable = "avx512f,avx512vl")]
52853    unsafe fn test_mm_mask_sll_epi32() {
52854        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52855        let count = _mm_set_epi32(0, 0, 0, 1);
52856        let r = _mm_mask_sll_epi32(a, 0, a, count);
52857        assert_eq_m128i(r, a);
52858        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52859        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52860        assert_eq_m128i(r, e);
52861    }
52862
52863    #[simd_test(enable = "avx512f,avx512vl")]
52864    unsafe fn test_mm_maskz_sll_epi32() {
52865        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52866        let count = _mm_set_epi32(0, 0, 0, 1);
52867        let r = _mm_maskz_sll_epi32(0, a, count);
52868        assert_eq_m128i(r, _mm_setzero_si128());
52869        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52870        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52871        assert_eq_m128i(r, e);
52872    }
52873
52874    #[simd_test(enable = "avx512f")]
52875    unsafe fn test_mm512_srl_epi32() {
52876        #[rustfmt::skip]
52877        let a = _mm512_set_epi32(
52878            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52879            0, 0, 0, 0,
52880            0, 0, 0, 0,
52881            0, 0, 0, 0,
52882        );
52883        let count = _mm_set_epi32(0, 0, 0, 2);
52884        let r = _mm512_srl_epi32(a, count);
52885        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52886        assert_eq_m512i(r, e);
52887    }
52888
52889    #[simd_test(enable = "avx512f")]
52890    unsafe fn test_mm512_mask_srl_epi32() {
52891        #[rustfmt::skip]
52892        let a = _mm512_set_epi32(
52893            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52894            0, 0, 0, 0,
52895            0, 0, 0, 0,
52896            0, 0, 0, 0,
52897        );
52898        let count = _mm_set_epi32(0, 0, 0, 2);
52899        let r = _mm512_mask_srl_epi32(a, 0, a, count);
52900        assert_eq_m512i(r, a);
52901        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52902        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52903        assert_eq_m512i(r, e);
52904    }
52905
52906    #[simd_test(enable = "avx512f")]
52907    unsafe fn test_mm512_maskz_srl_epi32() {
52908        #[rustfmt::skip]
52909        let a = _mm512_set_epi32(
52910            1 << 31, 1 << 0, 1 << 1, 1 << 2,
52911            0, 0, 0, 0,
52912            0, 0, 0, 0,
52913            0, 0, 0, 1 << 31,
52914        );
52915        let count = _mm_set_epi32(2, 0, 0, 2);
52916        let r = _mm512_maskz_srl_epi32(0, a, count);
52917        assert_eq_m512i(r, _mm512_setzero_si512());
52918        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52919        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52920        assert_eq_m512i(r, e);
52921    }
52922
52923    #[simd_test(enable = "avx512f,avx512vl")]
52924    unsafe fn test_mm256_mask_srl_epi32() {
52925        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52926        let count = _mm_set_epi32(0, 0, 0, 1);
52927        let r = _mm256_mask_srl_epi32(a, 0, a, count);
52928        assert_eq_m256i(r, a);
52929        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52930        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52931        assert_eq_m256i(r, e);
52932    }
52933
52934    #[simd_test(enable = "avx512f,avx512vl")]
52935    unsafe fn test_mm256_maskz_srl_epi32() {
52936        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52937        let count = _mm_set_epi32(0, 0, 0, 1);
52938        let r = _mm256_maskz_srl_epi32(0, a, count);
52939        assert_eq_m256i(r, _mm256_setzero_si256());
52940        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52941        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52942        assert_eq_m256i(r, e);
52943    }
52944
52945    #[simd_test(enable = "avx512f,avx512vl")]
52946    unsafe fn test_mm_mask_srl_epi32() {
52947        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52948        let count = _mm_set_epi32(0, 0, 0, 1);
52949        let r = _mm_mask_srl_epi32(a, 0, a, count);
52950        assert_eq_m128i(r, a);
52951        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52952        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52953        assert_eq_m128i(r, e);
52954    }
52955
52956    #[simd_test(enable = "avx512f,avx512vl")]
52957    unsafe fn test_mm_maskz_srl_epi32() {
52958        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52959        let count = _mm_set_epi32(0, 0, 0, 1);
52960        let r = _mm_maskz_srl_epi32(0, a, count);
52961        assert_eq_m128i(r, _mm_setzero_si128());
52962        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52963        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52964        assert_eq_m128i(r, e);
52965    }
52966
52967    #[simd_test(enable = "avx512f")]
52968    unsafe fn test_mm512_sra_epi32() {
52969        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52970        let count = _mm_set_epi32(1, 0, 0, 2);
52971        let r = _mm512_sra_epi32(a, count);
52972        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52973        assert_eq_m512i(r, e);
52974    }
52975
52976    #[simd_test(enable = "avx512f")]
52977    unsafe fn test_mm512_mask_sra_epi32() {
52978        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52979        let count = _mm_set_epi32(0, 0, 0, 2);
52980        let r = _mm512_mask_sra_epi32(a, 0, a, count);
52981        assert_eq_m512i(r, a);
52982        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
52983        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
52984        assert_eq_m512i(r, e);
52985    }
52986
52987    #[simd_test(enable = "avx512f")]
52988    unsafe fn test_mm512_maskz_sra_epi32() {
52989        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
52990        let count = _mm_set_epi32(2, 0, 0, 2);
52991        let r = _mm512_maskz_sra_epi32(0, a, count);
52992        assert_eq_m512i(r, _mm512_setzero_si512());
52993        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
52994        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
52995        assert_eq_m512i(r, e);
52996    }
52997
52998    #[simd_test(enable = "avx512f,avx512vl")]
52999    unsafe fn test_mm256_mask_sra_epi32() {
53000        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53001        let count = _mm_set_epi32(0, 0, 0, 1);
53002        let r = _mm256_mask_sra_epi32(a, 0, a, count);
53003        assert_eq_m256i(r, a);
53004        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53005        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53006        assert_eq_m256i(r, e);
53007    }
53008
53009    #[simd_test(enable = "avx512f,avx512vl")]
53010    unsafe fn test_mm256_maskz_sra_epi32() {
53011        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53012        let count = _mm_set_epi32(0, 0, 0, 1);
53013        let r = _mm256_maskz_sra_epi32(0, a, count);
53014        assert_eq_m256i(r, _mm256_setzero_si256());
53015        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53016        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53017        assert_eq_m256i(r, e);
53018    }
53019
53020    #[simd_test(enable = "avx512f,avx512vl")]
53021    unsafe fn test_mm_mask_sra_epi32() {
53022        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53023        let count = _mm_set_epi32(0, 0, 0, 1);
53024        let r = _mm_mask_sra_epi32(a, 0, a, count);
53025        assert_eq_m128i(r, a);
53026        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53027        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53028        assert_eq_m128i(r, e);
53029    }
53030
53031    #[simd_test(enable = "avx512f,avx512vl")]
53032    unsafe fn test_mm_maskz_sra_epi32() {
53033        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53034        let count = _mm_set_epi32(0, 0, 0, 1);
53035        let r = _mm_maskz_sra_epi32(0, a, count);
53036        assert_eq_m128i(r, _mm_setzero_si128());
53037        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53038        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53039        assert_eq_m128i(r, e);
53040    }
53041
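    // Per-lane arithmetic-shift tests: `srav_epi32` shifts each lane right by its matching
    // count while replicating the sign bit, so negative inputs stay negative (-15 >> 2 == -4).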
53042    #[simd_test(enable = "avx512f")]
53043    unsafe fn test_mm512_srav_epi32() {
53044        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53045        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53046        let r = _mm512_srav_epi32(a, count);
53047        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048        assert_eq_m512i(r, e);
53049    }
53050
53051    #[simd_test(enable = "avx512f")]
53052    unsafe fn test_mm512_mask_srav_epi32() {
53053        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53054        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53055        let r = _mm512_mask_srav_epi32(a, 0, a, count);
53056        assert_eq_m512i(r, a);
53057        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53058        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53059        assert_eq_m512i(r, e);
53060    }
53061
53062    #[simd_test(enable = "avx512f")]
53063    unsafe fn test_mm512_maskz_srav_epi32() {
53064        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53065        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53066        let r = _mm512_maskz_srav_epi32(0, a, count);
53067        assert_eq_m512i(r, _mm512_setzero_si512());
53068        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53069        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53070        assert_eq_m512i(r, e);
53071    }
53072
53073    #[simd_test(enable = "avx512f,avx512vl")]
53074    unsafe fn test_mm256_mask_srav_epi32() {
53075        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53076        let count = _mm256_set1_epi32(1);
53077        let r = _mm256_mask_srav_epi32(a, 0, a, count);
53078        assert_eq_m256i(r, a);
53079        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53080        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53081        assert_eq_m256i(r, e);
53082    }
53083
53084    #[simd_test(enable = "avx512f,avx512vl")]
53085    unsafe fn test_mm256_maskz_srav_epi32() {
53086        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53087        let count = _mm256_set1_epi32(1);
53088        let r = _mm256_maskz_srav_epi32(0, a, count);
53089        assert_eq_m256i(r, _mm256_setzero_si256());
53090        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53091        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53092        assert_eq_m256i(r, e);
53093    }
53094
53095    #[simd_test(enable = "avx512f,avx512vl")]
53096    unsafe fn test_mm_mask_srav_epi32() {
53097        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53098        let count = _mm_set1_epi32(1);
53099        let r = _mm_mask_srav_epi32(a, 0, a, count);
53100        assert_eq_m128i(r, a);
53101        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53102        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53103        assert_eq_m128i(r, e);
53104    }
53105
53106    #[simd_test(enable = "avx512f,avx512vl")]
53107    unsafe fn test_mm_maskz_srav_epi32() {
53108        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53109        let count = _mm_set1_epi32(1);
53110        let r = _mm_maskz_srav_epi32(0, a, count);
53111        assert_eq_m128i(r, _mm_setzero_si128());
53112        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53113        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53114        assert_eq_m128i(r, e);
53115    }
53116
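    // Arithmetic-shift-by-immediate tests: `srai_epi32::<N>` shifts each lane right by N,
    // replicating the sign bit (8 >> 2 == 2, -8 >> 2 == -2).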
53117    #[simd_test(enable = "avx512f")]
53118    unsafe fn test_mm512_srai_epi32() {
53119        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53120        let r = _mm512_srai_epi32::<2>(a);
53121        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53122        assert_eq_m512i(r, e);
53123    }
53124
53125    #[simd_test(enable = "avx512f")]
53126    unsafe fn test_mm512_mask_srai_epi32() {
53127        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53128        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53129        assert_eq_m512i(r, a);
53130        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53131        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53132        assert_eq_m512i(r, e);
53133    }
53134
53135    #[simd_test(enable = "avx512f")]
53136    unsafe fn test_mm512_maskz_srai_epi32() {
53137        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53138        let r = _mm512_maskz_srai_epi32::<2>(0, a);
53139        assert_eq_m512i(r, _mm512_setzero_si512());
53140        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53141        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53142        assert_eq_m512i(r, e);
53143    }
53144
53145    #[simd_test(enable = "avx512f,avx512vl")]
53146    unsafe fn test_mm256_mask_srai_epi32() {
53147        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53148        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53149        assert_eq_m256i(r, a);
53150        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53151        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53152        assert_eq_m256i(r, e);
53153    }
53154
53155    #[simd_test(enable = "avx512f,avx512vl")]
53156    unsafe fn test_mm256_maskz_srai_epi32() {
53157        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53158        let r = _mm256_maskz_srai_epi32::<1>(0, a);
53159        assert_eq_m256i(r, _mm256_setzero_si256());
53160        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53161        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53162        assert_eq_m256i(r, e);
53163    }
53164
53165    #[simd_test(enable = "avx512f,avx512vl")]
53166    unsafe fn test_mm_mask_srai_epi32() {
53167        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53168        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53169        assert_eq_m128i(r, a);
53170        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53171        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53172        assert_eq_m128i(r, e);
53173    }
53174
53175    #[simd_test(enable = "avx512f,avx512vl")]
53176    unsafe fn test_mm_maskz_srai_epi32() {
53177        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53178        let r = _mm_maskz_srai_epi32::<1>(0, a);
53179        assert_eq_m128i(r, _mm_setzero_si128());
53180        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53181        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53182        assert_eq_m128i(r, e);
53183    }
53184
53185    #[simd_test(enable = "avx512f")]
53186    unsafe fn test_mm512_permute_ps() {
53187        let a = _mm512_setr_ps(
53188            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53189        );
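        // `_mm512_permute_ps` shuffles within each 128-bit lane; the immediate
        // 0b11_11_11_11 broadcasts element 3 of every lane (3., 7., 11., 15.).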
53190        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53191        let e = _mm512_setr_ps(
53192            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53193        );
53194        assert_eq_m512(r, e);
53195    }
53196
53197    #[simd_test(enable = "avx512f")]
53198    unsafe fn test_mm512_mask_permute_ps() {
53199        let a = _mm512_setr_ps(
53200            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53201        );
53202        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53203        assert_eq_m512(r, a);
53204        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53205        let e = _mm512_setr_ps(
53206            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53207        );
53208        assert_eq_m512(r, e);
53209    }
53210
53211    #[simd_test(enable = "avx512f")]
53212    unsafe fn test_mm512_maskz_permute_ps() {
53213        let a = _mm512_setr_ps(
53214            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53215        );
53216        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53217        assert_eq_m512(r, _mm512_setzero_ps());
53218        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53219        let e = _mm512_setr_ps(
53220            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53221        );
53222        assert_eq_m512(r, e);
53223    }
53224
53225    #[simd_test(enable = "avx512f,avx512vl")]
53226    unsafe fn test_mm256_mask_permute_ps() {
53227        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53228        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53229        assert_eq_m256(r, a);
53230        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53231        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53232        assert_eq_m256(r, e);
53233    }
53234
53235    #[simd_test(enable = "avx512f,avx512vl")]
53236    unsafe fn test_mm256_maskz_permute_ps() {
53237        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53238        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53239        assert_eq_m256(r, _mm256_setzero_ps());
53240        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53241        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53242        assert_eq_m256(r, e);
53243    }
53244
53245    #[simd_test(enable = "avx512f,avx512vl")]
53246    unsafe fn test_mm_mask_permute_ps() {
53247        let a = _mm_set_ps(0., 1., 2., 3.);
53248        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53249        assert_eq_m128(r, a);
53250        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53251        let e = _mm_set_ps(0., 0., 0., 0.);
53252        assert_eq_m128(r, e);
53253    }
53254
53255    #[simd_test(enable = "avx512f,avx512vl")]
53256    unsafe fn test_mm_maskz_permute_ps() {
53257        let a = _mm_set_ps(0., 1., 2., 3.);
53258        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53259        assert_eq_m128(r, _mm_setzero_ps());
53260        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53261        let e = _mm_set_ps(0., 0., 0., 0.);
53262        assert_eq_m128(r, e);
53263    }
53264
53265    #[simd_test(enable = "avx512f")]
53266    unsafe fn test_mm512_permutevar_epi32() {
53267        let idx = _mm512_set1_epi32(1);
53268        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
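        // This is a full-width permute: every result element becomes a[idx[i]] = a[1],
        // and since `_mm512_set_epi32` lists element 15 first, element 1 holds 14.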
53269        let r = _mm512_permutevar_epi32(idx, a);
53270        let e = _mm512_set1_epi32(14);
53271        assert_eq_m512i(r, e);
53272    }
53273
53274    #[simd_test(enable = "avx512f")]
53275    unsafe fn test_mm512_mask_permutevar_epi32() {
53276        let idx = _mm512_set1_epi32(1);
53277        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53278        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53279        assert_eq_m512i(r, a);
53280        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53281        let e = _mm512_set1_epi32(14);
53282        assert_eq_m512i(r, e);
53283    }
53284
53285    #[simd_test(enable = "avx512f")]
53286    unsafe fn test_mm512_permutevar_ps() {
53287        let a = _mm512_set_ps(
53288            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53289        );
53290        let b = _mm512_set1_epi32(0b01);
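        // `_mm512_permutevar_ps` selects within each 128-bit lane using the low two bits
        // of each index, so index 1 picks the second-lowest element of every lane.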
53291        let r = _mm512_permutevar_ps(a, b);
53292        let e = _mm512_set_ps(
53293            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53294        );
53295        assert_eq_m512(r, e);
53296    }
53297
53298    #[simd_test(enable = "avx512f")]
53299    unsafe fn test_mm512_mask_permutevar_ps() {
53300        let a = _mm512_set_ps(
53301            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53302        );
53303        let b = _mm512_set1_epi32(0b01);
53304        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53305        assert_eq_m512(r, a);
53306        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53307        let e = _mm512_set_ps(
53308            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53309        );
53310        assert_eq_m512(r, e);
53311    }
53312
53313    #[simd_test(enable = "avx512f")]
53314    unsafe fn test_mm512_maskz_permutevar_ps() {
53315        let a = _mm512_set_ps(
53316            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53317        );
53318        let b = _mm512_set1_epi32(0b01);
53319        let r = _mm512_maskz_permutevar_ps(0, a, b);
53320        assert_eq_m512(r, _mm512_setzero_ps());
53321        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53322        let e = _mm512_set_ps(
53323            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53324        );
53325        assert_eq_m512(r, e);
53326    }
53327
53328    #[simd_test(enable = "avx512f,avx512vl")]
53329    unsafe fn test_mm256_mask_permutevar_ps() {
53330        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53331        let b = _mm256_set1_epi32(0b01);
53332        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53333        assert_eq_m256(r, a);
53334        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53335        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53336        assert_eq_m256(r, e);
53337    }
53338
53339    #[simd_test(enable = "avx512f,avx512vl")]
53340    unsafe fn test_mm256_maskz_permutevar_ps() {
53341        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53342        let b = _mm256_set1_epi32(0b01);
53343        let r = _mm256_maskz_permutevar_ps(0, a, b);
53344        assert_eq_m256(r, _mm256_setzero_ps());
53345        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53346        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53347        assert_eq_m256(r, e);
53348    }
53349
53350    #[simd_test(enable = "avx512f,avx512vl")]
53351    unsafe fn test_mm_mask_permutevar_ps() {
53352        let a = _mm_set_ps(0., 1., 2., 3.);
53353        let b = _mm_set1_epi32(0b01);
53354        let r = _mm_mask_permutevar_ps(a, 0, a, b);
53355        assert_eq_m128(r, a);
53356        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53357        let e = _mm_set_ps(2., 2., 2., 2.);
53358        assert_eq_m128(r, e);
53359    }
53360
53361    #[simd_test(enable = "avx512f,avx512vl")]
53362    unsafe fn test_mm_maskz_permutevar_ps() {
53363        let a = _mm_set_ps(0., 1., 2., 3.);
53364        let b = _mm_set1_epi32(0b01);
53365        let r = _mm_maskz_permutevar_ps(0, a, b);
53366        assert_eq_m128(r, _mm_setzero_ps());
53367        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53368        let e = _mm_set_ps(2., 2., 2., 2.);
53369        assert_eq_m128(r, e);
53370    }
53371
53372    #[simd_test(enable = "avx512f")]
53373    unsafe fn test_mm512_permutexvar_epi32() {
53374        let idx = _mm512_set1_epi32(1);
53375        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
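        // `permutexvar` indexes across all 16 elements rather than per 128-bit lane,
        // so index 1 again selects element 1 (value 14) for every result element.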
53376        let r = _mm512_permutexvar_epi32(idx, a);
53377        let e = _mm512_set1_epi32(14);
53378        assert_eq_m512i(r, e);
53379    }
53380
53381    #[simd_test(enable = "avx512f")]
53382    unsafe fn test_mm512_mask_permutexvar_epi32() {
53383        let idx = _mm512_set1_epi32(1);
53384        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53385        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53386        assert_eq_m512i(r, a);
53387        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53388        let e = _mm512_set1_epi32(14);
53389        assert_eq_m512i(r, e);
53390    }
53391
53392    #[simd_test(enable = "avx512f")]
53393    unsafe fn test_mm512_maskz_permutexvar_epi32() {
53394        let idx = _mm512_set1_epi32(1);
53395        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53396        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53397        assert_eq_m512i(r, _mm512_setzero_si512());
53398        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53399        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53400        assert_eq_m512i(r, e);
53401    }
53402
53403    #[simd_test(enable = "avx512f,avx512vl")]
53404    unsafe fn test_mm256_permutexvar_epi32() {
53405        let idx = _mm256_set1_epi32(1);
53406        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53407        let r = _mm256_permutexvar_epi32(idx, a);
53408        let e = _mm256_set1_epi32(6);
53409        assert_eq_m256i(r, e);
53410    }
53411
53412    #[simd_test(enable = "avx512f,avx512vl")]
53413    unsafe fn test_mm256_mask_permutexvar_epi32() {
53414        let idx = _mm256_set1_epi32(1);
53415        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53416        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53417        assert_eq_m256i(r, a);
53418        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53419        let e = _mm256_set1_epi32(6);
53420        assert_eq_m256i(r, e);
53421    }
53422
53423    #[simd_test(enable = "avx512f,avx512vl")]
53424    unsafe fn test_mm256_maskz_permutexvar_epi32() {
53425        let idx = _mm256_set1_epi32(1);
53426        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53427        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53428        assert_eq_m256i(r, _mm256_setzero_si256());
53429        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53430        let e = _mm256_set1_epi32(6);
53431        assert_eq_m256i(r, e);
53432    }
53433
53434    #[simd_test(enable = "avx512f")]
53435    unsafe fn test_mm512_permutexvar_ps() {
53436        let idx = _mm512_set1_epi32(1);
53437        let a = _mm512_set_ps(
53438            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53439        );
53440        let r = _mm512_permutexvar_ps(idx, a);
53441        let e = _mm512_set1_ps(14.);
53442        assert_eq_m512(r, e);
53443    }
53444
53445    #[simd_test(enable = "avx512f")]
53446    unsafe fn test_mm512_mask_permutexvar_ps() {
53447        let idx = _mm512_set1_epi32(1);
53448        let a = _mm512_set_ps(
53449            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53450        );
53451        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53452        assert_eq_m512(r, a);
53453        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53454        let e = _mm512_set1_ps(14.);
53455        assert_eq_m512(r, e);
53456    }
53457
53458    #[simd_test(enable = "avx512f")]
53459    unsafe fn test_mm512_maskz_permutexvar_ps() {
53460        let idx = _mm512_set1_epi32(1);
53461        let a = _mm512_set_ps(
53462            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53463        );
53464        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53465        assert_eq_m512(r, _mm512_setzero_ps());
53466        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53467        let e = _mm512_set_ps(
53468            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53469        );
53470        assert_eq_m512(r, e);
53471    }
53472
53473    #[simd_test(enable = "avx512f,avx512vl")]
53474    unsafe fn test_mm256_permutexvar_ps() {
53475        let idx = _mm256_set1_epi32(1);
53476        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53477        let r = _mm256_permutexvar_ps(idx, a);
53478        let e = _mm256_set1_ps(6.);
53479        assert_eq_m256(r, e);
53480    }
53481
53482    #[simd_test(enable = "avx512f,avx512vl")]
53483    unsafe fn test_mm256_mask_permutexvar_ps() {
53484        let idx = _mm256_set1_epi32(1);
53485        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53486        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53487        assert_eq_m256(r, a);
53488        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53489        let e = _mm256_set1_ps(6.);
53490        assert_eq_m256(r, e);
53491    }
53492
53493    #[simd_test(enable = "avx512f,avx512vl")]
53494    unsafe fn test_mm256_maskz_permutexvar_ps() {
53495        let idx = _mm256_set1_epi32(1);
53496        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53497        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53498        assert_eq_m256(r, _mm256_setzero_ps());
53499        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53500        let e = _mm256_set1_ps(6.);
53501        assert_eq_m256(r, e);
53502    }
53503
53504    #[simd_test(enable = "avx512f")]
53505    unsafe fn test_mm512_permutex2var_epi32() {
53506        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53507        #[rustfmt::skip]
53508        let idx = _mm512_set_epi32(
53509            1, 1 << 4, 2, 1 << 4,
53510            3, 1 << 4, 4, 1 << 4,
53511            5, 1 << 4, 6, 1 << 4,
53512            7, 1 << 4, 8, 1 << 4,
53513        );
53514        let b = _mm512_set1_epi32(100);
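        // Each index uses bits 3:0 to pick an element and bit 4 to pick the source,
        // so plain indices read from `a` while `1 << 4` reads b[0] (100).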
53515        let r = _mm512_permutex2var_epi32(a, idx, b);
53516        let e = _mm512_set_epi32(
53517            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53518        );
53519        assert_eq_m512i(r, e);
53520    }
53521
53522    #[simd_test(enable = "avx512f")]
53523    unsafe fn test_mm512_mask_permutex2var_epi32() {
53524        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53525        #[rustfmt::skip]
53526        let idx = _mm512_set_epi32(
53527            1, 1 << 4, 2, 1 << 4,
53528            3, 1 << 4, 4, 1 << 4,
53529            5, 1 << 4, 6, 1 << 4,
53530            7, 1 << 4, 8, 1 << 4,
53531        );
53532        let b = _mm512_set1_epi32(100);
53533        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53534        assert_eq_m512i(r, a);
53535        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53536        let e = _mm512_set_epi32(
53537            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53538        );
53539        assert_eq_m512i(r, e);
53540    }
53541
53542    #[simd_test(enable = "avx512f")]
53543    unsafe fn test_mm512_maskz_permutex2var_epi32() {
53544        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53545        #[rustfmt::skip]
53546        let idx = _mm512_set_epi32(
53547            1, 1 << 4, 2, 1 << 4,
53548            3, 1 << 4, 4, 1 << 4,
53549            5, 1 << 4, 6, 1 << 4,
53550            7, 1 << 4, 8, 1 << 4,
53551        );
53552        let b = _mm512_set1_epi32(100);
53553        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53554        assert_eq_m512i(r, _mm512_setzero_si512());
53555        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53556        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53557        assert_eq_m512i(r, e);
53558    }
53559
53560    #[simd_test(enable = "avx512f")]
53561    unsafe fn test_mm512_mask2_permutex2var_epi32() {
53562        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53563        #[rustfmt::skip]
53564        let idx = _mm512_set_epi32(
53565            1000, 1 << 4, 2000, 1 << 4,
53566            3000, 1 << 4, 4000, 1 << 4,
53567            5, 1 << 4, 6, 1 << 4,
53568            7, 1 << 4, 8, 1 << 4,
53569        );
53570        let b = _mm512_set1_epi32(100);
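        // In the mask2 variant, elements whose mask bit is clear are copied from `idx`
        // rather than from `a`, so an all-zero mask simply returns `idx`.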
53571        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53572        assert_eq_m512i(r, idx);
53573        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53574        #[rustfmt::skip]
53575        let e = _mm512_set_epi32(
53576            1000, 1 << 4, 2000, 1 << 4,
53577            3000, 1 << 4, 4000, 1 << 4,
53578            10, 100, 9, 100,
53579            8, 100, 7, 100,
53580        );
53581        assert_eq_m512i(r, e);
53582    }
53583
53584    #[simd_test(enable = "avx512f,avx512vl")]
53585    unsafe fn test_mm256_permutex2var_epi32() {
53586        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53587        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53588        let b = _mm256_set1_epi32(100);
53589        let r = _mm256_permutex2var_epi32(a, idx, b);
53590        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53591        assert_eq_m256i(r, e);
53592    }
53593
53594    #[simd_test(enable = "avx512f,avx512vl")]
53595    unsafe fn test_mm256_mask_permutex2var_epi32() {
53596        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53597        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53598        let b = _mm256_set1_epi32(100);
53599        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53600        assert_eq_m256i(r, a);
53601        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53602        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53603        assert_eq_m256i(r, e);
53604    }
53605
53606    #[simd_test(enable = "avx512f,avx512vl")]
53607    unsafe fn test_mm256_maskz_permutex2var_epi32() {
53608        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53609        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53610        let b = _mm256_set1_epi32(100);
53611        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53612        assert_eq_m256i(r, _mm256_setzero_si256());
53613        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53614        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53615        assert_eq_m256i(r, e);
53616    }
53617
53618    #[simd_test(enable = "avx512f,avx512vl")]
53619    unsafe fn test_mm256_mask2_permutex2var_epi32() {
53620        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53621        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53622        let b = _mm256_set1_epi32(100);
53623        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53624        assert_eq_m256i(r, idx);
53625        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53626        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53627        assert_eq_m256i(r, e);
53628    }
53629
53630    #[simd_test(enable = "avx512f,avx512vl")]
53631    unsafe fn test_mm_permutex2var_epi32() {
53632        let a = _mm_set_epi32(0, 1, 2, 3);
53633        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53634        let b = _mm_set1_epi32(100);
53635        let r = _mm_permutex2var_epi32(a, idx, b);
53636        let e = _mm_set_epi32(2, 100, 1, 100);
53637        assert_eq_m128i(r, e);
53638    }
53639
53640    #[simd_test(enable = "avx512f,avx512vl")]
53641    unsafe fn test_mm_mask_permutex2var_epi32() {
53642        let a = _mm_set_epi32(0, 1, 2, 3);
53643        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53644        let b = _mm_set1_epi32(100);
53645        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53646        assert_eq_m128i(r, a);
53647        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53648        let e = _mm_set_epi32(2, 100, 1, 100);
53649        assert_eq_m128i(r, e);
53650    }
53651
53652    #[simd_test(enable = "avx512f,avx512vl")]
53653    unsafe fn test_mm_maskz_permutex2var_epi32() {
53654        let a = _mm_set_epi32(0, 1, 2, 3);
53655        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53656        let b = _mm_set1_epi32(100);
53657        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53658        assert_eq_m128i(r, _mm_setzero_si128());
53659        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53660        let e = _mm_set_epi32(2, 100, 1, 100);
53661        assert_eq_m128i(r, e);
53662    }
53663
53664    #[simd_test(enable = "avx512f,avx512vl")]
53665    unsafe fn test_mm_mask2_permutex2var_epi32() {
53666        let a = _mm_set_epi32(0, 1, 2, 3);
53667        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53668        let b = _mm_set1_epi32(100);
53669        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53670        assert_eq_m128i(r, idx);
53671        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53672        let e = _mm_set_epi32(2, 100, 1, 100);
53673        assert_eq_m128i(r, e);
53674    }
53675
53676    #[simd_test(enable = "avx512f")]
53677    unsafe fn test_mm512_permutex2var_ps() {
53678        let a = _mm512_set_ps(
53679            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53680        );
53681        #[rustfmt::skip]
53682        let idx = _mm512_set_epi32(
53683            1, 1 << 4, 2, 1 << 4,
53684            3, 1 << 4, 4, 1 << 4,
53685            5, 1 << 4, 6, 1 << 4,
53686            7, 1 << 4, 8, 1 << 4,
53687        );
53688        let b = _mm512_set1_ps(100.);
53689        let r = _mm512_permutex2var_ps(a, idx, b);
53690        let e = _mm512_set_ps(
53691            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53692        );
53693        assert_eq_m512(r, e);
53694    }
53695
53696    #[simd_test(enable = "avx512f")]
53697    unsafe fn test_mm512_mask_permutex2var_ps() {
53698        let a = _mm512_set_ps(
53699            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53700        );
53701        #[rustfmt::skip]
53702        let idx = _mm512_set_epi32(
53703            1, 1 << 4, 2, 1 << 4,
53704            3, 1 << 4, 4, 1 << 4,
53705            5, 1 << 4, 6, 1 << 4,
53706            7, 1 << 4, 8, 1 << 4,
53707        );
53708        let b = _mm512_set1_ps(100.);
53709        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53710        assert_eq_m512(r, a);
53711        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53712        let e = _mm512_set_ps(
53713            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53714        );
53715        assert_eq_m512(r, e);
53716    }
53717
53718    #[simd_test(enable = "avx512f")]
53719    unsafe fn test_mm512_maskz_permutex2var_ps() {
53720        let a = _mm512_set_ps(
53721            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53722        );
53723        #[rustfmt::skip]
53724        let idx = _mm512_set_epi32(
53725            1, 1 << 4, 2, 1 << 4,
53726            3, 1 << 4, 4, 1 << 4,
53727            5, 1 << 4, 6, 1 << 4,
53728            7, 1 << 4, 8, 1 << 4,
53729        );
53730        let b = _mm512_set1_ps(100.);
53731        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53732        assert_eq_m512(r, _mm512_setzero_ps());
53733        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53734        let e = _mm512_set_ps(
53735            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53736        );
53737        assert_eq_m512(r, e);
53738    }
53739
53740    #[simd_test(enable = "avx512f")]
53741    unsafe fn test_mm512_mask2_permutex2var_ps() {
53742        let a = _mm512_set_ps(
53743            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53744        );
53745        #[rustfmt::skip]
53746        let idx = _mm512_set_epi32(
53747            1, 1 << 4, 2, 1 << 4,
53748            3, 1 << 4, 4, 1 << 4,
53749            5, 1 << 4, 6, 1 << 4,
53750            7, 1 << 4, 8, 1 << 4,
53751        );
53752        let b = _mm512_set1_ps(100.);
53753        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53754        assert_eq_m512(r, _mm512_castsi512_ps(idx));
53755        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53756        let e = _mm512_set_ps(
53757            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53758        );
53759        assert_eq_m512(r, e);
53760    }
53761
53762    #[simd_test(enable = "avx512f,avx512vl")]
53763    unsafe fn test_mm256_permutex2var_ps() {
53764        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53765        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53766        let b = _mm256_set1_ps(100.);
53767        let r = _mm256_permutex2var_ps(a, idx, b);
53768        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53769        assert_eq_m256(r, e);
53770    }
53771
53772    #[simd_test(enable = "avx512f,avx512vl")]
53773    unsafe fn test_mm256_mask_permutex2var_ps() {
53774        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53775        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53776        let b = _mm256_set1_ps(100.);
53777        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53778        assert_eq_m256(r, a);
53779        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53780        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53781        assert_eq_m256(r, e);
53782    }
53783
53784    #[simd_test(enable = "avx512f,avx512vl")]
53785    unsafe fn test_mm256_maskz_permutex2var_ps() {
53786        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53787        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53788        let b = _mm256_set1_ps(100.);
53789        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53790        assert_eq_m256(r, _mm256_setzero_ps());
53791        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53792        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53793        assert_eq_m256(r, e);
53794    }
53795
53796    #[simd_test(enable = "avx512f,avx512vl")]
53797    unsafe fn test_mm256_mask2_permutex2var_ps() {
53798        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53799        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53800        let b = _mm256_set1_ps(100.);
53801        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53802        assert_eq_m256(r, _mm256_castsi256_ps(idx));
53803        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53804        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53805        assert_eq_m256(r, e);
53806    }
53807
53808    #[simd_test(enable = "avx512f,avx512vl")]
53809    unsafe fn test_mm_permutex2var_ps() {
53810        let a = _mm_set_ps(0., 1., 2., 3.);
53811        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53812        let b = _mm_set1_ps(100.);
53813        let r = _mm_permutex2var_ps(a, idx, b);
53814        let e = _mm_set_ps(2., 100., 1., 100.);
53815        assert_eq_m128(r, e);
53816    }
53817
53818    #[simd_test(enable = "avx512f,avx512vl")]
53819    unsafe fn test_mm_mask_permutex2var_ps() {
53820        let a = _mm_set_ps(0., 1., 2., 3.);
53821        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53822        let b = _mm_set1_ps(100.);
53823        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53824        assert_eq_m128(r, a);
53825        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53826        let e = _mm_set_ps(2., 100., 1., 100.);
53827        assert_eq_m128(r, e);
53828    }
53829
53830    #[simd_test(enable = "avx512f,avx512vl")]
53831    unsafe fn test_mm_maskz_permutex2var_ps() {
53832        let a = _mm_set_ps(0., 1., 2., 3.);
53833        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53834        let b = _mm_set1_ps(100.);
53835        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53836        assert_eq_m128(r, _mm_setzero_ps());
53837        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53838        let e = _mm_set_ps(2., 100., 1., 100.);
53839        assert_eq_m128(r, e);
53840    }
53841
53842    #[simd_test(enable = "avx512f,avx512vl")]
53843    unsafe fn test_mm_mask2_permutex2var_ps() {
53844        let a = _mm_set_ps(0., 1., 2., 3.);
53845        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53846        let b = _mm_set1_ps(100.);
53847        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53848        assert_eq_m128(r, _mm_castsi128_ps(idx));
53849        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53850        let e = _mm_set_ps(2., 100., 1., 100.);
53851        assert_eq_m128(r, e);
53852    }
53853
53854    #[simd_test(enable = "avx512f")]
53855    unsafe fn test_mm512_shuffle_epi32() {
53856        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
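        // `_MM_PERM_AADD` (0b00_00_11_11) rebuilds each 128-bit lane as
        // (src[3], src[3], src[0], src[0]), e.g. lane (1, 4, 5, 8) becomes (8, 8, 1, 1).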
53857        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53858        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53859        assert_eq_m512i(r, e);
53860    }
53861
53862    #[simd_test(enable = "avx512f")]
53863    unsafe fn test_mm512_mask_shuffle_epi32() {
53864        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53865        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53866        assert_eq_m512i(r, a);
53867        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53868        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53869        assert_eq_m512i(r, e);
53870    }
53871
53872    #[simd_test(enable = "avx512f")]
53873    unsafe fn test_mm512_maskz_shuffle_epi32() {
53874        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53875        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53876        assert_eq_m512i(r, _mm512_setzero_si512());
53877        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53878        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53879        assert_eq_m512i(r, e);
53880    }
53881
53882    #[simd_test(enable = "avx512f,avx512vl")]
53883    unsafe fn test_mm256_mask_shuffle_epi32() {
53884        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53885        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53886        assert_eq_m256i(r, a);
53887        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53888        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53889        assert_eq_m256i(r, e);
53890    }
53891
53892    #[simd_test(enable = "avx512f,avx512vl")]
53893    unsafe fn test_mm256_maskz_shuffle_epi32() {
53894        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53895        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53896        assert_eq_m256i(r, _mm256_setzero_si256());
53897        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53898        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53899        assert_eq_m256i(r, e);
53900    }
53901
53902    #[simd_test(enable = "avx512f,avx512vl")]
53903    unsafe fn test_mm_mask_shuffle_epi32() {
53904        let a = _mm_set_epi32(1, 4, 5, 8);
53905        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53906        assert_eq_m128i(r, a);
53907        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53908        let e = _mm_set_epi32(8, 8, 1, 1);
53909        assert_eq_m128i(r, e);
53910    }
53911
53912    #[simd_test(enable = "avx512f,avx512vl")]
53913    unsafe fn test_mm_maskz_shuffle_epi32() {
53914        let a = _mm_set_epi32(1, 4, 5, 8);
53915        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53916        assert_eq_m128i(r, _mm_setzero_si128());
53917        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53918        let e = _mm_set_epi32(8, 8, 1, 1);
53919        assert_eq_m128i(r, e);
53920    }
53921
53922    #[simd_test(enable = "avx512f")]
53923    unsafe fn test_mm512_shuffle_ps() {
53924        let a = _mm512_setr_ps(
53925            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53926        );
53927        let b = _mm512_setr_ps(
53928            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53929        );
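        // Per 128-bit lane, the two low selectors of the immediate read from `a` and the
        // two high selectors read from `b`: 0b00_00_11_11 yields (a[3], a[3], b[0], b[0]).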
53930        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
53931        let e = _mm512_setr_ps(
53932            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53933        );
53934        assert_eq_m512(r, e);
53935    }
53936
53937    #[simd_test(enable = "avx512f")]
53938    unsafe fn test_mm512_mask_shuffle_ps() {
53939        let a = _mm512_setr_ps(
53940            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53941        );
53942        let b = _mm512_setr_ps(
53943            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53944        );
53945        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
53946        assert_eq_m512(r, a);
53947        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
53948        let e = _mm512_setr_ps(
53949            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53950        );
53951        assert_eq_m512(r, e);
53952    }
53953
53954    #[simd_test(enable = "avx512f")]
53955    unsafe fn test_mm512_maskz_shuffle_ps() {
53956        let a = _mm512_setr_ps(
53957            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53958        );
53959        let b = _mm512_setr_ps(
53960            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53961        );
53962        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
53963        assert_eq_m512(r, _mm512_setzero_ps());
53964        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
53965        let e = _mm512_setr_ps(
53966            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
53967        );
53968        assert_eq_m512(r, e);
53969    }
53970
53971    #[simd_test(enable = "avx512f,avx512vl")]
53972    unsafe fn test_mm256_mask_shuffle_ps() {
53973        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53974        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53975        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53976        assert_eq_m256(r, a);
53977        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
53978        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53979        assert_eq_m256(r, e);
53980    }
53981
53982    #[simd_test(enable = "avx512f,avx512vl")]
53983    unsafe fn test_mm256_maskz_shuffle_ps() {
53984        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53985        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53986        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
53987        assert_eq_m256(r, _mm256_setzero_ps());
53988        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
53989        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53990        assert_eq_m256(r, e);
53991    }
53992
53993    #[simd_test(enable = "avx512f,avx512vl")]
53994    unsafe fn test_mm_mask_shuffle_ps() {
53995        let a = _mm_set_ps(1., 4., 5., 8.);
53996        let b = _mm_set_ps(2., 3., 6., 7.);
53997        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53998        assert_eq_m128(r, a);
53999        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54000        let e = _mm_set_ps(7., 7., 1., 1.);
54001        assert_eq_m128(r, e);
54002    }
54003
54004    #[simd_test(enable = "avx512f,avx512vl")]
54005    unsafe fn test_mm_maskz_shuffle_ps() {
54006        let a = _mm_set_ps(1., 4., 5., 8.);
54007        let b = _mm_set_ps(2., 3., 6., 7.);
54008        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54009        assert_eq_m128(r, _mm_setzero_ps());
54010        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54011        let e = _mm_set_ps(7., 7., 1., 1.);
54012        assert_eq_m128(r, e);
54013    }
54014
54015    #[simd_test(enable = "avx512f")]
54016    unsafe fn test_mm512_shuffle_i32x4() {
54017        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54018        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
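        // shuffle_i32x4 moves whole 128-bit lanes: the two low result lanes come from `a`
        // and the two high ones from `b`; 0b00_00_00_00 repeats lane 0 of each source.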
54019        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54020        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54021        assert_eq_m512i(r, e);
54022    }
54023
54024    #[simd_test(enable = "avx512f")]
54025    unsafe fn test_mm512_mask_shuffle_i32x4() {
54026        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54027        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54028        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54029        assert_eq_m512i(r, a);
54030        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54031        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54032        assert_eq_m512i(r, e);
54033    }
54034
54035    #[simd_test(enable = "avx512f")]
54036    unsafe fn test_mm512_maskz_shuffle_i32x4() {
54037        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54038        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54039        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54040        assert_eq_m512i(r, _mm512_setzero_si512());
54041        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54042        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54043        assert_eq_m512i(r, e);
54044    }
54045
54046    #[simd_test(enable = "avx512f,avx512vl")]
54047    unsafe fn test_mm256_shuffle_i32x4() {
54048        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54049        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
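        // With two lanes, immediate bit 0 picks a's lane for the low half and bit 1 picks
        // b's lane for the high half; `_mm256_set_epi32` places its last four values in lane 0.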
54050        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54051        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54052        assert_eq_m256i(r, e);
54053    }
54054
54055    #[simd_test(enable = "avx512f,avx512vl")]
54056    unsafe fn test_mm256_mask_shuffle_i32x4() {
54057        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54058        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54059        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54060        assert_eq_m256i(r, a);
54061        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54062        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54063        assert_eq_m256i(r, e);
54064    }
54065
54066    #[simd_test(enable = "avx512f,avx512vl")]
54067    unsafe fn test_mm256_maskz_shuffle_i32x4() {
54068        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54069        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54070        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54071        assert_eq_m256i(r, _mm256_setzero_si256());
54072        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54073        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54074        assert_eq_m256i(r, e);
54075    }
54076
54077    #[simd_test(enable = "avx512f")]
54078    unsafe fn test_mm512_shuffle_f32x4() {
54079        let a = _mm512_setr_ps(
54080            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54081        );
54082        let b = _mm512_setr_ps(
54083            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54084        );
54085        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54086        let e = _mm512_setr_ps(
54087            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54088        );
54089        assert_eq_m512(r, e);
54090    }
54091
54092    #[simd_test(enable = "avx512f")]
54093    unsafe fn test_mm512_mask_shuffle_f32x4() {
54094        let a = _mm512_setr_ps(
54095            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54096        );
54097        let b = _mm512_setr_ps(
54098            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54099        );
54100        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54101        assert_eq_m512(r, a);
54102        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54103        let e = _mm512_setr_ps(
54104            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54105        );
54106        assert_eq_m512(r, e);
54107    }
54108
54109    #[simd_test(enable = "avx512f")]
54110    unsafe fn test_mm512_maskz_shuffle_f32x4() {
54111        let a = _mm512_setr_ps(
54112            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54113        );
54114        let b = _mm512_setr_ps(
54115            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54116        );
54117        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54118        assert_eq_m512(r, _mm512_setzero_ps());
54119        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120        let e = _mm512_setr_ps(
54121            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54122        );
54123        assert_eq_m512(r, e);
54124    }
54125
54126    #[simd_test(enable = "avx512f,avx512vl")]
54127    unsafe fn test_mm256_shuffle_f32x4() {
54128        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54129        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54130        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54131        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54132        assert_eq_m256(r, e);
54133    }
54134
54135    #[simd_test(enable = "avx512f,avx512vl")]
54136    unsafe fn test_mm256_mask_shuffle_f32x4() {
54137        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54138        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54139        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54140        assert_eq_m256(r, a);
54141        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54142        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54143        assert_eq_m256(r, e);
54144    }
54145
54146    #[simd_test(enable = "avx512f,avx512vl")]
54147    unsafe fn test_mm256_maskz_shuffle_f32x4() {
54148        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54149        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54150        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54151        assert_eq_m256(r, _mm256_setzero_ps());
54152        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54153        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54154        assert_eq_m256(r, e);
54155    }
54156
54157    #[simd_test(enable = "avx512f")]
54158    unsafe fn test_mm512_extractf32x4_ps() {
54159        let a = _mm512_setr_ps(
54160            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54161        );
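        // Index 1 extracts the second 128-bit lane, i.e. elements 4..=7.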
54162        let r = _mm512_extractf32x4_ps::<1>(a);
54163        let e = _mm_setr_ps(5., 6., 7., 8.);
54164        assert_eq_m128(r, e);
54165    }
54166
54167    #[simd_test(enable = "avx512f")]
54168    unsafe fn test_mm512_mask_extractf32x4_ps() {
54169        let a = _mm512_setr_ps(
54170            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54171        );
54172        let src = _mm_set1_ps(100.);
54173        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54174        assert_eq_m128(r, src);
54175        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54176        let e = _mm_setr_ps(5., 6., 7., 8.);
54177        assert_eq_m128(r, e);
54178    }
54179
54180    #[simd_test(enable = "avx512f")]
54181    unsafe fn test_mm512_maskz_extractf32x4_ps() {
54182        let a = _mm512_setr_ps(
54183            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54184        );
54185        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54186        assert_eq_m128(r, _mm_setzero_ps());
54187        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54188        let e = _mm_setr_ps(5., 0., 0., 0.);
54189        assert_eq_m128(r, e);
54190    }
54191
54192    #[simd_test(enable = "avx512f,avx512vl")]
54193    unsafe fn test_mm256_extractf32x4_ps() {
54194        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54195        let r = _mm256_extractf32x4_ps::<1>(a);
54196        let e = _mm_set_ps(1., 2., 3., 4.);
54197        assert_eq_m128(r, e);
54198    }
54199
54200    #[simd_test(enable = "avx512f,avx512vl")]
54201    unsafe fn test_mm256_mask_extractf32x4_ps() {
54202        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54203        let src = _mm_set1_ps(100.);
54204        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54205        assert_eq_m128(r, src);
54206        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54207        let e = _mm_set_ps(1., 2., 3., 4.);
54208        assert_eq_m128(r, e);
54209    }
54210
54211    #[simd_test(enable = "avx512f,avx512vl")]
54212    unsafe fn test_mm256_maskz_extractf32x4_ps() {
54213        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54214        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54215        assert_eq_m128(r, _mm_setzero_ps());
54216        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54217        let e = _mm_set_ps(1., 2., 3., 4.);
54218        assert_eq_m128(r, e);
54219    }
54220
54221    #[simd_test(enable = "avx512f")]
54222    unsafe fn test_mm512_extracti32x4_epi32() {
54223        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54224        let r = _mm512_extracti32x4_epi32::<1>(a);
54225        let e = _mm_setr_epi32(5, 6, 7, 8);
54226        assert_eq_m128i(r, e);
54227    }
54228
54229    #[simd_test(enable = "avx512f")]
54230    unsafe fn test_mm512_mask_extracti32x4_epi32() {
54231        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54232        let src = _mm_set1_epi32(100);
54233        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54234        assert_eq_m128i(r, src);
54235        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54236        let e = _mm_setr_epi32(5, 6, 7, 8);
54237        assert_eq_m128i(r, e);
54238    }
54239
54240    #[simd_test(enable = "avx512f")]
54241    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54242        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54243        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54244        assert_eq_m128i(r, _mm_setzero_si128());
54245        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54246        let e = _mm_setr_epi32(5, 0, 0, 0);
54247        assert_eq_m128i(r, e);
54248    }
54249
54250    #[simd_test(enable = "avx512f,avx512vl")]
54251    unsafe fn test_mm256_extracti32x4_epi32() {
54252        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54253        let r = _mm256_extracti32x4_epi32::<1>(a);
54254        let e = _mm_set_epi32(1, 2, 3, 4);
54255        assert_eq_m128i(r, e);
54256    }
54257
54258    #[simd_test(enable = "avx512f,avx512vl")]
54259    unsafe fn test_mm256_mask_extracti32x4_epi32() {
54260        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54261        let src = _mm_set1_epi32(100);
54262        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54263        assert_eq_m128i(r, src);
54264        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54265        let e = _mm_set_epi32(1, 2, 3, 4);
54266        assert_eq_m128i(r, e);
54267    }
54268
54269    #[simd_test(enable = "avx512f,avx512vl")]
54270    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54271        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54272        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54273        assert_eq_m128i(r, _mm_setzero_si128());
54274        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54275        let e = _mm_set_epi32(1, 2, 3, 4);
54276        assert_eq_m128i(r, e);
54277    }
54278
54279    #[simd_test(enable = "avx512f")]
54280    unsafe fn test_mm512_moveldup_ps() {
54281        let a = _mm512_setr_ps(
54282            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54283        );
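        // moveldup duplicates each even-indexed element into the odd slot above it.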
54284        let r = _mm512_moveldup_ps(a);
54285        let e = _mm512_setr_ps(
54286            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54287        );
54288        assert_eq_m512(r, e);
54289    }
54290
54291    #[simd_test(enable = "avx512f")]
54292    unsafe fn test_mm512_mask_moveldup_ps() {
54293        let a = _mm512_setr_ps(
54294            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54295        );
54296        let r = _mm512_mask_moveldup_ps(a, 0, a);
54297        assert_eq_m512(r, a);
54298        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54299        let e = _mm512_setr_ps(
54300            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54301        );
54302        assert_eq_m512(r, e);
54303    }
54304
54305    #[simd_test(enable = "avx512f")]
54306    unsafe fn test_mm512_maskz_moveldup_ps() {
54307        let a = _mm512_setr_ps(
54308            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54309        );
54310        let r = _mm512_maskz_moveldup_ps(0, a);
54311        assert_eq_m512(r, _mm512_setzero_ps());
54312        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54313        let e = _mm512_setr_ps(
54314            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54315        );
54316        assert_eq_m512(r, e);
54317    }
54318
54319    #[simd_test(enable = "avx512f,avx512vl")]
54320    unsafe fn test_mm256_mask_moveldup_ps() {
54321        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54322        let r = _mm256_mask_moveldup_ps(a, 0, a);
54323        assert_eq_m256(r, a);
54324        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54325        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54326        assert_eq_m256(r, e);
54327    }
54328
54329    #[simd_test(enable = "avx512f,avx512vl")]
54330    unsafe fn test_mm256_maskz_moveldup_ps() {
54331        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54332        let r = _mm256_maskz_moveldup_ps(0, a);
54333        assert_eq_m256(r, _mm256_setzero_ps());
54334        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54335        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54336        assert_eq_m256(r, e);
54337    }
54338
54339    #[simd_test(enable = "avx512f,avx512vl")]
54340    unsafe fn test_mm_mask_moveldup_ps() {
54341        let a = _mm_set_ps(1., 2., 3., 4.);
54342        let r = _mm_mask_moveldup_ps(a, 0, a);
54343        assert_eq_m128(r, a);
54344        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54345        let e = _mm_set_ps(2., 2., 4., 4.);
54346        assert_eq_m128(r, e);
54347    }
54348
54349    #[simd_test(enable = "avx512f,avx512vl")]
54350    unsafe fn test_mm_maskz_moveldup_ps() {
54351        let a = _mm_set_ps(1., 2., 3., 4.);
54352        let r = _mm_maskz_moveldup_ps(0, a);
54353        assert_eq_m128(r, _mm_setzero_ps());
54354        let r = _mm_maskz_moveldup_ps(0b00001111, a);
54355        let e = _mm_set_ps(2., 2., 4., 4.);
54356        assert_eq_m128(r, e);
54357    }
54358
54359    #[simd_test(enable = "avx512f")]
54360    unsafe fn test_mm512_movehdup_ps() {
54361        let a = _mm512_setr_ps(
54362            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54363        );
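        // movehdup duplicates each odd-indexed element into the even slot below it.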
54364        let r = _mm512_movehdup_ps(a);
54365        let e = _mm512_setr_ps(
54366            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54367        );
54368        assert_eq_m512(r, e);
54369    }
54370
54371    #[simd_test(enable = "avx512f")]
54372    unsafe fn test_mm512_mask_movehdup_ps() {
54373        let a = _mm512_setr_ps(
54374            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54375        );
54376        let r = _mm512_mask_movehdup_ps(a, 0, a);
54377        assert_eq_m512(r, a);
54378        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54379        let e = _mm512_setr_ps(
54380            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54381        );
54382        assert_eq_m512(r, e);
54383    }
54384
54385    #[simd_test(enable = "avx512f")]
54386    unsafe fn test_mm512_maskz_movehdup_ps() {
54387        let a = _mm512_setr_ps(
54388            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54389        );
54390        let r = _mm512_maskz_movehdup_ps(0, a);
54391        assert_eq_m512(r, _mm512_setzero_ps());
54392        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54393        let e = _mm512_setr_ps(
54394            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54395        );
54396        assert_eq_m512(r, e);
54397    }
54398
54399    #[simd_test(enable = "avx512f,avx512vl")]
54400    unsafe fn test_mm256_mask_movehdup_ps() {
54401        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54402        let r = _mm256_mask_movehdup_ps(a, 0, a);
54403        assert_eq_m256(r, a);
54404        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54405        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54406        assert_eq_m256(r, e);
54407    }
54408
54409    #[simd_test(enable = "avx512f,avx512vl")]
54410    unsafe fn test_mm256_maskz_movehdup_ps() {
54411        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54412        let r = _mm256_maskz_movehdup_ps(0, a);
54413        assert_eq_m256(r, _mm256_setzero_ps());
54414        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54415        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54416        assert_eq_m256(r, e);
54417    }
54418
54419    #[simd_test(enable = "avx512f,avx512vl")]
54420    unsafe fn test_mm_mask_movehdup_ps() {
54421        let a = _mm_set_ps(1., 2., 3., 4.);
54422        let r = _mm_mask_movehdup_ps(a, 0, a);
54423        assert_eq_m128(r, a);
54424        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54425        let e = _mm_set_ps(1., 1., 3., 3.);
54426        assert_eq_m128(r, e);
54427    }
54428
54429    #[simd_test(enable = "avx512f,avx512vl")]
54430    unsafe fn test_mm_maskz_movehdup_ps() {
54431        let a = _mm_set_ps(1., 2., 3., 4.);
54432        let r = _mm_maskz_movehdup_ps(0, a);
54433        assert_eq_m128(r, _mm_setzero_ps());
54434        let r = _mm_maskz_movehdup_ps(0b00001111, a);
54435        let e = _mm_set_ps(1., 1., 3., 3.);
54436        assert_eq_m128(r, e);
54437    }
54438
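    // `inserti32x4`/`insertf32x4` replace the 128-bit lane selected by the const index with the
    // 128-bit source operand and leave the remaining lanes unchanged.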
54439    #[simd_test(enable = "avx512f")]
54440    unsafe fn test_mm512_inserti32x4() {
54441        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54442        let b = _mm_setr_epi32(17, 18, 19, 20);
54443        let r = _mm512_inserti32x4::<0>(a, b);
54444        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54445        assert_eq_m512i(r, e);
54446    }
54447
54448    #[simd_test(enable = "avx512f")]
54449    unsafe fn test_mm512_mask_inserti32x4() {
54450        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54451        let b = _mm_setr_epi32(17, 18, 19, 20);
54452        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54453        assert_eq_m512i(r, a);
54454        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54455        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54456        assert_eq_m512i(r, e);
54457    }
54458
54459    #[simd_test(enable = "avx512f")]
54460    unsafe fn test_mm512_maskz_inserti32x4() {
54461        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54462        let b = _mm_setr_epi32(17, 18, 19, 20);
54463        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54464        assert_eq_m512i(r, _mm512_setzero_si512());
54465        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54466        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54467        assert_eq_m512i(r, e);
54468    }
54469
54470    #[simd_test(enable = "avx512f,avx512vl")]
54471    unsafe fn test_mm256_inserti32x4() {
54472        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54473        let b = _mm_set_epi32(17, 18, 19, 20);
54474        let r = _mm256_inserti32x4::<1>(a, b);
54475        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54476        assert_eq_m256i(r, e);
54477    }
54478
54479    #[simd_test(enable = "avx512f,avx512vl")]
54480    unsafe fn test_mm256_mask_inserti32x4() {
54481        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54482        let b = _mm_set_epi32(17, 18, 19, 20);
54483        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54484        assert_eq_m256i(r, a);
54485        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54486        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54487        assert_eq_m256i(r, e);
54488    }
54489
54490    #[simd_test(enable = "avx512f,avx512vl")]
54491    unsafe fn test_mm256_maskz_inserti32x4() {
54492        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54493        let b = _mm_set_epi32(17, 18, 19, 20);
54494        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54495        assert_eq_m256i(r, _mm256_setzero_si256());
54496        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54497        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54498        assert_eq_m256i(r, e);
54499    }
54500
54501    #[simd_test(enable = "avx512f")]
54502    unsafe fn test_mm512_insertf32x4() {
54503        let a = _mm512_setr_ps(
54504            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54505        );
54506        let b = _mm_setr_ps(17., 18., 19., 20.);
54507        let r = _mm512_insertf32x4::<0>(a, b);
54508        let e = _mm512_setr_ps(
54509            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54510        );
54511        assert_eq_m512(r, e);
54512    }
54513
54514    #[simd_test(enable = "avx512f")]
54515    unsafe fn test_mm512_mask_insertf32x4() {
54516        let a = _mm512_setr_ps(
54517            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54518        );
54519        let b = _mm_setr_ps(17., 18., 19., 20.);
54520        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54521        assert_eq_m512(r, a);
54522        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54523        let e = _mm512_setr_ps(
54524            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54525        );
54526        assert_eq_m512(r, e);
54527    }
54528
54529    #[simd_test(enable = "avx512f")]
54530    unsafe fn test_mm512_maskz_insertf32x4() {
54531        let a = _mm512_setr_ps(
54532            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54533        );
54534        let b = _mm_setr_ps(17., 18., 19., 20.);
54535        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54536        assert_eq_m512(r, _mm512_setzero_ps());
54537        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54538        let e = _mm512_setr_ps(
54539            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54540        );
54541        assert_eq_m512(r, e);
54542    }
54543
54544    #[simd_test(enable = "avx512f,avx512vl")]
54545    unsafe fn test_mm256_insertf32x4() {
54546        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54547        let b = _mm_set_ps(17., 18., 19., 20.);
54548        let r = _mm256_insertf32x4::<1>(a, b);
54549        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54550        assert_eq_m256(r, e);
54551    }
54552
54553    #[simd_test(enable = "avx512f,avx512vl")]
54554    unsafe fn test_mm256_mask_insertf32x4() {
54555        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54556        let b = _mm_set_ps(17., 18., 19., 20.);
54557        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54558        assert_eq_m256(r, a);
54559        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54560        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54561        assert_eq_m256(r, e);
54562    }
54563
54564    #[simd_test(enable = "avx512f,avx512vl")]
54565    unsafe fn test_mm256_maskz_insertf32x4() {
54566        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54567        let b = _mm_set_ps(17., 18., 19., 20.);
54568        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54569        assert_eq_m256(r, _mm256_setzero_ps());
54570        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54571        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54572        assert_eq_m256(r, e);
54573    }
54574
54575    #[simd_test(enable = "avx512f")]
54576    unsafe fn test_mm512_castps128_ps512() {
54577        let a = _mm_setr_ps(17., 18., 19., 20.);
54578        let r = _mm512_castps128_ps512(a);
54579        assert_eq_m128(_mm512_castps512_ps128(r), a);
54580    }
54581
54582    #[simd_test(enable = "avx512f")]
54583    unsafe fn test_mm512_castps256_ps512() {
54584        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54585        let r = _mm512_castps256_ps512(a);
54586        assert_eq_m256(_mm512_castps512_ps256(r), a);
54587    }
54588
54589    #[simd_test(enable = "avx512f")]
54590    unsafe fn test_mm512_zextps128_ps512() {
54591        let a = _mm_setr_ps(17., 18., 19., 20.);
54592        let r = _mm512_zextps128_ps512(a);
54593        let e = _mm512_setr_ps(
54594            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54595        );
54596        assert_eq_m512(r, e);
54597    }
54598
54599    #[simd_test(enable = "avx512f")]
54600    unsafe fn test_mm512_zextps256_ps512() {
54601        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54602        let r = _mm512_zextps256_ps512(a);
54603        let e = _mm512_setr_ps(
54604            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54605        );
54606        assert_eq_m512(r, e);
54607    }
54608
54609    #[simd_test(enable = "avx512f")]
54610    unsafe fn test_mm512_castps512_ps128() {
54611        let a = _mm512_setr_ps(
54612            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54613        );
54614        let r = _mm512_castps512_ps128(a);
54615        let e = _mm_setr_ps(17., 18., 19., 20.);
54616        assert_eq_m128(r, e);
54617    }
54618
54619    #[simd_test(enable = "avx512f")]
54620    unsafe fn test_mm512_castps512_ps256() {
54621        let a = _mm512_setr_ps(
54622            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54623        );
54624        let r = _mm512_castps512_ps256(a);
54625        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54626        assert_eq_m256(r, e);
54627    }
54628
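    // The expected constant below is the bit pattern of two `1.0_f32` values (0x3F80_0000 each)
    // reinterpreted as a single `f64` (bits 0x3F80_0000_3F80_0000), which is approximately
    // 0.007812501848093234.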
54629    #[simd_test(enable = "avx512f")]
54630    unsafe fn test_mm512_castps_pd() {
54631        let a = _mm512_set1_ps(1.);
54632        let r = _mm512_castps_pd(a);
54633        let e = _mm512_set1_pd(0.007812501848093234);
54634        assert_eq_m512d(r, e);
54635    }
54636
54637    #[simd_test(enable = "avx512f")]
54638    unsafe fn test_mm512_castps_si512() {
54639        let a = _mm512_set1_ps(1.);
54640        let r = _mm512_castps_si512(a);
54641        let e = _mm512_set1_epi32(1065353216);
54642        assert_eq_m512i(r, e);
54643    }
54644
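    // `_mm_set_epi32(17, 18, 19, 20)` places 20 in element 0, and `vpbroadcastd`/`vbroadcastss`
    // replicate element 0 of the source, so the broadcast tests below expect a splat of 20.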
54645    #[simd_test(enable = "avx512f")]
54646    unsafe fn test_mm512_broadcastd_epi32() {
54647        let a = _mm_set_epi32(17, 18, 19, 20);
54648        let r = _mm512_broadcastd_epi32(a);
54649        let e = _mm512_set1_epi32(20);
54650        assert_eq_m512i(r, e);
54651    }
54652
54653    #[simd_test(enable = "avx512f")]
54654    unsafe fn test_mm512_mask_broadcastd_epi32() {
54655        let src = _mm512_set1_epi32(20);
54656        let a = _mm_set_epi32(17, 18, 19, 20);
54657        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54658        assert_eq_m512i(r, src);
54659        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54660        let e = _mm512_set1_epi32(20);
54661        assert_eq_m512i(r, e);
54662    }
54663
54664    #[simd_test(enable = "avx512f")]
54665    unsafe fn test_mm512_maskz_broadcastd_epi32() {
54666        let a = _mm_set_epi32(17, 18, 19, 20);
54667        let r = _mm512_maskz_broadcastd_epi32(0, a);
54668        assert_eq_m512i(r, _mm512_setzero_si512());
54669        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54670        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54671        assert_eq_m512i(r, e);
54672    }
54673
54674    #[simd_test(enable = "avx512f,avx512vl")]
54675    unsafe fn test_mm256_mask_broadcastd_epi32() {
54676        let src = _mm256_set1_epi32(20);
54677        let a = _mm_set_epi32(17, 18, 19, 20);
54678        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54679        assert_eq_m256i(r, src);
54680        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54681        let e = _mm256_set1_epi32(20);
54682        assert_eq_m256i(r, e);
54683    }
54684
54685    #[simd_test(enable = "avx512f,avx512vl")]
54686    unsafe fn test_mm256_maskz_broadcastd_epi32() {
54687        let a = _mm_set_epi32(17, 18, 19, 20);
54688        let r = _mm256_maskz_broadcastd_epi32(0, a);
54689        assert_eq_m256i(r, _mm256_setzero_si256());
54690        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54691        let e = _mm256_set1_epi32(20);
54692        assert_eq_m256i(r, e);
54693    }
54694
54695    #[simd_test(enable = "avx512f,avx512vl")]
54696    unsafe fn test_mm_mask_broadcastd_epi32() {
54697        let src = _mm_set1_epi32(20);
54698        let a = _mm_set_epi32(17, 18, 19, 20);
54699        let r = _mm_mask_broadcastd_epi32(src, 0, a);
54700        assert_eq_m128i(r, src);
54701        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54702        let e = _mm_set1_epi32(20);
54703        assert_eq_m128i(r, e);
54704    }
54705
54706    #[simd_test(enable = "avx512f,avx512vl")]
54707    unsafe fn test_mm_maskz_broadcastd_epi32() {
54708        let a = _mm_set_epi32(17, 18, 19, 20);
54709        let r = _mm_maskz_broadcastd_epi32(0, a);
54710        assert_eq_m128i(r, _mm_setzero_si128());
54711        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54712        let e = _mm_set1_epi32(20);
54713        assert_eq_m128i(r, e);
54714    }
54715
54716    #[simd_test(enable = "avx512f")]
54717    unsafe fn test_mm512_broadcastss_ps() {
54718        let a = _mm_set_ps(17., 18., 19., 20.);
54719        let r = _mm512_broadcastss_ps(a);
54720        let e = _mm512_set1_ps(20.);
54721        assert_eq_m512(r, e);
54722    }
54723
54724    #[simd_test(enable = "avx512f")]
54725    unsafe fn test_mm512_mask_broadcastss_ps() {
54726        let src = _mm512_set1_ps(20.);
54727        let a = _mm_set_ps(17., 18., 19., 20.);
54728        let r = _mm512_mask_broadcastss_ps(src, 0, a);
54729        assert_eq_m512(r, src);
54730        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54731        let e = _mm512_set1_ps(20.);
54732        assert_eq_m512(r, e);
54733    }
54734
54735    #[simd_test(enable = "avx512f")]
54736    unsafe fn test_mm512_maskz_broadcastss_ps() {
54737        let a = _mm_set_ps(17., 18., 19., 20.);
54738        let r = _mm512_maskz_broadcastss_ps(0, a);
54739        assert_eq_m512(r, _mm512_setzero_ps());
54740        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54741        let e = _mm512_setr_ps(
54742            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54743        );
54744        assert_eq_m512(r, e);
54745    }
54746
54747    #[simd_test(enable = "avx512f,avx512vl")]
54748    unsafe fn test_mm256_mask_broadcastss_ps() {
54749        let src = _mm256_set1_ps(20.);
54750        let a = _mm_set_ps(17., 18., 19., 20.);
54751        let r = _mm256_mask_broadcastss_ps(src, 0, a);
54752        assert_eq_m256(r, src);
54753        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54754        let e = _mm256_set1_ps(20.);
54755        assert_eq_m256(r, e);
54756    }
54757
54758    #[simd_test(enable = "avx512f,avx512vl")]
54759    unsafe fn test_mm256_maskz_broadcastss_ps() {
54760        let a = _mm_set_ps(17., 18., 19., 20.);
54761        let r = _mm256_maskz_broadcastss_ps(0, a);
54762        assert_eq_m256(r, _mm256_setzero_ps());
54763        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54764        let e = _mm256_set1_ps(20.);
54765        assert_eq_m256(r, e);
54766    }
54767
54768    #[simd_test(enable = "avx512f,avx512vl")]
54769    unsafe fn test_mm_mask_broadcastss_ps() {
54770        let src = _mm_set1_ps(20.);
54771        let a = _mm_set_ps(17., 18., 19., 20.);
54772        let r = _mm_mask_broadcastss_ps(src, 0, a);
54773        assert_eq_m128(r, src);
54774        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54775        let e = _mm_set1_ps(20.);
54776        assert_eq_m128(r, e);
54777    }
54778
54779    #[simd_test(enable = "avx512f,avx512vl")]
54780    unsafe fn test_mm_maskz_broadcastss_ps() {
54781        let a = _mm_set_ps(17., 18., 19., 20.);
54782        let r = _mm_maskz_broadcastss_ps(0, a);
54783        assert_eq_m128(r, _mm_setzero_ps());
54784        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54785        let e = _mm_set1_ps(20.);
54786        assert_eq_m128(r, e);
54787    }
54788
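    // `broadcast_i32x4`/`broadcast_f32x4` replicate the 128-bit source into every 128-bit lane
    // of the destination.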
54789    #[simd_test(enable = "avx512f")]
54790    unsafe fn test_mm512_broadcast_i32x4() {
54791        let a = _mm_set_epi32(17, 18, 19, 20);
54792        let r = _mm512_broadcast_i32x4(a);
54793        let e = _mm512_set_epi32(
54794            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54795        );
54796        assert_eq_m512i(r, e);
54797    }
54798
54799    #[simd_test(enable = "avx512f")]
54800    unsafe fn test_mm512_mask_broadcast_i32x4() {
54801        let src = _mm512_set1_epi32(20);
54802        let a = _mm_set_epi32(17, 18, 19, 20);
54803        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54804        assert_eq_m512i(r, src);
54805        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54806        let e = _mm512_set_epi32(
54807            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54808        );
54809        assert_eq_m512i(r, e);
54810    }
54811
54812    #[simd_test(enable = "avx512f")]
54813    unsafe fn test_mm512_maskz_broadcast_i32x4() {
54814        let a = _mm_set_epi32(17, 18, 19, 20);
54815        let r = _mm512_maskz_broadcast_i32x4(0, a);
54816        assert_eq_m512i(r, _mm512_setzero_si512());
54817        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54818        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54819        assert_eq_m512i(r, e);
54820    }
54821
54822    #[simd_test(enable = "avx512f,avx512vl")]
54823    unsafe fn test_mm256_broadcast_i32x4() {
54824        let a = _mm_set_epi32(17, 18, 19, 20);
54825        let r = _mm256_broadcast_i32x4(a);
54826        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54827        assert_eq_m256i(r, e);
54828    }
54829
54830    #[simd_test(enable = "avx512f,avx512vl")]
54831    unsafe fn test_mm256_mask_broadcast_i32x4() {
54832        let src = _mm256_set1_epi32(20);
54833        let a = _mm_set_epi32(17, 18, 19, 20);
54834        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54835        assert_eq_m256i(r, src);
54836        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54837        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54838        assert_eq_m256i(r, e);
54839    }
54840
54841    #[simd_test(enable = "avx512f,avx512vl")]
54842    unsafe fn test_mm256_maskz_broadcast_i32x4() {
54843        let a = _mm_set_epi32(17, 18, 19, 20);
54844        let r = _mm256_maskz_broadcast_i32x4(0, a);
54845        assert_eq_m256i(r, _mm256_setzero_si256());
54846        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54847        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54848        assert_eq_m256i(r, e);
54849    }
54850
54851    #[simd_test(enable = "avx512f")]
54852    unsafe fn test_mm512_broadcast_f32x4() {
54853        let a = _mm_set_ps(17., 18., 19., 20.);
54854        let r = _mm512_broadcast_f32x4(a);
54855        let e = _mm512_set_ps(
54856            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54857        );
54858        assert_eq_m512(r, e);
54859    }
54860
54861    #[simd_test(enable = "avx512f")]
54862    unsafe fn test_mm512_mask_broadcast_f32x4() {
54863        let src = _mm512_set1_ps(20.);
54864        let a = _mm_set_ps(17., 18., 19., 20.);
54865        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54866        assert_eq_m512(r, src);
54867        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54868        let e = _mm512_set_ps(
54869            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54870        );
54871        assert_eq_m512(r, e);
54872    }
54873
54874    #[simd_test(enable = "avx512f")]
54875    unsafe fn test_mm512_maskz_broadcast_f32x4() {
54876        let a = _mm_set_ps(17., 18., 19., 20.);
54877        let r = _mm512_maskz_broadcast_f32x4(0, a);
54878        assert_eq_m512(r, _mm512_setzero_ps());
54879        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54880        let e = _mm512_set_ps(
54881            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54882        );
54883        assert_eq_m512(r, e);
54884    }
54885
54886    #[simd_test(enable = "avx512f,avx512vl")]
54887    unsafe fn test_mm256_broadcast_f32x4() {
54888        let a = _mm_set_ps(17., 18., 19., 20.);
54889        let r = _mm256_broadcast_f32x4(a);
54890        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54891        assert_eq_m256(r, e);
54892    }
54893
54894    #[simd_test(enable = "avx512f,avx512vl")]
54895    unsafe fn test_mm256_mask_broadcast_f32x4() {
54896        let src = _mm256_set1_ps(20.);
54897        let a = _mm_set_ps(17., 18., 19., 20.);
54898        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54899        assert_eq_m256(r, src);
54900        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54901        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54902        assert_eq_m256(r, e);
54903    }
54904
54905    #[simd_test(enable = "avx512f,avx512vl")]
54906    unsafe fn test_mm256_maskz_broadcast_f32x4() {
54907        let a = _mm_set_ps(17., 18., 19., 20.);
54908        let r = _mm256_maskz_broadcast_f32x4(0, a);
54909        assert_eq_m256(r, _mm256_setzero_ps());
54910        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54911        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54912        assert_eq_m256(r, e);
54913    }
54914
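    // For the blend intrinsics, a set mask bit selects the element from `b` and a clear bit
    // keeps the element from `a`; mask bit 0 corresponds to element 0.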
54915    #[simd_test(enable = "avx512f")]
54916    unsafe fn test_mm512_mask_blend_epi32() {
54917        let a = _mm512_set1_epi32(1);
54918        let b = _mm512_set1_epi32(2);
54919        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54920        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54921        assert_eq_m512i(r, e);
54922    }
54923
54924    #[simd_test(enable = "avx512f,avx512vl")]
54925    unsafe fn test_mm256_mask_blend_epi32() {
54926        let a = _mm256_set1_epi32(1);
54927        let b = _mm256_set1_epi32(2);
54928        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
54929        let e = _mm256_set1_epi32(2);
54930        assert_eq_m256i(r, e);
54931    }
54932
54933    #[simd_test(enable = "avx512f,avx512vl")]
54934    unsafe fn test_mm_mask_blend_epi32() {
54935        let a = _mm_set1_epi32(1);
54936        let b = _mm_set1_epi32(2);
54937        let r = _mm_mask_blend_epi32(0b00001111, a, b);
54938        let e = _mm_set1_epi32(2);
54939        assert_eq_m128i(r, e);
54940    }
54941
54942    #[simd_test(enable = "avx512f")]
54943    unsafe fn test_mm512_mask_blend_ps() {
54944        let a = _mm512_set1_ps(1.);
54945        let b = _mm512_set1_ps(2.);
54946        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
54947        let e = _mm512_set_ps(
54948            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
54949        );
54950        assert_eq_m512(r, e);
54951    }
54952
54953    #[simd_test(enable = "avx512f,avx512vl")]
54954    unsafe fn test_mm256_mask_blend_ps() {
54955        let a = _mm256_set1_ps(1.);
54956        let b = _mm256_set1_ps(2.);
54957        let r = _mm256_mask_blend_ps(0b11111111, a, b);
54958        let e = _mm256_set1_ps(2.);
54959        assert_eq_m256(r, e);
54960    }
54961
54962    #[simd_test(enable = "avx512f,avx512vl")]
54963    unsafe fn test_mm_mask_blend_ps() {
54964        let a = _mm_set1_ps(1.);
54965        let b = _mm_set1_ps(2.);
54966        let r = _mm_mask_blend_ps(0b00001111, a, b);
54967        let e = _mm_set1_ps(2.);
54968        assert_eq_m128(r, e);
54969    }
54970
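    // `unpackhi`/`unpacklo` interleave the high (or low) halves of `a` and `b` independently
    // within each 128-bit lane, which is why the expected vectors interleave values from `b`
    // and `a` within each group of four elements rather than across the whole register.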
54971    #[simd_test(enable = "avx512f")]
54972    unsafe fn test_mm512_unpackhi_epi32() {
54973        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54974        let b = _mm512_set_epi32(
54975            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54976        );
54977        let r = _mm512_unpackhi_epi32(a, b);
54978        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54979        assert_eq_m512i(r, e);
54980    }
54981
54982    #[simd_test(enable = "avx512f")]
54983    unsafe fn test_mm512_mask_unpackhi_epi32() {
54984        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54985        let b = _mm512_set_epi32(
54986            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54987        );
54988        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
54989        assert_eq_m512i(r, a);
54990        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
54991        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54992        assert_eq_m512i(r, e);
54993    }
54994
54995    #[simd_test(enable = "avx512f")]
54996    unsafe fn test_mm512_maskz_unpackhi_epi32() {
54997        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54998        let b = _mm512_set_epi32(
54999            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55000        );
55001        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55002        assert_eq_m512i(r, _mm512_setzero_si512());
55003        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55004        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55005        assert_eq_m512i(r, e);
55006    }
55007
55008    #[simd_test(enable = "avx512f,avx512vl")]
55009    unsafe fn test_mm256_mask_unpackhi_epi32() {
55010        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55011        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55012        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55013        assert_eq_m256i(r, a);
55014        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55015        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55016        assert_eq_m256i(r, e);
55017    }
55018
55019    #[simd_test(enable = "avx512f,avx512vl")]
55020    unsafe fn test_mm256_maskz_unpackhi_epi32() {
55021        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55022        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55023        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55024        assert_eq_m256i(r, _mm256_setzero_si256());
55025        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55026        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55027        assert_eq_m256i(r, e);
55028    }
55029
55030    #[simd_test(enable = "avx512f,avx512vl")]
55031    unsafe fn test_mm_mask_unpackhi_epi32() {
55032        let a = _mm_set_epi32(1, 2, 3, 4);
55033        let b = _mm_set_epi32(17, 18, 19, 20);
55034        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55035        assert_eq_m128i(r, a);
55036        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55037        let e = _mm_set_epi32(17, 1, 18, 2);
55038        assert_eq_m128i(r, e);
55039    }
55040
55041    #[simd_test(enable = "avx512f,avx512vl")]
55042    unsafe fn test_mm_maskz_unpackhi_epi32() {
55043        let a = _mm_set_epi32(1, 2, 3, 4);
55044        let b = _mm_set_epi32(17, 18, 19, 20);
55045        let r = _mm_maskz_unpackhi_epi32(0, a, b);
55046        assert_eq_m128i(r, _mm_setzero_si128());
55047        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55048        let e = _mm_set_epi32(17, 1, 18, 2);
55049        assert_eq_m128i(r, e);
55050    }
55051
55052    #[simd_test(enable = "avx512f")]
55053    unsafe fn test_mm512_unpackhi_ps() {
55054        let a = _mm512_set_ps(
55055            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55056        );
55057        let b = _mm512_set_ps(
55058            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55059        );
55060        let r = _mm512_unpackhi_ps(a, b);
55061        let e = _mm512_set_ps(
55062            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55063        );
55064        assert_eq_m512(r, e);
55065    }
55066
55067    #[simd_test(enable = "avx512f")]
55068    unsafe fn test_mm512_mask_unpackhi_ps() {
55069        let a = _mm512_set_ps(
55070            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55071        );
55072        let b = _mm512_set_ps(
55073            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55074        );
55075        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55076        assert_eq_m512(r, a);
55077        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55078        let e = _mm512_set_ps(
55079            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55080        );
55081        assert_eq_m512(r, e);
55082    }
55083
55084    #[simd_test(enable = "avx512f")]
55085    unsafe fn test_mm512_maskz_unpackhi_ps() {
55086        let a = _mm512_set_ps(
55087            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55088        );
55089        let b = _mm512_set_ps(
55090            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55091        );
55092        let r = _mm512_maskz_unpackhi_ps(0, a, b);
55093        assert_eq_m512(r, _mm512_setzero_ps());
55094        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55095        let e = _mm512_set_ps(
55096            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55097        );
55098        assert_eq_m512(r, e);
55099    }
55100
55101    #[simd_test(enable = "avx512f,avx512vl")]
55102    unsafe fn test_mm256_mask_unpackhi_ps() {
55103        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55104        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55105        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55106        assert_eq_m256(r, a);
55107        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55108        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55109        assert_eq_m256(r, e);
55110    }
55111
55112    #[simd_test(enable = "avx512f,avx512vl")]
55113    unsafe fn test_mm256_maskz_unpackhi_ps() {
55114        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55115        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55116        let r = _mm256_maskz_unpackhi_ps(0, a, b);
55117        assert_eq_m256(r, _mm256_setzero_ps());
55118        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55119        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55120        assert_eq_m256(r, e);
55121    }
55122
55123    #[simd_test(enable = "avx512f,avx512vl")]
55124    unsafe fn test_mm_mask_unpackhi_ps() {
55125        let a = _mm_set_ps(1., 2., 3., 4.);
55126        let b = _mm_set_ps(17., 18., 19., 20.);
55127        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55128        assert_eq_m128(r, a);
55129        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55130        let e = _mm_set_ps(17., 1., 18., 2.);
55131        assert_eq_m128(r, e);
55132    }
55133
55134    #[simd_test(enable = "avx512f,avx512vl")]
55135    unsafe fn test_mm_maskz_unpackhi_ps() {
55136        let a = _mm_set_ps(1., 2., 3., 4.);
55137        let b = _mm_set_ps(17., 18., 19., 20.);
55138        let r = _mm_maskz_unpackhi_ps(0, a, b);
55139        assert_eq_m128(r, _mm_setzero_ps());
55140        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55141        let e = _mm_set_ps(17., 1., 18., 2.);
55142        assert_eq_m128(r, e);
55143    }
55144
55145    #[simd_test(enable = "avx512f")]
55146    unsafe fn test_mm512_unpacklo_epi32() {
55147        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55148        let b = _mm512_set_epi32(
55149            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55150        );
55151        let r = _mm512_unpacklo_epi32(a, b);
55152        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55153        assert_eq_m512i(r, e);
55154    }
55155
55156    #[simd_test(enable = "avx512f")]
55157    unsafe fn test_mm512_mask_unpacklo_epi32() {
55158        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55159        let b = _mm512_set_epi32(
55160            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55161        );
55162        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55163        assert_eq_m512i(r, a);
55164        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55165        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55166        assert_eq_m512i(r, e);
55167    }
55168
55169    #[simd_test(enable = "avx512f")]
55170    unsafe fn test_mm512_maskz_unpacklo_epi32() {
55171        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55172        let b = _mm512_set_epi32(
55173            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55174        );
55175        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55176        assert_eq_m512i(r, _mm512_setzero_si512());
55177        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55178        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55179        assert_eq_m512i(r, e);
55180    }
55181
55182    #[simd_test(enable = "avx512f,avx512vl")]
55183    unsafe fn test_mm256_mask_unpacklo_epi32() {
55184        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55185        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55186        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55187        assert_eq_m256i(r, a);
55188        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55189        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55190        assert_eq_m256i(r, e);
55191    }
55192
55193    #[simd_test(enable = "avx512f,avx512vl")]
55194    unsafe fn test_mm256_maskz_unpacklo_epi32() {
55195        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55196        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55197        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55198        assert_eq_m256i(r, _mm256_setzero_si256());
55199        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55200        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55201        assert_eq_m256i(r, e);
55202    }
55203
55204    #[simd_test(enable = "avx512f,avx512vl")]
55205    unsafe fn test_mm_mask_unpacklo_epi32() {
55206        let a = _mm_set_epi32(1, 2, 3, 4);
55207        let b = _mm_set_epi32(17, 18, 19, 20);
55208        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55209        assert_eq_m128i(r, a);
55210        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55211        let e = _mm_set_epi32(19, 3, 20, 4);
55212        assert_eq_m128i(r, e);
55213    }
55214
55215    #[simd_test(enable = "avx512f,avx512vl")]
55216    unsafe fn test_mm_maskz_unpacklo_epi32() {
55217        let a = _mm_set_epi32(1, 2, 3, 4);
55218        let b = _mm_set_epi32(17, 18, 19, 20);
55219        let r = _mm_maskz_unpacklo_epi32(0, a, b);
55220        assert_eq_m128i(r, _mm_setzero_si128());
55221        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55222        let e = _mm_set_epi32(19, 3, 20, 4);
55223        assert_eq_m128i(r, e);
55224    }
55225
55226    #[simd_test(enable = "avx512f")]
55227    unsafe fn test_mm512_unpacklo_ps() {
55228        let a = _mm512_set_ps(
55229            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55230        );
55231        let b = _mm512_set_ps(
55232            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55233        );
55234        let r = _mm512_unpacklo_ps(a, b);
55235        let e = _mm512_set_ps(
55236            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55237        );
55238        assert_eq_m512(r, e);
55239    }
55240
55241    #[simd_test(enable = "avx512f")]
55242    unsafe fn test_mm512_mask_unpacklo_ps() {
55243        let a = _mm512_set_ps(
55244            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55245        );
55246        let b = _mm512_set_ps(
55247            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55248        );
55249        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55250        assert_eq_m512(r, a);
55251        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55252        let e = _mm512_set_ps(
55253            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55254        );
55255        assert_eq_m512(r, e);
55256    }
55257
55258    #[simd_test(enable = "avx512f")]
55259    unsafe fn test_mm512_maskz_unpacklo_ps() {
55260        let a = _mm512_set_ps(
55261            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55262        );
55263        let b = _mm512_set_ps(
55264            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55265        );
55266        let r = _mm512_maskz_unpacklo_ps(0, a, b);
55267        assert_eq_m512(r, _mm512_setzero_ps());
55268        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55269        let e = _mm512_set_ps(
55270            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55271        );
55272        assert_eq_m512(r, e);
55273    }
55274
55275    #[simd_test(enable = "avx512f,avx512vl")]
55276    unsafe fn test_mm256_mask_unpacklo_ps() {
55277        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55278        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55279        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55280        assert_eq_m256(r, a);
55281        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55282        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55283        assert_eq_m256(r, e);
55284    }
55285
55286    #[simd_test(enable = "avx512f,avx512vl")]
55287    unsafe fn test_mm256_maskz_unpacklo_ps() {
55288        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55289        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55290        let r = _mm256_maskz_unpacklo_ps(0, a, b);
55291        assert_eq_m256(r, _mm256_setzero_ps());
55292        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55293        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55294        assert_eq_m256(r, e);
55295    }
55296
55297    #[simd_test(enable = "avx512f,avx512vl")]
55298    unsafe fn test_mm_mask_unpacklo_ps() {
55299        let a = _mm_set_ps(1., 2., 3., 4.);
55300        let b = _mm_set_ps(17., 18., 19., 20.);
55301        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55302        assert_eq_m128(r, a);
55303        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55304        let e = _mm_set_ps(19., 3., 20., 4.);
55305        assert_eq_m128(r, e);
55306    }
55307
55308    #[simd_test(enable = "avx512f,avx512vl")]
55309    unsafe fn test_mm_maskz_unpacklo_ps() {
55310        let a = _mm_set_ps(1., 2., 3., 4.);
55311        let b = _mm_set_ps(17., 18., 19., 20.);
55312        let r = _mm_maskz_unpacklo_ps(0, a, b);
55313        assert_eq_m128(r, _mm_setzero_ps());
55314        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55315        let e = _mm_set_ps(19., 3., 20., 4.);
55316        assert_eq_m128(r, e);
55317    }
55318
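    // `alignr_epi32` concatenates `a` (high) and `b` (low) into a 32-element pool and shifts it
    // right by the low four bits of the immediate, in 32-bit elements, keeping the low 16
    // elements; an offset of 16 therefore wraps to 0 and returns `b`, as asserted below.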
55319    #[simd_test(enable = "avx512f")]
55320    unsafe fn test_mm512_alignr_epi32() {
55321        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55322        let b = _mm512_set_epi32(
55323            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55324        );
55325        let r = _mm512_alignr_epi32::<0>(a, b);
55326        assert_eq_m512i(r, b);
55327        let r = _mm512_alignr_epi32::<16>(a, b);
55328        assert_eq_m512i(r, b);
55329        let r = _mm512_alignr_epi32::<1>(a, b);
55330        let e = _mm512_set_epi32(
55331            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55332        );
55333        assert_eq_m512i(r, e);
55334    }
55335
55336    #[simd_test(enable = "avx512f")]
55337    unsafe fn test_mm512_mask_alignr_epi32() {
55338        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55339        let b = _mm512_set_epi32(
55340            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55341        );
55342        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55343        assert_eq_m512i(r, a);
55344        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55345        let e = _mm512_set_epi32(
55346            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55347        );
55348        assert_eq_m512i(r, e);
55349    }
55350
55351    #[simd_test(enable = "avx512f")]
55352    unsafe fn test_mm512_maskz_alignr_epi32() {
55353        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55354        let b = _mm512_set_epi32(
55355            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55356        );
55357        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55358        assert_eq_m512i(r, _mm512_setzero_si512());
55359        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55360        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55361        assert_eq_m512i(r, e);
55362    }
55363
55364    #[simd_test(enable = "avx512f,avx512vl")]
55365    unsafe fn test_mm256_alignr_epi32() {
55366        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55367        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55368        let r = _mm256_alignr_epi32::<0>(a, b);
55369        assert_eq_m256i(r, b);
55370        let r = _mm256_alignr_epi32::<1>(a, b);
55371        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55372        assert_eq_m256i(r, e);
55373    }
55374
55375    #[simd_test(enable = "avx512f,avx512vl")]
55376    unsafe fn test_mm256_mask_alignr_epi32() {
55377        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55378        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55379        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55380        assert_eq_m256i(r, a);
55381        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55382        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55383        assert_eq_m256i(r, e);
55384    }
55385
55386    #[simd_test(enable = "avx512f,avx512vl")]
55387    unsafe fn test_mm256_maskz_alignr_epi32() {
55388        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55389        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55390        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55391        assert_eq_m256i(r, _mm256_setzero_si256());
55392        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55393        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55394        assert_eq_m256i(r, e);
55395    }
55396
55397    #[simd_test(enable = "avx512f,avx512vl")]
55398    unsafe fn test_mm_alignr_epi32() {
55399        let a = _mm_set_epi32(4, 3, 2, 1);
55400        let b = _mm_set_epi32(8, 7, 6, 5);
55401        let r = _mm_alignr_epi32::<0>(a, b);
55402        assert_eq_m128i(r, b);
55403        let r = _mm_alignr_epi32::<1>(a, b);
55404        let e = _mm_set_epi32(1, 8, 7, 6);
55405        assert_eq_m128i(r, e);
55406    }
55407
55408    #[simd_test(enable = "avx512f,avx512vl")]
55409    unsafe fn test_mm_mask_alignr_epi32() {
55410        let a = _mm_set_epi32(4, 3, 2, 1);
55411        let b = _mm_set_epi32(8, 7, 6, 5);
55412        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55413        assert_eq_m128i(r, a);
55414        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55415        let e = _mm_set_epi32(1, 8, 7, 6);
55416        assert_eq_m128i(r, e);
55417    }
55418
55419    #[simd_test(enable = "avx512f,avx512vl")]
55420    unsafe fn test_mm_maskz_alignr_epi32() {
55421        let a = _mm_set_epi32(4, 3, 2, 1);
55422        let b = _mm_set_epi32(8, 7, 6, 5);
55423        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55424        assert_eq_m128i(r, _mm_setzero_si128());
55425        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55426        let e = _mm_set_epi32(1, 8, 7, 6);
55427        assert_eq_m128i(r, e);
55428    }
55429
55430    #[simd_test(enable = "avx512f")]
55431    unsafe fn test_mm512_and_epi32() {
55432        #[rustfmt::skip]
55433        let a = _mm512_set_epi32(
55434            1 << 1 | 1 << 2, 0, 0, 0,
55435            0, 0, 0, 0,
55436            0, 0, 0, 0,
55437            0, 0, 0, 1 << 1 | 1 << 3,
55438        );
55439        #[rustfmt::skip]
55440        let b = _mm512_set_epi32(
55441            1 << 1, 0, 0, 0,
55442            0, 0, 0, 0,
55443            0, 0, 0, 0,
55444            0, 0, 0, 1 << 3 | 1 << 4,
55445        );
55446        let r = _mm512_and_epi32(a, b);
55447        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55448        assert_eq_m512i(r, e);
55449    }
55450
55451    #[simd_test(enable = "avx512f")]
55452    unsafe fn test_mm512_mask_and_epi32() {
55453        #[rustfmt::skip]
55454        let a = _mm512_set_epi32(
55455            1 << 1 | 1 << 2, 0, 0, 0,
55456            0, 0, 0, 0,
55457            0, 0, 0, 0,
55458            0, 0, 0, 1 << 1 | 1 << 3,
55459        );
55460        #[rustfmt::skip]
55461        let b = _mm512_set_epi32(
55462            1 << 1, 0, 0, 0,
55463            0, 0, 0, 0,
55464            0, 0, 0, 0,
55465            0, 0, 0, 1 << 3 | 1 << 4,
55466        );
55467        let r = _mm512_mask_and_epi32(a, 0, a, b);
55468        assert_eq_m512i(r, a);
55469        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55470        #[rustfmt::skip]
55471        let e = _mm512_set_epi32(
55472            1 << 1 | 1 << 2, 0, 0, 0,
55473            0, 0, 0, 0,
55474            0, 0, 0, 0,
55475            0, 0, 0, 1 << 3,
55476        );
55477        assert_eq_m512i(r, e);
55478    }
55479
55480    #[simd_test(enable = "avx512f")]
55481    unsafe fn test_mm512_maskz_and_epi32() {
55482        #[rustfmt::skip]
55483        let a = _mm512_set_epi32(
55484            1 << 1 | 1 << 2, 0, 0, 0,
55485            0, 0, 0, 0,
55486            0, 0, 0, 0,
55487            0, 0, 0, 1 << 1 | 1 << 3,
55488        );
55489        #[rustfmt::skip]
55490        let b = _mm512_set_epi32(
55491            1 << 1, 0, 0, 0,
55492            0, 0, 0, 0,
55493            0, 0, 0, 0,
55494            0, 0, 0, 1 << 3 | 1 << 4,
55495        );
55496        let r = _mm512_maskz_and_epi32(0, a, b);
55497        assert_eq_m512i(r, _mm512_setzero_si512());
55498        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55499        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55500        assert_eq_m512i(r, e);
55501    }
55502
55503    #[simd_test(enable = "avx512f,avx512vl")]
55504    unsafe fn test_mm256_mask_and_epi32() {
55505        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55506        let b = _mm256_set1_epi32(1 << 1);
55507        let r = _mm256_mask_and_epi32(a, 0, a, b);
55508        assert_eq_m256i(r, a);
55509        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55510        let e = _mm256_set1_epi32(1 << 1);
55511        assert_eq_m256i(r, e);
55512    }
55513
55514    #[simd_test(enable = "avx512f,avx512vl")]
55515    unsafe fn test_mm256_maskz_and_epi32() {
55516        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55517        let b = _mm256_set1_epi32(1 << 1);
55518        let r = _mm256_maskz_and_epi32(0, a, b);
55519        assert_eq_m256i(r, _mm256_setzero_si256());
55520        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55521        let e = _mm256_set1_epi32(1 << 1);
55522        assert_eq_m256i(r, e);
55523    }
55524
55525    #[simd_test(enable = "avx512f,avx512vl")]
55526    unsafe fn test_mm_mask_and_epi32() {
55527        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55528        let b = _mm_set1_epi32(1 << 1);
55529        let r = _mm_mask_and_epi32(a, 0, a, b);
55530        assert_eq_m128i(r, a);
55531        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55532        let e = _mm_set1_epi32(1 << 1);
55533        assert_eq_m128i(r, e);
55534    }
55535
55536    #[simd_test(enable = "avx512f,avx512vl")]
55537    unsafe fn test_mm_maskz_and_epi32() {
55538        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55539        let b = _mm_set1_epi32(1 << 1);
55540        let r = _mm_maskz_and_epi32(0, a, b);
55541        assert_eq_m128i(r, _mm_setzero_si128());
55542        let r = _mm_maskz_and_epi32(0b00001111, a, b);
55543        let e = _mm_set1_epi32(1 << 1);
55544        assert_eq_m128i(r, e);
55545    }
55546
55547    #[simd_test(enable = "avx512f")]
55548    unsafe fn test_mm512_and_si512() {
55549        #[rustfmt::skip]
55550        let a = _mm512_set_epi32(
55551            1 << 1 | 1 << 2, 0, 0, 0,
55552            0, 0, 0, 0,
55553            0, 0, 0, 0,
55554            0, 0, 0, 1 << 1 | 1 << 3,
55555        );
55556        #[rustfmt::skip]
55557        let b = _mm512_set_epi32(
55558            1 << 1, 0, 0, 0,
55559            0, 0, 0, 0,
55560            0, 0, 0, 0,
55561            0, 0, 0, 1 << 3 | 1 << 4,
55562        );
55563        let r = _mm512_and_si512(a, b);
55564        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55565        assert_eq_m512i(r, e);
55566    }
55567
55568    #[simd_test(enable = "avx512f")]
55569    unsafe fn test_mm512_or_epi32() {
55570        #[rustfmt::skip]
55571        let a = _mm512_set_epi32(
55572            1 << 1 | 1 << 2, 0, 0, 0,
55573            0, 0, 0, 0,
55574            0, 0, 0, 0,
55575            0, 0, 0, 1 << 1 | 1 << 3,
55576        );
55577        #[rustfmt::skip]
55578        let b = _mm512_set_epi32(
55579            1 << 1, 0, 0, 0,
55580            0, 0, 0, 0,
55581            0, 0, 0, 0,
55582            0, 0, 0, 1 << 3 | 1 << 4,
55583        );
55584        let r = _mm512_or_epi32(a, b);
55585        #[rustfmt::skip]
55586        let e = _mm512_set_epi32(
55587            1 << 1 | 1 << 2, 0, 0, 0,
55588            0, 0, 0, 0,
55589            0, 0, 0, 0,
55590            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55591        );
55592        assert_eq_m512i(r, e);
55593    }
55594
55595    #[simd_test(enable = "avx512f")]
55596    unsafe fn test_mm512_mask_or_epi32() {
55597        #[rustfmt::skip]
55598        let a = _mm512_set_epi32(
55599            1 << 1 | 1 << 2, 0, 0, 0,
55600            0, 0, 0, 0,
55601            0, 0, 0, 0,
55602            0, 0, 0, 1 << 1 | 1 << 3,
55603        );
55604        #[rustfmt::skip]
55605        let b = _mm512_set_epi32(
55606            1 << 1, 0, 0, 0,
55607            0, 0, 0, 0,
55608            0, 0, 0, 0,
55609            0, 0, 0, 1 << 3 | 1 << 4,
55610        );
55611        let r = _mm512_mask_or_epi32(a, 0, a, b);
55612        assert_eq_m512i(r, a);
55613        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55614        #[rustfmt::skip]
55615        let e = _mm512_set_epi32(
55616            1 << 1 | 1 << 2, 0, 0, 0,
55617            0, 0, 0, 0,
55618            0, 0, 0, 0,
55619            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55620        );
55621        assert_eq_m512i(r, e);
55622    }
55623
55624    #[simd_test(enable = "avx512f")]
55625    unsafe fn test_mm512_maskz_or_epi32() {
55626        #[rustfmt::skip]
55627        let a = _mm512_set_epi32(
55628            1 << 1 | 1 << 2, 0, 0, 0,
55629            0, 0, 0, 0,
55630            0, 0, 0, 0,
55631            0, 0, 0, 1 << 1 | 1 << 3,
55632        );
55633        #[rustfmt::skip]
55634        let b = _mm512_set_epi32(
55635            1 << 1, 0, 0, 0,
55636            0, 0, 0, 0,
55637            0, 0, 0, 0,
55638            0, 0, 0, 1 << 3 | 1 << 4,
55639        );
55640        let r = _mm512_maskz_or_epi32(0, a, b);
55641        assert_eq_m512i(r, _mm512_setzero_si512());
55642        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55643        #[rustfmt::skip]
55644        let e = _mm512_set_epi32(
55645            0, 0, 0, 0,
55646            0, 0, 0, 0,
55647            0, 0, 0, 0,
55648            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55649        );
55650        assert_eq_m512i(r, e);
55651    }
55652
55653    #[simd_test(enable = "avx512f,avx512vl")]
55654    unsafe fn test_mm256_or_epi32() {
55655        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55656        let b = _mm256_set1_epi32(1 << 1);
55657        let r = _mm256_or_epi32(a, b);
55658        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55659        assert_eq_m256i(r, e);
55660    }
55661
55662    #[simd_test(enable = "avx512f,avx512vl")]
55663    unsafe fn test_mm256_mask_or_epi32() {
55664        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55665        let b = _mm256_set1_epi32(1 << 1);
55666        let r = _mm256_mask_or_epi32(a, 0, a, b);
55667        assert_eq_m256i(r, a);
55668        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55669        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55670        assert_eq_m256i(r, e);
55671    }
55672
55673    #[simd_test(enable = "avx512f,avx512vl")]
55674    unsafe fn test_mm256_maskz_or_epi32() {
55675        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55676        let b = _mm256_set1_epi32(1 << 1);
55677        let r = _mm256_maskz_or_epi32(0, a, b);
55678        assert_eq_m256i(r, _mm256_setzero_si256());
55679        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55680        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55681        assert_eq_m256i(r, e);
55682    }
55683
55684    #[simd_test(enable = "avx512f,avx512vl")]
55685    unsafe fn test_mm_or_epi32() {
55686        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55687        let b = _mm_set1_epi32(1 << 1);
55688        let r = _mm_or_epi32(a, b);
55689        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55690        assert_eq_m128i(r, e);
55691    }
55692
55693    #[simd_test(enable = "avx512f,avx512vl")]
55694    unsafe fn test_mm_mask_or_epi32() {
55695        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55696        let b = _mm_set1_epi32(1 << 1);
55697        let r = _mm_mask_or_epi32(a, 0, a, b);
55698        assert_eq_m128i(r, a);
55699        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55700        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55701        assert_eq_m128i(r, e);
55702    }
55703
55704    #[simd_test(enable = "avx512f,avx512vl")]
55705    unsafe fn test_mm_maskz_or_epi32() {
55706        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55707        let b = _mm_set1_epi32(1 << 1);
55708        let r = _mm_maskz_or_epi32(0, a, b);
55709        assert_eq_m128i(r, _mm_setzero_si128());
55710        let r = _mm_maskz_or_epi32(0b00001111, a, b);
55711        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55712        assert_eq_m128i(r, e);
55713    }
55714
55715    #[simd_test(enable = "avx512f")]
55716    unsafe fn test_mm512_or_si512() {
55717        #[rustfmt::skip]
55718        let a = _mm512_set_epi32(
55719            1 << 1 | 1 << 2, 0, 0, 0,
55720            0, 0, 0, 0,
55721            0, 0, 0, 0,
55722            0, 0, 0, 1 << 1 | 1 << 3,
55723        );
55724        #[rustfmt::skip]
55725        let b = _mm512_set_epi32(
55726            1 << 1, 0, 0, 0,
55727            0, 0, 0, 0,
55728            0, 0, 0, 0,
55729            0, 0, 0, 1 << 3 | 1 << 4,
55730        );
55731        let r = _mm512_or_si512(a, b);
55732        #[rustfmt::skip]
55733        let e = _mm512_set_epi32(
55734            1 << 1 | 1 << 2, 0, 0, 0,
55735            0, 0, 0, 0,
55736            0, 0, 0, 0,
55737            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55738        );
55739        assert_eq_m512i(r, e);
55740    }
55741
55742    #[simd_test(enable = "avx512f")]
55743    unsafe fn test_mm512_xor_epi32() {
55744        #[rustfmt::skip]
55745        let a = _mm512_set_epi32(
55746            1 << 1 | 1 << 2, 0, 0, 0,
55747            0, 0, 0, 0,
55748            0, 0, 0, 0,
55749            0, 0, 0, 1 << 1 | 1 << 3,
55750        );
55751        #[rustfmt::skip]
55752        let b = _mm512_set_epi32(
55753            1 << 1, 0, 0, 0,
55754            0, 0, 0, 0,
55755            0, 0, 0, 0,
55756            0, 0, 0, 1 << 3 | 1 << 4,
55757        );
55758        let r = _mm512_xor_epi32(a, b);
55759        #[rustfmt::skip]
55760        let e = _mm512_set_epi32(
55761            1 << 2, 0, 0, 0,
55762            0, 0, 0, 0,
55763            0, 0, 0, 0,
55764            0, 0, 0, 1 << 1 | 1 << 4,
55765        );
55766        assert_eq_m512i(r, e);
55767    }
55768
55769    #[simd_test(enable = "avx512f")]
55770    unsafe fn test_mm512_mask_xor_epi32() {
55771        #[rustfmt::skip]
55772        let a = _mm512_set_epi32(
55773            1 << 1 | 1 << 2, 0, 0, 0,
55774            0, 0, 0, 0,
55775            0, 0, 0, 0,
55776            0, 0, 0, 1 << 1 | 1 << 3,
55777        );
55778        #[rustfmt::skip]
55779        let b = _mm512_set_epi32(
55780            1 << 1, 0, 0, 0,
55781            0, 0, 0, 0,
55782            0, 0, 0, 0,
55783            0, 0, 0, 1 << 3 | 1 << 4,
55784        );
55785        let r = _mm512_mask_xor_epi32(a, 0, a, b);
55786        assert_eq_m512i(r, a);
55787        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55788        #[rustfmt::skip]
55789        let e = _mm512_set_epi32(
55790            1 << 1 | 1 << 2, 0, 0, 0,
55791            0, 0, 0, 0,
55792            0, 0, 0, 0,
55793            0, 0, 0, 1 << 1 | 1 << 4,
55794        );
55795        assert_eq_m512i(r, e);
55796    }
55797
55798    #[simd_test(enable = "avx512f")]
55799    unsafe fn test_mm512_maskz_xor_epi32() {
55800        #[rustfmt::skip]
55801        let a = _mm512_set_epi32(
55802            1 << 1 | 1 << 2, 0, 0, 0,
55803            0, 0, 0, 0,
55804            0, 0, 0, 0,
55805            0, 0, 0, 1 << 1 | 1 << 3,
55806        );
55807        #[rustfmt::skip]
55808        let b = _mm512_set_epi32(
55809            1 << 1, 0, 0, 0,
55810            0, 0, 0, 0,
55811            0, 0, 0, 0,
55812            0, 0, 0, 1 << 3 | 1 << 4,
55813        );
55814        let r = _mm512_maskz_xor_epi32(0, a, b);
55815        assert_eq_m512i(r, _mm512_setzero_si512());
55816        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55817        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55818        assert_eq_m512i(r, e);
55819    }
55820
55821    #[simd_test(enable = "avx512f,avx512vl")]
55822    unsafe fn test_mm256_xor_epi32() {
55823        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55824        let b = _mm256_set1_epi32(1 << 1);
55825        let r = _mm256_xor_epi32(a, b);
55826        let e = _mm256_set1_epi32(1 << 2);
55827        assert_eq_m256i(r, e);
55828    }
55829
55830    #[simd_test(enable = "avx512f,avx512vl")]
55831    unsafe fn test_mm256_mask_xor_epi32() {
55832        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55833        let b = _mm256_set1_epi32(1 << 1);
55834        let r = _mm256_mask_xor_epi32(a, 0, a, b);
55835        assert_eq_m256i(r, a);
55836        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55837        let e = _mm256_set1_epi32(1 << 2);
55838        assert_eq_m256i(r, e);
55839    }
55840
55841    #[simd_test(enable = "avx512f,avx512vl")]
55842    unsafe fn test_mm256_maskz_xor_epi32() {
55843        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55844        let b = _mm256_set1_epi32(1 << 1);
55845        let r = _mm256_maskz_xor_epi32(0, a, b);
55846        assert_eq_m256i(r, _mm256_setzero_si256());
55847        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55848        let e = _mm256_set1_epi32(1 << 2);
55849        assert_eq_m256i(r, e);
55850    }
55851
55852    #[simd_test(enable = "avx512f,avx512vl")]
55853    unsafe fn test_mm_xor_epi32() {
55854        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55855        let b = _mm_set1_epi32(1 << 1);
55856        let r = _mm_xor_epi32(a, b);
55857        let e = _mm_set1_epi32(1 << 2);
55858        assert_eq_m128i(r, e);
55859    }
55860
55861    #[simd_test(enable = "avx512f,avx512vl")]
55862    unsafe fn test_mm_mask_xor_epi32() {
55863        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55864        let b = _mm_set1_epi32(1 << 1);
55865        let r = _mm_mask_xor_epi32(a, 0, a, b);
55866        assert_eq_m128i(r, a);
55867        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55868        let e = _mm_set1_epi32(1 << 2);
55869        assert_eq_m128i(r, e);
55870    }
55871
55872    #[simd_test(enable = "avx512f,avx512vl")]
55873    unsafe fn test_mm_maskz_xor_epi32() {
55874        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55875        let b = _mm_set1_epi32(1 << 1);
55876        let r = _mm_maskz_xor_epi32(0, a, b);
55877        assert_eq_m128i(r, _mm_setzero_si128());
55878        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55879        let e = _mm_set1_epi32(1 << 2);
55880        assert_eq_m128i(r, e);
55881    }
55882
55883    #[simd_test(enable = "avx512f")]
55884    unsafe fn test_mm512_xor_si512() {
55885        #[rustfmt::skip]
55886        let a = _mm512_set_epi32(
55887            1 << 1 | 1 << 2, 0, 0, 0,
55888            0, 0, 0, 0,
55889            0, 0, 0, 0,
55890            0, 0, 0, 1 << 1 | 1 << 3,
55891        );
55892        #[rustfmt::skip]
55893        let b = _mm512_set_epi32(
55894            1 << 1, 0, 0, 0,
55895            0, 0, 0, 0,
55896            0, 0, 0, 0,
55897            0, 0, 0, 1 << 3 | 1 << 4,
55898        );
55899        let r = _mm512_xor_si512(a, b);
55900        #[rustfmt::skip]
55901        let e = _mm512_set_epi32(
55902            1 << 2, 0, 0, 0,
55903            0, 0, 0, 0,
55904            0, 0, 0, 0,
55905            0, 0, 0, 1 << 1 | 1 << 4,
55906        );
55907        assert_eq_m512i(r, e);
55908    }
55909
55910    #[simd_test(enable = "avx512f")]
55911    unsafe fn test_mm512_andnot_epi32() {
55912        let a = _mm512_set1_epi32(0);
55913        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55914        let r = _mm512_andnot_epi32(a, b);
55915        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55916        assert_eq_m512i(r, e);
55917    }
55918
55919    #[simd_test(enable = "avx512f")]
55920    unsafe fn test_mm512_mask_andnot_epi32() {
55921        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55922        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55923        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
55924        assert_eq_m512i(r, a);
55925        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
55926        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55927        assert_eq_m512i(r, e);
55928    }
55929
55930    #[simd_test(enable = "avx512f")]
55931    unsafe fn test_mm512_maskz_andnot_epi32() {
55932        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55933        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55934        let r = _mm512_maskz_andnot_epi32(0, a, b);
55935        assert_eq_m512i(r, _mm512_setzero_si512());
55936        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
55937        #[rustfmt::skip]
55938        let e = _mm512_set_epi32(
55939            0, 0, 0, 0,
55940            0, 0, 0, 0,
55941            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55942            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55943        );
55944        assert_eq_m512i(r, e);
55945    }
55946
55947    #[simd_test(enable = "avx512f,avx512vl")]
55948    unsafe fn test_mm256_mask_andnot_epi32() {
55949        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55950        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55951        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
55952        assert_eq_m256i(r, a);
55953        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
55954        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55955        assert_eq_m256i(r, e);
55956    }
55957
55958    #[simd_test(enable = "avx512f,avx512vl")]
55959    unsafe fn test_mm256_maskz_andnot_epi32() {
55960        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55961        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55962        let r = _mm256_maskz_andnot_epi32(0, a, b);
55963        assert_eq_m256i(r, _mm256_setzero_si256());
55964        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
55965        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55966        assert_eq_m256i(r, e);
55967    }
55968
55969    #[simd_test(enable = "avx512f,avx512vl")]
55970    unsafe fn test_mm_mask_andnot_epi32() {
55971        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55972        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55973        let r = _mm_mask_andnot_epi32(a, 0, a, b);
55974        assert_eq_m128i(r, a);
55975        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
55976        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55977        assert_eq_m128i(r, e);
55978    }
55979
55980    #[simd_test(enable = "avx512f,avx512vl")]
55981    unsafe fn test_mm_maskz_andnot_epi32() {
55982        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55983        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55984        let r = _mm_maskz_andnot_epi32(0, a, b);
55985        assert_eq_m128i(r, _mm_setzero_si128());
55986        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
55987        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55988        assert_eq_m128i(r, e);
55989    }
55990
55991    #[simd_test(enable = "avx512f")]
55992    unsafe fn test_cvtmask16_u32() {
55993        let a: __mmask16 = 0b11001100_00110011;
55994        let r = _cvtmask16_u32(a);
55995        let e: u32 = 0b11001100_00110011;
55996        assert_eq!(r, e);
55997    }
55998
55999    #[simd_test(enable = "avx512f")]
56000    unsafe fn test_cvtu32_mask16() {
56001        let a: u32 = 0b11001100_00110011;
56002        let r = _cvtu32_mask16(a);
56003        let e: __mmask16 = 0b11001100_00110011;
56004        assert_eq!(r, e);
56005    }
56006
56007    #[simd_test(enable = "avx512f")]
56008    unsafe fn test_mm512_kand() {
56009        let a: u16 = 0b11001100_00110011;
56010        let b: u16 = 0b11001100_00110011;
56011        let r = _mm512_kand(a, b);
56012        let e: u16 = 0b11001100_00110011;
56013        assert_eq!(r, e);
56014    }
56015
56016    #[simd_test(enable = "avx512f")]
56017    unsafe fn test_kand_mask16() {
56018        let a: u16 = 0b11001100_00110011;
56019        let b: u16 = 0b11001100_00110011;
56020        let r = _kand_mask16(a, b);
56021        let e: u16 = 0b11001100_00110011;
56022        assert_eq!(r, e);
56023    }
56024
56025    #[simd_test(enable = "avx512f")]
56026    unsafe fn test_mm512_kor() {
56027        let a: u16 = 0b11001100_00110011;
56028        let b: u16 = 0b00101110_00001011;
56029        let r = _mm512_kor(a, b);
56030        let e: u16 = 0b11101110_00111011;
56031        assert_eq!(r, e);
56032    }
56033
56034    #[simd_test(enable = "avx512f")]
56035    unsafe fn test_kor_mask16() {
56036        let a: u16 = 0b11001100_00110011;
56037        let b: u16 = 0b00101110_00001011;
56038        let r = _kor_mask16(a, b);
56039        let e: u16 = 0b11101110_00111011;
56040        assert_eq!(r, e);
56041    }
56042
56043    #[simd_test(enable = "avx512f")]
56044    unsafe fn test_mm512_kxor() {
56045        let a: u16 = 0b11001100_00110011;
56046        let b: u16 = 0b00101110_00001011;
56047        let r = _mm512_kxor(a, b);
56048        let e: u16 = 0b11100010_00111000;
56049        assert_eq!(r, e);
56050    }
56051
56052    #[simd_test(enable = "avx512f")]
56053    unsafe fn test_kxor_mask16() {
56054        let a: u16 = 0b11001100_00110011;
56055        let b: u16 = 0b00101110_00001011;
56056        let r = _kxor_mask16(a, b);
56057        let e: u16 = 0b11100010_00111000;
56058        assert_eq!(r, e);
56059    }
56060
56061    #[simd_test(enable = "avx512f")]
56062    unsafe fn test_mm512_knot() {
56063        let a: u16 = 0b11001100_00110011;
56064        let r = _mm512_knot(a);
56065        let e: u16 = 0b00110011_11001100;
56066        assert_eq!(r, e);
56067    }
56068
56069    #[simd_test(enable = "avx512f")]
56070    unsafe fn test_knot_mask16() {
56071        let a: u16 = 0b11001100_00110011;
56072        let r = _knot_mask16(a);
56073        let e: u16 = 0b00110011_11001100;
56074        assert_eq!(r, e);
56075    }
56076
56077    #[simd_test(enable = "avx512f")]
56078    unsafe fn test_mm512_kandn() {
56079        let a: u16 = 0b11001100_00110011;
56080        let b: u16 = 0b00101110_00001011;
56081        let r = _mm512_kandn(a, b);
56082        let e: u16 = 0b00100010_00001000;
56083        assert_eq!(r, e);
56084    }
56085
56086    #[simd_test(enable = "avx512f")]
56087    unsafe fn test_kandn_mask16() {
56088        let a: u16 = 0b11001100_00110011;
56089        let b: u16 = 0b00101110_00001011;
56090        let r = _kandn_mask16(a, b);
56091        let e: u16 = 0b00100010_00001000;
56092        assert_eq!(r, e);
56093    }
56094
56095    #[simd_test(enable = "avx512f")]
56096    unsafe fn test_mm512_kxnor() {
56097        let a: u16 = 0b11001100_00110011;
56098        let b: u16 = 0b00101110_00001011;
56099        let r = _mm512_kxnor(a, b);
56100        let e: u16 = 0b00011101_11000111;
56101        assert_eq!(r, e);
56102    }
56103
56104    #[simd_test(enable = "avx512f")]
56105    unsafe fn test_kxnor_mask16() {
56106        let a: u16 = 0b11001100_00110011;
56107        let b: u16 = 0b00101110_00001011;
56108        let r = _kxnor_mask16(a, b);
56109        let e: u16 = 0b00011101_11000111;
56110        assert_eq!(r, e);
56111    }
56112
56113    #[simd_test(enable = "avx512dq")]
56114    unsafe fn test_kortest_mask16_u8() {
56115        let a: __mmask16 = 0b0110100101101001;
56116        let b: __mmask16 = 0b1011011010110110;
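        // a | b is all ones, so the carry-style flag (`all_ones`) is set to 1
        // while the zero-style return value stays 0.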
56117        let mut all_ones: u8 = 0;
56118        let r = _kortest_mask16_u8(a, b, &mut all_ones);
56119        assert_eq!(r, 0);
56120        assert_eq!(all_ones, 1);
56121    }
56122
56123    #[simd_test(enable = "avx512dq")]
56124    unsafe fn test_kortestc_mask16_u8() {
56125        let a: __mmask16 = 0b0110100101101001;
56126        let b: __mmask16 = 0b1011011010110110;
56127        let r = _kortestc_mask16_u8(a, b);
56128        assert_eq!(r, 1);
56129    }
56130
56131    #[simd_test(enable = "avx512dq")]
56132    unsafe fn test_kortestz_mask16_u8() {
56133        let a: __mmask16 = 0b0110100101101001;
56134        let b: __mmask16 = 0b1011011010110110;
56135        let r = _kortestz_mask16_u8(a, b);
56136        assert_eq!(r, 0);
56137    }
56138
56139    #[simd_test(enable = "avx512dq")]
56140    unsafe fn test_kshiftli_mask16() {
56141        let a: __mmask16 = 0b1001011011000011;
56142        let r = _kshiftli_mask16::<3>(a);
56143        let e: __mmask16 = 0b1011011000011000;
56144        assert_eq!(r, e);
56145    }
56146
56147    #[simd_test(enable = "avx512dq")]
56148    unsafe fn test_kshiftri_mask16() {
56149        let a: __mmask16 = 0b0110100100111100;
56150        let r = _kshiftri_mask16::<3>(a);
56151        let e: __mmask16 = 0b0000110100100111;
56152        assert_eq!(r, e);
56153    }
56154
56155    #[simd_test(enable = "avx512f")]
56156    unsafe fn test_load_mask16() {
56157        let a: __mmask16 = 0b1001011011000011;
56158        let r = _load_mask16(&a);
56159        let e: __mmask16 = 0b1001011011000011;
56160        assert_eq!(r, e);
56161    }
56162
56163    #[simd_test(enable = "avx512f")]
56164    unsafe fn test_store_mask16() {
56165        let a: __mmask16 = 0b0110100100111100;
56166        let mut r = 0;
56167        _store_mask16(&mut r, a);
56168        let e: __mmask16 = 0b0110100100111100;
56169        assert_eq!(r, e);
56170    }
56171
56172    #[simd_test(enable = "avx512f")]
56173    unsafe fn test_mm512_kmov() {
56174        let a: u16 = 0b11001100_00110011;
56175        let r = _mm512_kmov(a);
56176        let e: u16 = 0b11001100_00110011;
56177        assert_eq!(r, e);
56178    }
56179
56180    #[simd_test(enable = "avx512f")]
56181    unsafe fn test_mm512_int2mask() {
56182        let a: i32 = 0b11001100_00110011;
56183        let r = _mm512_int2mask(a);
56184        let e: u16 = 0b11001100_00110011;
56185        assert_eq!(r, e);
56186    }
56187
56188    #[simd_test(enable = "avx512f")]
56189    unsafe fn test_mm512_mask2int() {
56190        let k1: __mmask16 = 0b11001100_00110011;
56191        let r = _mm512_mask2int(k1);
56192        let e: i32 = 0b11001100_00110011;
56193        assert_eq!(r, e);
56194    }
56195
56196    #[simd_test(enable = "avx512f")]
56197    unsafe fn test_mm512_kunpackb() {
56198        let a: u16 = 0b11001100_00110011;
56199        let b: u16 = 0b00101110_00001011;
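        // _mm512_kunpackb concatenates the low bytes: the result's high byte is
        // a's low byte and its low byte is b's low byte.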
56200        let r = _mm512_kunpackb(a, b);
56201        let e: u16 = 0b00110011_00001011;
56202        assert_eq!(r, e);
56203    }
56204
56205    #[simd_test(enable = "avx512f")]
56206    unsafe fn test_mm512_kortestc() {
56207        let a: u16 = 0b11001100_00110011;
56208        let b: u16 = 0b00101110_00001011;
56209        let r = _mm512_kortestc(a, b);
56210        assert_eq!(r, 0);
56211        let b: u16 = 0b11111111_11111111;
56212        let r = _mm512_kortestc(a, b);
56213        assert_eq!(r, 1);
56214    }
56215
56216    #[simd_test(enable = "avx512f")]
56217    unsafe fn test_mm512_kortestz() {
56218        let a: u16 = 0b11001100_00110011;
56219        let b: u16 = 0b00101110_00001011;
56220        let r = _mm512_kortestz(a, b);
56221        assert_eq!(r, 0);
56222        let r = _mm512_kortestz(0, 0);
56223        assert_eq!(r, 1);
56224    }
56225
56226    #[simd_test(enable = "avx512f")]
56227    unsafe fn test_mm512_test_epi32_mask() {
56228        let a = _mm512_set1_epi32(1 << 0);
56229        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56230        let r = _mm512_test_epi32_mask(a, b);
56231        let e: __mmask16 = 0b11111111_11111111;
56232        assert_eq!(r, e);
56233    }
56234
56235    #[simd_test(enable = "avx512f")]
56236    unsafe fn test_mm512_mask_test_epi32_mask() {
56237        let a = _mm512_set1_epi32(1 << 0);
56238        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56239        let r = _mm512_mask_test_epi32_mask(0, a, b);
56240        assert_eq!(r, 0);
56241        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56242        let e: __mmask16 = 0b11111111_11111111;
56243        assert_eq!(r, e);
56244    }
56245
56246    #[simd_test(enable = "avx512f,avx512vl")]
56247    unsafe fn test_mm256_test_epi32_mask() {
56248        let a = _mm256_set1_epi32(1 << 0);
56249        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56250        let r = _mm256_test_epi32_mask(a, b);
56251        let e: __mmask8 = 0b11111111;
56252        assert_eq!(r, e);
56253    }
56254
56255    #[simd_test(enable = "avx512f,avx512vl")]
56256    unsafe fn test_mm256_mask_test_epi32_mask() {
56257        let a = _mm256_set1_epi32(1 << 0);
56258        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56259        let r = _mm256_mask_test_epi32_mask(0, a, b);
56260        assert_eq!(r, 0);
56261        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56262        let e: __mmask8 = 0b11111111;
56263        assert_eq!(r, e);
56264    }
56265
56266    #[simd_test(enable = "avx512f,avx512vl")]
56267    unsafe fn test_mm_test_epi32_mask() {
56268        let a = _mm_set1_epi32(1 << 0);
56269        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56270        let r = _mm_test_epi32_mask(a, b);
56271        let e: __mmask8 = 0b00001111;
56272        assert_eq!(r, e);
56273    }
56274
56275    #[simd_test(enable = "avx512f,avx512vl")]
56276    unsafe fn test_mm_mask_test_epi32_mask() {
56277        let a = _mm_set1_epi32(1 << 0);
56278        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56279        let r = _mm_mask_test_epi32_mask(0, a, b);
56280        assert_eq!(r, 0);
56281        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56282        let e: __mmask8 = 0b00001111;
56283        assert_eq!(r, e);
56284    }
56285
56286    #[simd_test(enable = "avx512f")]
56287    unsafe fn test_mm512_testn_epi32_mask() {
56288        let a = _mm512_set1_epi32(1 << 0);
56289        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56290        let r = _mm512_testn_epi32_mask(a, b);
56291        let e: __mmask16 = 0b00000000_00000000;
56292        assert_eq!(r, e);
56293    }
56294
56295    #[simd_test(enable = "avx512f")]
56296    unsafe fn test_mm512_mask_testn_epi32_mask() {
56297        let a = _mm512_set1_epi32(1 << 0);
56298        let b = _mm512_set1_epi32(1 << 1);
56299        let r = _mm512_mask_testn_epi32_mask(0, a, b);
56300        assert_eq!(r, 0);
56301        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56302        let e: __mmask16 = 0b11111111_11111111;
56303        assert_eq!(r, e);
56304    }
56305
56306    #[simd_test(enable = "avx512f,avx512vl")]
56307    unsafe fn test_mm256_testn_epi32_mask() {
56308        let a = _mm256_set1_epi32(1 << 0);
56309        let b = _mm256_set1_epi32(1 << 1);
56310        let r = _mm256_testn_epi32_mask(a, b);
56311        let e: __mmask8 = 0b11111111;
56312        assert_eq!(r, e);
56313    }
56314
56315    #[simd_test(enable = "avx512f,avx512vl")]
56316    unsafe fn test_mm256_mask_testn_epi32_mask() {
56317        let a = _mm256_set1_epi32(1 << 0);
56318        let b = _mm256_set1_epi32(1 << 1);
56319        let r = _mm256_mask_testn_epi32_mask(0, a, b);
56320        assert_eq!(r, 0);
56321        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56322        let e: __mmask8 = 0b11111111;
56323        assert_eq!(r, e);
56324    }
56325
56326    #[simd_test(enable = "avx512f,avx512vl")]
56327    unsafe fn test_mm_testn_epi32_mask() {
56328        let a = _mm_set1_epi32(1 << 0);
56329        let b = _mm_set1_epi32(1 << 1);
56330        let r = _mm_testn_epi32_mask(a, b);
56331        let e: __mmask8 = 0b00001111;
56332        assert_eq!(r, e);
56333    }
56334
56335    #[simd_test(enable = "avx512f,avx512vl")]
56336    unsafe fn test_mm_mask_testn_epi32_mask() {
56337        let a = _mm_set1_epi32(1 << 0);
56338        let b = _mm_set1_epi32(1 << 1);
56339        let r = _mm_mask_testn_epi32_mask(0, a, b);
56340        assert_eq!(r, 0);
56341        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56342        let e: __mmask8 = 0b00001111;
56343        assert_eq!(r, e);
56344    }
56345
56346    #[simd_test(enable = "avx512f")]
56347    #[cfg_attr(miri, ignore)]
56348    unsafe fn test_mm512_stream_ps() {
56349        #[repr(align(64))]
56350        struct Memory {
56351            pub data: [f32; 16], // 64 bytes
56352        }
56353        let a = _mm512_set1_ps(7.0);
56354        let mut mem = Memory { data: [-1.0; 16] };
56355
56356        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56357        for i in 0..16 {
56358            assert_eq!(mem.data[i], get_m512(a, i));
56359        }
56360    }
56361
56362    #[simd_test(enable = "avx512f")]
56363    #[cfg_attr(miri, ignore)]
56364    unsafe fn test_mm512_stream_pd() {
56365        #[repr(align(64))]
56366        struct Memory {
56367            pub data: [f64; 8],
56368        }
56369        let a = _mm512_set1_pd(7.0);
56370        let mut mem = Memory { data: [-1.0; 8] };
56371
56372        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56373        for i in 0..8 {
56374            assert_eq!(mem.data[i], get_m512d(a, i));
56375        }
56376    }
56377
56378    #[simd_test(enable = "avx512f")]
56379    #[cfg_attr(miri, ignore)]
56380    unsafe fn test_mm512_stream_si512() {
56381        #[repr(align(64))]
56382        struct Memory {
56383            pub data: [i64; 8],
56384        }
56385        let a = _mm512_set1_epi32(7);
56386        let mut mem = Memory { data: [-1; 8] };
56387
56388        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56389        for i in 0..8 {
56390            assert_eq!(mem.data[i], get_m512i(a, i));
56391        }
56392    }
56393
56394    #[simd_test(enable = "avx512f")]
56395    unsafe fn test_mm512_stream_load_si512() {
56396        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56397        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56398        assert_eq_m512i(a, r);
56399    }
56400
56401    #[simd_test(enable = "avx512f")]
56402    unsafe fn test_mm512_reduce_add_epi32() {
56403        let a = _mm512_set1_epi32(1);
56404        let e: i32 = _mm512_reduce_add_epi32(a);
56405        assert_eq!(16, e);
56406    }
56407
56408    #[simd_test(enable = "avx512f")]
56409    unsafe fn test_mm512_mask_reduce_add_epi32() {
56410        let a = _mm512_set1_epi32(1);
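        // The writemask keeps only elements 8..=15, so only eight lanes of 1 contribute to the sum.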
56411        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56412        assert_eq!(8, e);
56413    }
56414
56415    #[simd_test(enable = "avx512f")]
56416    unsafe fn test_mm512_reduce_add_ps() {
56417        let a = _mm512_set1_ps(1.);
56418        let e: f32 = _mm512_reduce_add_ps(a);
56419        assert_eq!(16., e);
56420    }
56421
56422    #[simd_test(enable = "avx512f")]
56423    unsafe fn test_mm512_mask_reduce_add_ps() {
56424        let a = _mm512_set1_ps(1.);
56425        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56426        assert_eq!(8., e);
56427    }
56428
56429    #[simd_test(enable = "avx512f")]
56430    unsafe fn test_mm512_reduce_mul_epi32() {
56431        let a = _mm512_set1_epi32(2);
56432        let e: i32 = _mm512_reduce_mul_epi32(a);
56433        assert_eq!(65536, e);
56434    }
56435
56436    #[simd_test(enable = "avx512f")]
56437    unsafe fn test_mm512_mask_reduce_mul_epi32() {
56438        let a = _mm512_set1_epi32(2);
56439        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56440        assert_eq!(256, e);
56441    }
56442
56443    #[simd_test(enable = "avx512f")]
56444    unsafe fn test_mm512_reduce_mul_ps() {
56445        let a = _mm512_set1_ps(2.);
56446        let e: f32 = _mm512_reduce_mul_ps(a);
56447        assert_eq!(65536., e);
56448    }
56449
56450    #[simd_test(enable = "avx512f")]
56451    unsafe fn test_mm512_mask_reduce_mul_ps() {
56452        let a = _mm512_set1_ps(2.);
56453        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56454        assert_eq!(256., e);
56455    }
56456
56457    #[simd_test(enable = "avx512f")]
56458    unsafe fn test_mm512_reduce_max_epi32() {
56459        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56460        let e: i32 = _mm512_reduce_max_epi32(a);
56461        assert_eq!(15, e);
56462    }
56463
56464    #[simd_test(enable = "avx512f")]
56465    unsafe fn test_mm512_mask_reduce_max_epi32() {
56466        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
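        // _mm512_set_epi32 lists element 15 first, so elements 8..=15 hold 7 down to 0;
        // their maximum is 7.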
56467        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56468        assert_eq!(7, e);
56469    }
56470
56471    #[simd_test(enable = "avx512f")]
56472    unsafe fn test_mm512_reduce_max_epu32() {
56473        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56474        let e: u32 = _mm512_reduce_max_epu32(a);
56475        assert_eq!(15, e);
56476    }
56477
56478    #[simd_test(enable = "avx512f")]
56479    unsafe fn test_mm512_mask_reduce_max_epu32() {
56480        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56481        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56482        assert_eq!(7, e);
56483    }
56484
56485    #[simd_test(enable = "avx512f")]
56486    unsafe fn test_mm512_reduce_max_ps() {
56487        let a = _mm512_set_ps(
56488            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56489        );
56490        let e: f32 = _mm512_reduce_max_ps(a);
56491        assert_eq!(15., e);
56492    }
56493
56494    #[simd_test(enable = "avx512f")]
56495    unsafe fn test_mm512_mask_reduce_max_ps() {
56496        let a = _mm512_set_ps(
56497            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56498        );
56499        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56500        assert_eq!(7., e);
56501    }
56502
56503    #[simd_test(enable = "avx512f")]
56504    unsafe fn test_mm512_reduce_min_epi32() {
56505        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56506        let e: i32 = _mm512_reduce_min_epi32(a);
56507        assert_eq!(0, e);
56508    }
56509
56510    #[simd_test(enable = "avx512f")]
56511    unsafe fn test_mm512_mask_reduce_min_epi32() {
56512        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56513        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56514        assert_eq!(0, e);
56515    }
56516
56517    #[simd_test(enable = "avx512f")]
56518    unsafe fn test_mm512_reduce_min_epu32() {
56519        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56520        let e: u32 = _mm512_reduce_min_epu32(a);
56521        assert_eq!(0, e);
56522    }
56523
56524    #[simd_test(enable = "avx512f")]
56525    unsafe fn test_mm512_mask_reduce_min_epu32() {
56526        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56527        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56528        assert_eq!(0, e);
56529    }
56530
56531    #[simd_test(enable = "avx512f")]
56532    unsafe fn test_mm512_reduce_min_ps() {
56533        let a = _mm512_set_ps(
56534            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56535        );
56536        let e: f32 = _mm512_reduce_min_ps(a);
56537        assert_eq!(0., e);
56538    }
56539
56540    #[simd_test(enable = "avx512f")]
56541    unsafe fn test_mm512_mask_reduce_min_ps() {
56542        let a = _mm512_set_ps(
56543            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56544        );
56545        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56546        assert_eq!(0., e);
56547    }
56548
56549    #[simd_test(enable = "avx512f")]
56550    unsafe fn test_mm512_reduce_and_epi32() {
56551        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56552        let e: i32 = _mm512_reduce_and_epi32(a);
56553        assert_eq!(0, e);
56554    }
56555
56556    #[simd_test(enable = "avx512f")]
56557    unsafe fn test_mm512_mask_reduce_and_epi32() {
56558        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56559        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56560        assert_eq!(1, e);
56561    }
56562
56563    #[simd_test(enable = "avx512f")]
56564    unsafe fn test_mm512_reduce_or_epi32() {
56565        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56566        let e: i32 = _mm512_reduce_or_epi32(a);
56567        assert_eq!(3, e);
56568    }
56569
56570    #[simd_test(enable = "avx512f")]
56571    unsafe fn test_mm512_mask_reduce_or_epi32() {
56572        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56573        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56574        assert_eq!(1, e);
56575    }
56576
56577    #[simd_test(enable = "avx512f")]
56578    unsafe fn test_mm512_mask_compress_epi32() {
56579        let src = _mm512_set1_epi32(200);
56580        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56581        let r = _mm512_mask_compress_epi32(src, 0, a);
56582        assert_eq_m512i(r, src);
56583        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
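        // Mask bits 0, 2, ..., 14 select the elements holding 15, 13, ..., 1; compression
        // packs them into the low lanes (lowest index first) and fills the rest from src.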
56584        let e = _mm512_set_epi32(
56585            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56586        );
56587        assert_eq_m512i(r, e);
56588    }
56589
56590    #[simd_test(enable = "avx512f")]
56591    unsafe fn test_mm512_maskz_compress_epi32() {
56592        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56593        let r = _mm512_maskz_compress_epi32(0, a);
56594        assert_eq_m512i(r, _mm512_setzero_si512());
56595        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56596        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56597        assert_eq_m512i(r, e);
56598    }
56599
56600    #[simd_test(enable = "avx512f,avx512vl")]
56601    unsafe fn test_mm256_mask_compress_epi32() {
56602        let src = _mm256_set1_epi32(200);
56603        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56604        let r = _mm256_mask_compress_epi32(src, 0, a);
56605        assert_eq_m256i(r, src);
56606        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56607        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56608        assert_eq_m256i(r, e);
56609    }
56610
56611    #[simd_test(enable = "avx512f,avx512vl")]
56612    unsafe fn test_mm256_maskz_compress_epi32() {
56613        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56614        let r = _mm256_maskz_compress_epi32(0, a);
56615        assert_eq_m256i(r, _mm256_setzero_si256());
56616        let r = _mm256_maskz_compress_epi32(0b01010101, a);
56617        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56618        assert_eq_m256i(r, e);
56619    }
56620
56621    #[simd_test(enable = "avx512f,avx512vl")]
56622    unsafe fn test_mm_mask_compress_epi32() {
56623        let src = _mm_set1_epi32(200);
56624        let a = _mm_set_epi32(0, 1, 2, 3);
56625        let r = _mm_mask_compress_epi32(src, 0, a);
56626        assert_eq_m128i(r, src);
56627        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56628        let e = _mm_set_epi32(200, 200, 1, 3);
56629        assert_eq_m128i(r, e);
56630    }
56631
56632    #[simd_test(enable = "avx512f,avx512vl")]
56633    unsafe fn test_mm_maskz_compress_epi32() {
56634        let a = _mm_set_epi32(0, 1, 2, 3);
56635        let r = _mm_maskz_compress_epi32(0, a);
56636        assert_eq_m128i(r, _mm_setzero_si128());
56637        let r = _mm_maskz_compress_epi32(0b00000101, a);
56638        let e = _mm_set_epi32(0, 0, 1, 3);
56639        assert_eq_m128i(r, e);
56640    }
56641
56642    #[simd_test(enable = "avx512f")]
56643    unsafe fn test_mm512_mask_compress_ps() {
56644        let src = _mm512_set1_ps(200.);
56645        let a = _mm512_set_ps(
56646            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56647        );
56648        let r = _mm512_mask_compress_ps(src, 0, a);
56649        assert_eq_m512(r, src);
56650        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56651        let e = _mm512_set_ps(
56652            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56653        );
56654        assert_eq_m512(r, e);
56655    }
56656
56657    #[simd_test(enable = "avx512f")]
56658    unsafe fn test_mm512_maskz_compress_ps() {
56659        let a = _mm512_set_ps(
56660            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56661        );
56662        let r = _mm512_maskz_compress_ps(0, a);
56663        assert_eq_m512(r, _mm512_setzero_ps());
56664        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56665        let e = _mm512_set_ps(
56666            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56667        );
56668        assert_eq_m512(r, e);
56669    }
56670
56671    #[simd_test(enable = "avx512f,avx512vl")]
56672    unsafe fn test_mm256_mask_compress_ps() {
56673        let src = _mm256_set1_ps(200.);
56674        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56675        let r = _mm256_mask_compress_ps(src, 0, a);
56676        assert_eq_m256(r, src);
56677        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56678        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56679        assert_eq_m256(r, e);
56680    }
56681
56682    #[simd_test(enable = "avx512f,avx512vl")]
56683    unsafe fn test_mm256_maskz_compress_ps() {
56684        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56685        let r = _mm256_maskz_compress_ps(0, a);
56686        assert_eq_m256(r, _mm256_setzero_ps());
56687        let r = _mm256_maskz_compress_ps(0b01010101, a);
56688        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56689        assert_eq_m256(r, e);
56690    }
56691
56692    #[simd_test(enable = "avx512f,avx512vl")]
56693    unsafe fn test_mm_mask_compress_ps() {
56694        let src = _mm_set1_ps(200.);
56695        let a = _mm_set_ps(0., 1., 2., 3.);
56696        let r = _mm_mask_compress_ps(src, 0, a);
56697        assert_eq_m128(r, src);
56698        let r = _mm_mask_compress_ps(src, 0b00000101, a);
56699        let e = _mm_set_ps(200., 200., 1., 3.);
56700        assert_eq_m128(r, e);
56701    }
56702
56703    #[simd_test(enable = "avx512f,avx512vl")]
56704    unsafe fn test_mm_maskz_compress_ps() {
56705        let a = _mm_set_ps(0., 1., 2., 3.);
56706        let r = _mm_maskz_compress_ps(0, a);
56707        assert_eq_m128(r, _mm_setzero_ps());
56708        let r = _mm_maskz_compress_ps(0b00000101, a);
56709        let e = _mm_set_ps(0., 0., 1., 3.);
56710        assert_eq_m128(r, e);
56711    }
56712
56713    #[simd_test(enable = "avx512f")]
56714    unsafe fn test_mm512_mask_compressstoreu_epi32() {
56715        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56716        let mut r = [0_i32; 16];
56717        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56718        assert_eq!(&r, &[0_i32; 16]);
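        // Mask bits 1, 3, 6, 7, 12, 13, 14, 15 are set, so elements 2, 4, 7, 8, 13, 14, 15, 16
        // are stored contiguously at the start of the buffer.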
56719        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56720        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56721    }
56722
56723    #[simd_test(enable = "avx512f,avx512vl")]
56724    unsafe fn test_mm256_mask_compressstoreu_epi32() {
56725        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56726        let mut r = [0_i32; 8];
56727        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56728        assert_eq!(&r, &[0_i32; 8]);
56729        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
56730        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56731    }
56732
56733    #[simd_test(enable = "avx512f,avx512vl")]
56734    unsafe fn test_mm_mask_compressstoreu_epi32() {
56735        let a = _mm_setr_epi32(1, 2, 3, 4);
56736        let mut r = [0_i32; 4];
56737        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56738        assert_eq!(&r, &[0_i32; 4]);
56739        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
56740        assert_eq!(&r, &[1, 2, 4, 0]);
56741    }
56742
56743    #[simd_test(enable = "avx512f")]
56744    unsafe fn test_mm512_mask_compressstoreu_epi64() {
56745        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56746        let mut r = [0_i64; 8];
56747        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56748        assert_eq!(&r, &[0_i64; 8]);
56749        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
56750        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56751    }
56752
56753    #[simd_test(enable = "avx512f,avx512vl")]
56754    unsafe fn test_mm256_mask_compressstoreu_epi64() {
56755        let a = _mm256_setr_epi64x(1, 2, 3, 4);
56756        let mut r = [0_i64; 4];
56757        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56758        assert_eq!(&r, &[0_i64; 4]);
56759        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
56760        assert_eq!(&r, &[1, 2, 4, 0]);
56761    }
56762
56763    #[simd_test(enable = "avx512f,avx512vl")]
56764    unsafe fn test_mm_mask_compressstoreu_epi64() {
56765        let a = _mm_setr_epi64x(1, 2);
56766        let mut r = [0_i64; 2];
56767        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56768        assert_eq!(&r, &[0_i64; 2]);
56769        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
56770        assert_eq!(&r, &[2, 0]);
56771    }
56772
56773    #[simd_test(enable = "avx512f")]
56774    unsafe fn test_mm512_mask_compressstoreu_ps() {
56775        let a = _mm512_setr_ps(
56776            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56777            13_f32, 14_f32, 15_f32, 16_f32,
56778        );
56779        let mut r = [0_f32; 16];
56780        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56781        assert_eq!(&r, &[0_f32; 16]);
56782        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56783        assert_eq!(
56784            &r,
56785            &[
56786                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56787                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56788            ]
56789        );
56790    }
56791
56792    #[simd_test(enable = "avx512f,avx512vl")]
56793    unsafe fn test_mm256_mask_compressstoreu_ps() {
56794        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56795        let mut r = [0_f32; 8];
56796        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56797        assert_eq!(&r, &[0_f32; 8]);
56798        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
56799        assert_eq!(
56800            &r,
56801            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56802        );
56803    }
56804
56805    #[simd_test(enable = "avx512f,avx512vl")]
56806    unsafe fn test_mm_mask_compressstoreu_ps() {
56807        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56808        let mut r = [0.; 4];
56809        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56810        assert_eq!(&r, &[0.; 4]);
56811        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
56812        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56813    }
56814
56815    #[simd_test(enable = "avx512f")]
56816    unsafe fn test_mm512_mask_compressstoreu_pd() {
56817        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56818        let mut r = [0.; 8];
56819        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56820        assert_eq!(&r, &[0.; 8]);
56821        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
56822        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56823    }
56824
56825    #[simd_test(enable = "avx512f,avx512vl")]
56826    unsafe fn test_mm256_mask_compressstoreu_pd() {
56827        let a = _mm256_setr_pd(1., 2., 3., 4.);
56828        let mut r = [0.; 4];
56829        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56830        assert_eq!(&r, &[0.; 4]);
56831        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
56832        assert_eq!(&r, &[1., 2., 4., 0.]);
56833    }
56834
56835    #[simd_test(enable = "avx512f,avx512vl")]
56836    unsafe fn test_mm_mask_compressstoreu_pd() {
56837        let a = _mm_setr_pd(1., 2.);
56838        let mut r = [0.; 2];
56839        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56840        assert_eq!(&r, &[0.; 2]);
56841        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
56842        assert_eq!(&r, &[2., 0.]);
56843    }
56844
56845    #[simd_test(enable = "avx512f")]
56846    unsafe fn test_mm512_mask_expand_epi32() {
56847        let src = _mm512_set1_epi32(200);
56848        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56849        let r = _mm512_mask_expand_epi32(src, 0, a);
56850        assert_eq_m512i(r, src);
56851        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
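        // Expansion reads a's low lanes (15, 14, ..., 8) and scatters them to the
        // mask-selected (even) lanes; the unselected lanes come from src.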
56852        let e = _mm512_set_epi32(
56853            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56854        );
56855        assert_eq_m512i(r, e);
56856    }
56857
56858    #[simd_test(enable = "avx512f")]
56859    unsafe fn test_mm512_maskz_expand_epi32() {
56860        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56861        let r = _mm512_maskz_expand_epi32(0, a);
56862        assert_eq_m512i(r, _mm512_setzero_si512());
56863        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56864        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56865        assert_eq_m512i(r, e);
56866    }
56867
56868    #[simd_test(enable = "avx512f,avx512vl")]
56869    unsafe fn test_mm256_mask_expand_epi32() {
56870        let src = _mm256_set1_epi32(200);
56871        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56872        let r = _mm256_mask_expand_epi32(src, 0, a);
56873        assert_eq_m256i(r, src);
56874        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56875        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56876        assert_eq_m256i(r, e);
56877    }
56878
56879    #[simd_test(enable = "avx512f,avx512vl")]
56880    unsafe fn test_mm256_maskz_expand_epi32() {
56881        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56882        let r = _mm256_maskz_expand_epi32(0, a);
56883        assert_eq_m256i(r, _mm256_setzero_si256());
56884        let r = _mm256_maskz_expand_epi32(0b01010101, a);
56885        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56886        assert_eq_m256i(r, e);
56887    }
56888
56889    #[simd_test(enable = "avx512f,avx512vl")]
56890    unsafe fn test_mm_mask_expand_epi32() {
56891        let src = _mm_set1_epi32(200);
56892        let a = _mm_set_epi32(0, 1, 2, 3);
56893        let r = _mm_mask_expand_epi32(src, 0, a);
56894        assert_eq_m128i(r, src);
56895        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56896        let e = _mm_set_epi32(200, 2, 200, 3);
56897        assert_eq_m128i(r, e);
56898    }
56899
56900    #[simd_test(enable = "avx512f,avx512vl")]
56901    unsafe fn test_mm_maskz_expand_epi32() {
56902        let a = _mm_set_epi32(0, 1, 2, 3);
56903        let r = _mm_maskz_expand_epi32(0, a);
56904        assert_eq_m128i(r, _mm_setzero_si128());
56905        let r = _mm_maskz_expand_epi32(0b00000101, a);
56906        let e = _mm_set_epi32(0, 2, 0, 3);
56907        assert_eq_m128i(r, e);
56908    }
56909
56910    #[simd_test(enable = "avx512f")]
56911    unsafe fn test_mm512_mask_expand_ps() {
56912        let src = _mm512_set1_ps(200.);
56913        let a = _mm512_set_ps(
56914            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56915        );
56916        let r = _mm512_mask_expand_ps(src, 0, a);
56917        assert_eq_m512(r, src);
56918        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56919        let e = _mm512_set_ps(
56920            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56921        );
56922        assert_eq_m512(r, e);
56923    }
56924
56925    #[simd_test(enable = "avx512f")]
56926    unsafe fn test_mm512_maskz_expand_ps() {
56927        let a = _mm512_set_ps(
56928            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56929        );
56930        let r = _mm512_maskz_expand_ps(0, a);
56931        assert_eq_m512(r, _mm512_setzero_ps());
56932        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
56933        let e = _mm512_set_ps(
56934            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
56935        );
56936        assert_eq_m512(r, e);
56937    }
56938
56939    #[simd_test(enable = "avx512f,avx512vl")]
56940    unsafe fn test_mm256_mask_expand_ps() {
56941        let src = _mm256_set1_ps(200.);
56942        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56943        let r = _mm256_mask_expand_ps(src, 0, a);
56944        assert_eq_m256(r, src);
56945        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
56946        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
56947        assert_eq_m256(r, e);
56948    }
56949
56950    #[simd_test(enable = "avx512f,avx512vl")]
56951    unsafe fn test_mm256_maskz_expand_ps() {
56952        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56953        let r = _mm256_maskz_expand_ps(0, a);
56954        assert_eq_m256(r, _mm256_setzero_ps());
56955        let r = _mm256_maskz_expand_ps(0b01010101, a);
56956        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
56957        assert_eq_m256(r, e);
56958    }
56959
56960    #[simd_test(enable = "avx512f,avx512vl")]
56961    unsafe fn test_mm_mask_expand_ps() {
56962        let src = _mm_set1_ps(200.);
56963        let a = _mm_set_ps(0., 1., 2., 3.);
56964        let r = _mm_mask_expand_ps(src, 0, a);
56965        assert_eq_m128(r, src);
56966        let r = _mm_mask_expand_ps(src, 0b00000101, a);
56967        let e = _mm_set_ps(200., 2., 200., 3.);
56968        assert_eq_m128(r, e);
56969    }
56970
56971    #[simd_test(enable = "avx512f,avx512vl")]
56972    unsafe fn test_mm_maskz_expand_ps() {
56973        let a = _mm_set_ps(0., 1., 2., 3.);
56974        let r = _mm_maskz_expand_ps(0, a);
56975        assert_eq_m128(r, _mm_setzero_ps());
56976        let r = _mm_maskz_expand_ps(0b00000101, a);
56977        let e = _mm_set_ps(0., 2., 0., 3.);
56978        assert_eq_m128(r, e);
56979    }
56980
56981    #[simd_test(enable = "avx512f")]
56982    unsafe fn test_mm512_loadu_epi32() {
56983        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
56984        let p = a.as_ptr();
56985        let r = _mm512_loadu_epi32(black_box(p));
56986        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
56987        assert_eq_m512i(r, e);
56988    }
56989
56990    #[simd_test(enable = "avx512f,avx512vl")]
56991    unsafe fn test_mm256_loadu_epi32() {
56992        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
56993        let p = a.as_ptr();
56994        let r = _mm256_loadu_epi32(black_box(p));
56995        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
56996        assert_eq_m256i(r, e);
56997    }
56998
56999    #[simd_test(enable = "avx512f,avx512vl")]
57000    unsafe fn test_mm_loadu_epi32() {
57001        let a = &[4, 3, 2, 5];
57002        let p = a.as_ptr();
57003        let r = _mm_loadu_epi32(black_box(p));
57004        let e = _mm_setr_epi32(4, 3, 2, 5);
57005        assert_eq_m128i(r, e);
57006    }
57007
57008    #[simd_test(enable = "avx512f")]
57009    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57010        let a = _mm512_set1_epi32(9);
57011        let mut r = _mm256_undefined_si256();
57012        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57013        let e = _mm256_set1_epi16(9);
57014        assert_eq_m256i(r, e);
57015    }
57016
57017    #[simd_test(enable = "avx512f,avx512vl")]
57018    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57019        let a = _mm256_set1_epi32(9);
57020        let mut r = _mm_undefined_si128();
57021        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57022        let e = _mm_set1_epi16(9);
57023        assert_eq_m128i(r, e);
57024    }
57025
57026    #[simd_test(enable = "avx512f,avx512vl")]
57027    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57028        let a = _mm_set1_epi32(9);
57029        let mut r = _mm_set1_epi8(0);
57030        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57031        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57032        assert_eq_m128i(r, e);
57033    }
57034
57035    #[simd_test(enable = "avx512f")]
57036    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57037        let a = _mm512_set1_epi32(i32::MAX);
57038        let mut r = _mm256_undefined_si256();
57039        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57040        let e = _mm256_set1_epi16(i16::MAX);
57041        assert_eq_m256i(r, e);
57042    }
57043
57044    #[simd_test(enable = "avx512f,avx512vl")]
57045    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57046        let a = _mm256_set1_epi32(i32::MAX);
57047        let mut r = _mm_undefined_si128();
57048        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57049        let e = _mm_set1_epi16(i16::MAX);
57050        assert_eq_m128i(r, e);
57051    }
57052
57053    #[simd_test(enable = "avx512f,avx512vl")]
57054    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57055        let a = _mm_set1_epi32(i32::MAX);
57056        let mut r = _mm_set1_epi8(0);
57057        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57058        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57059        assert_eq_m128i(r, e);
57060    }
57061
57062    #[simd_test(enable = "avx512f")]
57063    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57064        let a = _mm512_set1_epi32(i32::MAX);
57065        let mut r = _mm256_undefined_si256();
57066        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57067        let e = _mm256_set1_epi16(u16::MAX as i16);
57068        assert_eq_m256i(r, e);
57069    }
57070
57071    #[simd_test(enable = "avx512f,avx512vl")]
57072    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57073        let a = _mm256_set1_epi32(i32::MAX);
57074        let mut r = _mm_undefined_si128();
57075        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57076        let e = _mm_set1_epi16(u16::MAX as i16);
57077        assert_eq_m128i(r, e);
57078    }
57079
57080    #[simd_test(enable = "avx512f,avx512vl")]
57081    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57082        let a = _mm_set1_epi32(i32::MAX);
57083        let mut r = _mm_set1_epi8(0);
57084        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57085        let e = _mm_set_epi16(
57086            0,
57087            0,
57088            0,
57089            0,
57090            u16::MAX as i16,
57091            u16::MAX as i16,
57092            u16::MAX as i16,
57093            u16::MAX as i16,
57094        );
57095        assert_eq_m128i(r, e);
57096    }
57097
57098    #[simd_test(enable = "avx512f")]
57099    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57100        let a = _mm512_set1_epi32(9);
57101        let mut r = _mm_undefined_si128();
57102        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57103        let e = _mm_set1_epi8(9);
57104        assert_eq_m128i(r, e);
57105    }
57106
57107    #[simd_test(enable = "avx512f,avx512vl")]
57108    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57109        let a = _mm256_set1_epi32(9);
57110        let mut r = _mm_set1_epi8(0);
57111        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57112        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57113        assert_eq_m128i(r, e);
57114    }
57115
57116    #[simd_test(enable = "avx512f,avx512vl")]
57117    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57118        let a = _mm_set1_epi32(9);
57119        let mut r = _mm_set1_epi8(0);
57120        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57121        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57122        assert_eq_m128i(r, e);
57123    }
57124
57125    #[simd_test(enable = "avx512f")]
57126    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57127        let a = _mm512_set1_epi32(i32::MAX);
57128        let mut r = _mm_undefined_si128();
57129        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57130        let e = _mm_set1_epi8(i8::MAX);
57131        assert_eq_m128i(r, e);
57132    }
57133
57134    #[simd_test(enable = "avx512f,avx512vl")]
57135    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57136        let a = _mm256_set1_epi32(i32::MAX);
57137        let mut r = _mm_set1_epi8(0);
57138        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57139        #[rustfmt::skip]
57140        let e = _mm_set_epi8(
57141            0, 0, 0, 0,
57142            0, 0, 0, 0,
57143            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57144            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57145        );
57146        assert_eq_m128i(r, e);
57147    }
57148
57149    #[simd_test(enable = "avx512f,avx512vl")]
57150    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57151        let a = _mm_set1_epi32(i32::MAX);
57152        let mut r = _mm_set1_epi8(0);
57153        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57154        #[rustfmt::skip]
57155        let e = _mm_set_epi8(
57156            0, 0, 0, 0,
57157            0, 0, 0, 0,
57158            0, 0, 0, 0,
57159            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57160        );
57161        assert_eq_m128i(r, e);
57162    }
57163
57164    #[simd_test(enable = "avx512f")]
57165    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57166        let a = _mm512_set1_epi32(i32::MAX);
57167        let mut r = _mm_undefined_si128();
57168        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57169        let e = _mm_set1_epi8(u8::MAX as i8);
57170        assert_eq_m128i(r, e);
57171    }
57172
57173    #[simd_test(enable = "avx512f,avx512vl")]
57174    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57175        let a = _mm256_set1_epi32(i32::MAX);
57176        let mut r = _mm_set1_epi8(0);
57177        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57178        #[rustfmt::skip]
57179        let e = _mm_set_epi8(
57180            0, 0, 0, 0,
57181            0, 0, 0, 0,
57182            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57183            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57184        );
57185        assert_eq_m128i(r, e);
57186    }
57187
57188    #[simd_test(enable = "avx512f,avx512vl")]
57189    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57190        let a = _mm_set1_epi32(i32::MAX);
57191        let mut r = _mm_set1_epi8(0);
57192        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57193        #[rustfmt::skip]
57194        let e = _mm_set_epi8(
57195            0, 0, 0, 0,
57196            0, 0, 0, 0,
57197            0, 0, 0, 0,
57198            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57199        );
57200        assert_eq_m128i(r, e);
57201    }
57202
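    // Note: the `loadu`/`storeu` tests below write through a raw pointer into a
    // stack-allocated vector with no alignment requirement, while the aligned
    // `load`/`store` tests wrap their data in `#[repr(align(64))]` structs
    // because 512-bit aligned accesses require 64-byte alignment.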
57203    #[simd_test(enable = "avx512f")]
57204    unsafe fn test_mm512_storeu_epi32() {
57205        let a = _mm512_set1_epi32(9);
57206        let mut r = _mm512_undefined_epi32();
57207        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57208        assert_eq_m512i(r, a);
57209    }
57210
57211    #[simd_test(enable = "avx512f,avx512vl")]
57212    unsafe fn test_mm256_storeu_epi32() {
57213        let a = _mm256_set1_epi32(9);
57214        let mut r = _mm256_undefined_si256();
57215        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57216        assert_eq_m256i(r, a);
57217    }
57218
57219    #[simd_test(enable = "avx512f,avx512vl")]
57220    unsafe fn test_mm_storeu_epi32() {
57221        let a = _mm_set1_epi32(9);
57222        let mut r = _mm_undefined_si128();
57223        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57224        assert_eq_m128i(r, a);
57225    }
57226
57227    #[simd_test(enable = "avx512f")]
57228    unsafe fn test_mm512_loadu_si512() {
57229        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57230        let p = a.as_ptr();
57231        let r = _mm512_loadu_si512(black_box(p));
57232        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57233        assert_eq_m512i(r, e);
57234    }
57235
57236    #[simd_test(enable = "avx512f")]
57237    unsafe fn test_mm512_storeu_si512() {
57238        let a = _mm512_set1_epi32(9);
57239        let mut r = _mm512_undefined_epi32();
57240        _mm512_storeu_si512(&mut r as *mut _, a);
57241        assert_eq_m512i(r, a);
57242    }
57243
57244    #[simd_test(enable = "avx512f")]
57245    unsafe fn test_mm512_load_si512() {
57246        #[repr(align(64))]
57247        struct Align {
57248            data: [i32; 16], // 64 bytes
57249        }
57250        let a = Align {
57251            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57252        };
57253        let p = (a.data).as_ptr();
57254        let r = _mm512_load_si512(black_box(p));
57255        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57256        assert_eq_m512i(r, e);
57257    }
57258
57259    #[simd_test(enable = "avx512f")]
57260    unsafe fn test_mm512_store_si512() {
57261        let a = _mm512_set1_epi32(9);
57262        let mut r = _mm512_undefined_epi32();
57263        _mm512_store_si512(&mut r as *mut _, a);
57264        assert_eq_m512i(r, a);
57265    }
57266
57267    #[simd_test(enable = "avx512f")]
57268    unsafe fn test_mm512_load_epi32() {
57269        #[repr(align(64))]
57270        struct Align {
57271            data: [i32; 16], // 64 bytes
57272        }
57273        let a = Align {
57274            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57275        };
57276        let p = (a.data).as_ptr();
57277        let r = _mm512_load_epi32(black_box(p));
57278        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57279        assert_eq_m512i(r, e);
57280    }
57281
57282    #[simd_test(enable = "avx512f,avx512vl")]
57283    unsafe fn test_mm256_load_epi32() {
57284        #[repr(align(64))]
57285        struct Align {
57286            data: [i32; 8],
57287        }
57288        let a = Align {
57289            data: [4, 3, 2, 5, 8, 9, 64, 50],
57290        };
57291        let p = (a.data).as_ptr();
57292        let r = _mm256_load_epi32(black_box(p));
57293        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57294        assert_eq_m256i(r, e);
57295    }
57296
57297    #[simd_test(enable = "avx512f,avx512vl")]
57298    unsafe fn test_mm_load_epi32() {
57299        #[repr(align(64))]
57300        struct Align {
57301            data: [i32; 4],
57302        }
57303        let a = Align { data: [4, 3, 2, 5] };
57304        let p = (a.data).as_ptr();
57305        let r = _mm_load_epi32(black_box(p));
57306        let e = _mm_setr_epi32(4, 3, 2, 5);
57307        assert_eq_m128i(r, e);
57308    }
57309
57310    #[simd_test(enable = "avx512f")]
57311    unsafe fn test_mm512_store_epi32() {
57312        let a = _mm512_set1_epi32(9);
57313        let mut r = _mm512_undefined_epi32();
57314        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57315        assert_eq_m512i(r, a);
57316    }
57317
57318    #[simd_test(enable = "avx512f,avx512vl")]
57319    unsafe fn test_mm256_store_epi32() {
57320        let a = _mm256_set1_epi32(9);
57321        let mut r = _mm256_undefined_si256();
57322        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57323        assert_eq_m256i(r, a);
57324    }
57325
57326    #[simd_test(enable = "avx512f,avx512vl")]
57327    unsafe fn test_mm_store_epi32() {
57328        let a = _mm_set1_epi32(9);
57329        let mut r = _mm_undefined_si128();
57330        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57331        assert_eq_m128i(r, a);
57332    }
57333
57334    #[simd_test(enable = "avx512f")]
57335    unsafe fn test_mm512_load_ps() {
57336        #[repr(align(64))]
57337        struct Align {
57338            data: [f32; 16], // 64 bytes
57339        }
57340        let a = Align {
57341            data: [
57342                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57343            ],
57344        };
57345        let p = (a.data).as_ptr();
57346        let r = _mm512_load_ps(black_box(p));
57347        let e = _mm512_setr_ps(
57348            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57349        );
57350        assert_eq_m512(r, e);
57351    }
57352
57353    #[simd_test(enable = "avx512f")]
57354    unsafe fn test_mm512_store_ps() {
57355        let a = _mm512_set1_ps(9.);
57356        let mut r = _mm512_undefined_ps();
57357        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57358        assert_eq_m512(r, a);
57359    }
57360
57361    #[simd_test(enable = "avx512f")]
57362    unsafe fn test_mm512_mask_set1_epi32() {
57363        let src = _mm512_set1_epi32(2);
57364        let a: i32 = 11;
57365        let r = _mm512_mask_set1_epi32(src, 0, a);
57366        assert_eq_m512i(r, src);
57367        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57368        let e = _mm512_set1_epi32(11);
57369        assert_eq_m512i(r, e);
57370    }
57371
57372    #[simd_test(enable = "avx512f")]
57373    unsafe fn test_mm512_maskz_set1_epi32() {
57374        let a: i32 = 11;
57375        let r = _mm512_maskz_set1_epi32(0, a);
57376        assert_eq_m512i(r, _mm512_setzero_si512());
57377        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57378        let e = _mm512_set1_epi32(11);
57379        assert_eq_m512i(r, e);
57380    }
57381
57382    #[simd_test(enable = "avx512f,avx512vl")]
57383    unsafe fn test_mm256_mask_set1_epi32() {
57384        let src = _mm256_set1_epi32(2);
57385        let a: i32 = 11;
57386        let r = _mm256_mask_set1_epi32(src, 0, a);
57387        assert_eq_m256i(r, src);
57388        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57389        let e = _mm256_set1_epi32(11);
57390        assert_eq_m256i(r, e);
57391    }
57392
57393    #[simd_test(enable = "avx512f")]
57394    unsafe fn test_mm256_maskz_set1_epi32() {
57395        let a: i32 = 11;
57396        let r = _mm256_maskz_set1_epi32(0, a);
57397        assert_eq_m256i(r, _mm256_setzero_si256());
57398        let r = _mm256_maskz_set1_epi32(0b11111111, a);
57399        let e = _mm256_set1_epi32(11);
57400        assert_eq_m256i(r, e);
57401    }
57402
57403    #[simd_test(enable = "avx512f,avx512vl")]
57404    unsafe fn test_mm_mask_set1_epi32() {
57405        let src = _mm_set1_epi32(2);
57406        let a: i32 = 11;
57407        let r = _mm_mask_set1_epi32(src, 0, a);
57408        assert_eq_m128i(r, src);
57409        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57410        let e = _mm_set1_epi32(11);
57411        assert_eq_m128i(r, e);
57412    }
57413
57414    #[simd_test(enable = "avx512f")]
57415    unsafe fn test_mm_maskz_set1_epi32() {
57416        let a: i32 = 11;
57417        let r = _mm_maskz_set1_epi32(0, a);
57418        assert_eq_m128i(r, _mm_setzero_si128());
57419        let r = _mm_maskz_set1_epi32(0b00001111, a);
57420        let e = _mm_set1_epi32(11);
57421        assert_eq_m128i(r, e);
57422    }
57423
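    // The masked scalar `_ss`/`_sd` tests that follow only exercise the lowest
    // element: the upper elements of the result are always copied from the first
    // vector operand, and the low element comes from `src` (write-mask forms) or
    // is zeroed (`maskz` forms) when mask bit 0 is clear, which is what the
    // expected vectors below encode.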
57424    #[simd_test(enable = "avx512f")]
57425    unsafe fn test_mm_mask_move_ss() {
57426        let src = _mm_set_ps(10., 11., 100., 110.);
57427        let a = _mm_set_ps(1., 2., 10., 20.);
57428        let b = _mm_set_ps(3., 4., 30., 40.);
57429        let r = _mm_mask_move_ss(src, 0, a, b);
57430        let e = _mm_set_ps(1., 2., 10., 110.);
57431        assert_eq_m128(r, e);
57432        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57433        let e = _mm_set_ps(1., 2., 10., 40.);
57434        assert_eq_m128(r, e);
57435    }
57436
57437    #[simd_test(enable = "avx512f")]
57438    unsafe fn test_mm_maskz_move_ss() {
57439        let a = _mm_set_ps(1., 2., 10., 20.);
57440        let b = _mm_set_ps(3., 4., 30., 40.);
57441        let r = _mm_maskz_move_ss(0, a, b);
57442        let e = _mm_set_ps(1., 2., 10., 0.);
57443        assert_eq_m128(r, e);
57444        let r = _mm_maskz_move_ss(0b11111111, a, b);
57445        let e = _mm_set_ps(1., 2., 10., 40.);
57446        assert_eq_m128(r, e);
57447    }
57448
57449    #[simd_test(enable = "avx512f")]
57450    unsafe fn test_mm_mask_move_sd() {
57451        let src = _mm_set_pd(10., 11.);
57452        let a = _mm_set_pd(1., 2.);
57453        let b = _mm_set_pd(3., 4.);
57454        let r = _mm_mask_move_sd(src, 0, a, b);
57455        let e = _mm_set_pd(1., 11.);
57456        assert_eq_m128d(r, e);
57457        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57458        let e = _mm_set_pd(1., 4.);
57459        assert_eq_m128d(r, e);
57460    }
57461
57462    #[simd_test(enable = "avx512f")]
57463    unsafe fn test_mm_maskz_move_sd() {
57464        let a = _mm_set_pd(1., 2.);
57465        let b = _mm_set_pd(3., 4.);
57466        let r = _mm_maskz_move_sd(0, a, b);
57467        let e = _mm_set_pd(1., 0.);
57468        assert_eq_m128d(r, e);
57469        let r = _mm_maskz_move_sd(0b11111111, a, b);
57470        let e = _mm_set_pd(1., 4.);
57471        assert_eq_m128d(r, e);
57472    }
57473
57474    #[simd_test(enable = "avx512f")]
57475    unsafe fn test_mm_mask_add_ss() {
57476        let src = _mm_set_ps(10., 11., 100., 110.);
57477        let a = _mm_set_ps(1., 2., 10., 20.);
57478        let b = _mm_set_ps(3., 4., 30., 40.);
57479        let r = _mm_mask_add_ss(src, 0, a, b);
57480        let e = _mm_set_ps(1., 2., 10., 110.);
57481        assert_eq_m128(r, e);
57482        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57483        let e = _mm_set_ps(1., 2., 10., 60.);
57484        assert_eq_m128(r, e);
57485    }
57486
57487    #[simd_test(enable = "avx512f")]
57488    unsafe fn test_mm_maskz_add_ss() {
57489        let a = _mm_set_ps(1., 2., 10., 20.);
57490        let b = _mm_set_ps(3., 4., 30., 40.);
57491        let r = _mm_maskz_add_ss(0, a, b);
57492        let e = _mm_set_ps(1., 2., 10., 0.);
57493        assert_eq_m128(r, e);
57494        let r = _mm_maskz_add_ss(0b11111111, a, b);
57495        let e = _mm_set_ps(1., 2., 10., 60.);
57496        assert_eq_m128(r, e);
57497    }
57498
57499    #[simd_test(enable = "avx512f")]
57500    unsafe fn test_mm_mask_add_sd() {
57501        let src = _mm_set_pd(10., 11.);
57502        let a = _mm_set_pd(1., 2.);
57503        let b = _mm_set_pd(3., 4.);
57504        let r = _mm_mask_add_sd(src, 0, a, b);
57505        let e = _mm_set_pd(1., 11.);
57506        assert_eq_m128d(r, e);
57507        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57508        let e = _mm_set_pd(1., 6.);
57509        assert_eq_m128d(r, e);
57510    }
57511
57512    #[simd_test(enable = "avx512f")]
57513    unsafe fn test_mm_maskz_add_sd() {
57514        let a = _mm_set_pd(1., 2.);
57515        let b = _mm_set_pd(3., 4.);
57516        let r = _mm_maskz_add_sd(0, a, b);
57517        let e = _mm_set_pd(1., 0.);
57518        assert_eq_m128d(r, e);
57519        let r = _mm_maskz_add_sd(0b11111111, a, b);
57520        let e = _mm_set_pd(1., 6.);
57521        assert_eq_m128d(r, e);
57522    }
57523
57524    #[simd_test(enable = "avx512f")]
57525    unsafe fn test_mm_mask_sub_ss() {
57526        let src = _mm_set_ps(10., 11., 100., 110.);
57527        let a = _mm_set_ps(1., 2., 10., 20.);
57528        let b = _mm_set_ps(3., 4., 30., 40.);
57529        let r = _mm_mask_sub_ss(src, 0, a, b);
57530        let e = _mm_set_ps(1., 2., 10., 110.);
57531        assert_eq_m128(r, e);
57532        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57533        let e = _mm_set_ps(1., 2., 10., -20.);
57534        assert_eq_m128(r, e);
57535    }
57536
57537    #[simd_test(enable = "avx512f")]
57538    unsafe fn test_mm_maskz_sub_ss() {
57539        let a = _mm_set_ps(1., 2., 10., 20.);
57540        let b = _mm_set_ps(3., 4., 30., 40.);
57541        let r = _mm_maskz_sub_ss(0, a, b);
57542        let e = _mm_set_ps(1., 2., 10., 0.);
57543        assert_eq_m128(r, e);
57544        let r = _mm_maskz_sub_ss(0b11111111, a, b);
57545        let e = _mm_set_ps(1., 2., 10., -20.);
57546        assert_eq_m128(r, e);
57547    }
57548
57549    #[simd_test(enable = "avx512f")]
57550    unsafe fn test_mm_mask_sub_sd() {
57551        let src = _mm_set_pd(10., 11.);
57552        let a = _mm_set_pd(1., 2.);
57553        let b = _mm_set_pd(3., 4.);
57554        let r = _mm_mask_sub_sd(src, 0, a, b);
57555        let e = _mm_set_pd(1., 11.);
57556        assert_eq_m128d(r, e);
57557        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57558        let e = _mm_set_pd(1., -2.);
57559        assert_eq_m128d(r, e);
57560    }
57561
57562    #[simd_test(enable = "avx512f")]
57563    unsafe fn test_mm_maskz_sub_sd() {
57564        let a = _mm_set_pd(1., 2.);
57565        let b = _mm_set_pd(3., 4.);
57566        let r = _mm_maskz_sub_sd(0, a, b);
57567        let e = _mm_set_pd(1., 0.);
57568        assert_eq_m128d(r, e);
57569        let r = _mm_maskz_sub_sd(0b11111111, a, b);
57570        let e = _mm_set_pd(1., -2.);
57571        assert_eq_m128d(r, e);
57572    }
57573
57574    #[simd_test(enable = "avx512f")]
57575    unsafe fn test_mm_mask_mul_ss() {
57576        let src = _mm_set_ps(10., 11., 100., 110.);
57577        let a = _mm_set_ps(1., 2., 10., 20.);
57578        let b = _mm_set_ps(3., 4., 30., 40.);
57579        let r = _mm_mask_mul_ss(src, 0, a, b);
57580        let e = _mm_set_ps(1., 2., 10., 110.);
57581        assert_eq_m128(r, e);
57582        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57583        let e = _mm_set_ps(1., 2., 10., 800.);
57584        assert_eq_m128(r, e);
57585    }
57586
57587    #[simd_test(enable = "avx512f")]
57588    unsafe fn test_mm_maskz_mul_ss() {
57589        let a = _mm_set_ps(1., 2., 10., 20.);
57590        let b = _mm_set_ps(3., 4., 30., 40.);
57591        let r = _mm_maskz_mul_ss(0, a, b);
57592        let e = _mm_set_ps(1., 2., 10., 0.);
57593        assert_eq_m128(r, e);
57594        let r = _mm_maskz_mul_ss(0b11111111, a, b);
57595        let e = _mm_set_ps(1., 2., 10., 800.);
57596        assert_eq_m128(r, e);
57597    }
57598
57599    #[simd_test(enable = "avx512f")]
57600    unsafe fn test_mm_mask_mul_sd() {
57601        let src = _mm_set_pd(10., 11.);
57602        let a = _mm_set_pd(1., 2.);
57603        let b = _mm_set_pd(3., 4.);
57604        let r = _mm_mask_mul_sd(src, 0, a, b);
57605        let e = _mm_set_pd(1., 11.);
57606        assert_eq_m128d(r, e);
57607        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57608        let e = _mm_set_pd(1., 8.);
57609        assert_eq_m128d(r, e);
57610    }
57611
57612    #[simd_test(enable = "avx512f")]
57613    unsafe fn test_mm_maskz_mul_sd() {
57614        let a = _mm_set_pd(1., 2.);
57615        let b = _mm_set_pd(3., 4.);
57616        let r = _mm_maskz_mul_sd(0, a, b);
57617        let e = _mm_set_pd(1., 0.);
57618        assert_eq_m128d(r, e);
57619        let r = _mm_maskz_mul_sd(0b11111111, a, b);
57620        let e = _mm_set_pd(1., 8.);
57621        assert_eq_m128d(r, e);
57622    }
57623
57624    #[simd_test(enable = "avx512f")]
57625    unsafe fn test_mm_mask_div_ss() {
57626        let src = _mm_set_ps(10., 11., 100., 110.);
57627        let a = _mm_set_ps(1., 2., 10., 20.);
57628        let b = _mm_set_ps(3., 4., 30., 40.);
57629        let r = _mm_mask_div_ss(src, 0, a, b);
57630        let e = _mm_set_ps(1., 2., 10., 110.);
57631        assert_eq_m128(r, e);
57632        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57633        let e = _mm_set_ps(1., 2., 10., 0.5);
57634        assert_eq_m128(r, e);
57635    }
57636
57637    #[simd_test(enable = "avx512f")]
57638    unsafe fn test_mm_maskz_div_ss() {
57639        let a = _mm_set_ps(1., 2., 10., 20.);
57640        let b = _mm_set_ps(3., 4., 30., 40.);
57641        let r = _mm_maskz_div_ss(0, a, b);
57642        let e = _mm_set_ps(1., 2., 10., 0.);
57643        assert_eq_m128(r, e);
57644        let r = _mm_maskz_div_ss(0b11111111, a, b);
57645        let e = _mm_set_ps(1., 2., 10., 0.5);
57646        assert_eq_m128(r, e);
57647    }
57648
57649    #[simd_test(enable = "avx512f")]
57650    unsafe fn test_mm_mask_div_sd() {
57651        let src = _mm_set_pd(10., 11.);
57652        let a = _mm_set_pd(1., 2.);
57653        let b = _mm_set_pd(3., 4.);
57654        let r = _mm_mask_div_sd(src, 0, a, b);
57655        let e = _mm_set_pd(1., 11.);
57656        assert_eq_m128d(r, e);
57657        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57658        let e = _mm_set_pd(1., 0.5);
57659        assert_eq_m128d(r, e);
57660    }
57661
57662    #[simd_test(enable = "avx512f")]
57663    unsafe fn test_mm_maskz_div_sd() {
57664        let a = _mm_set_pd(1., 2.);
57665        let b = _mm_set_pd(3., 4.);
57666        let r = _mm_maskz_div_sd(0, a, b);
57667        let e = _mm_set_pd(1., 0.);
57668        assert_eq_m128d(r, e);
57669        let r = _mm_maskz_div_sd(0b11111111, a, b);
57670        let e = _mm_set_pd(1., 0.5);
57671        assert_eq_m128d(r, e);
57672    }
57673
57674    #[simd_test(enable = "avx512f")]
57675    unsafe fn test_mm_mask_max_ss() {
57676        let a = _mm_set_ps(0., 1., 2., 3.);
57677        let b = _mm_set_ps(4., 5., 6., 7.);
57678        let r = _mm_mask_max_ss(a, 0, a, b);
57679        let e = _mm_set_ps(0., 1., 2., 3.);
57680        assert_eq_m128(r, e);
57681        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57682        let e = _mm_set_ps(0., 1., 2., 7.);
57683        assert_eq_m128(r, e);
57684    }
57685
57686    #[simd_test(enable = "avx512f")]
57687    unsafe fn test_mm_maskz_max_ss() {
57688        let a = _mm_set_ps(0., 1., 2., 3.);
57689        let b = _mm_set_ps(4., 5., 6., 7.);
57690        let r = _mm_maskz_max_ss(0, a, b);
57691        let e = _mm_set_ps(0., 1., 2., 0.);
57692        assert_eq_m128(r, e);
57693        let r = _mm_maskz_max_ss(0b11111111, a, b);
57694        let e = _mm_set_ps(0., 1., 2., 7.);
57695        assert_eq_m128(r, e);
57696    }
57697
57698    #[simd_test(enable = "avx512f")]
57699    unsafe fn test_mm_mask_max_sd() {
57700        let a = _mm_set_pd(0., 1.);
57701        let b = _mm_set_pd(2., 3.);
57702        let r = _mm_mask_max_sd(a, 0, a, b);
57703        let e = _mm_set_pd(0., 1.);
57704        assert_eq_m128d(r, e);
57705        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57706        let e = _mm_set_pd(0., 3.);
57707        assert_eq_m128d(r, e);
57708    }
57709
57710    #[simd_test(enable = "avx512f")]
57711    unsafe fn test_mm_maskz_max_sd() {
57712        let a = _mm_set_pd(0., 1.);
57713        let b = _mm_set_pd(2., 3.);
57714        let r = _mm_maskz_max_sd(0, a, b);
57715        let e = _mm_set_pd(0., 0.);
57716        assert_eq_m128d(r, e);
57717        let r = _mm_maskz_max_sd(0b11111111, a, b);
57718        let e = _mm_set_pd(0., 3.);
57719        assert_eq_m128d(r, e);
57720    }
57721
57722    #[simd_test(enable = "avx512f")]
57723    unsafe fn test_mm_mask_min_ss() {
57724        let a = _mm_set_ps(0., 1., 2., 3.);
57725        let b = _mm_set_ps(4., 5., 6., 7.);
57726        let r = _mm_mask_min_ss(a, 0, a, b);
57727        let e = _mm_set_ps(0., 1., 2., 3.);
57728        assert_eq_m128(r, e);
57729        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57730        let e = _mm_set_ps(0., 1., 2., 3.);
57731        assert_eq_m128(r, e);
57732    }
57733
57734    #[simd_test(enable = "avx512f")]
57735    unsafe fn test_mm_maskz_min_ss() {
57736        let a = _mm_set_ps(0., 1., 2., 3.);
57737        let b = _mm_set_ps(4., 5., 6., 7.);
57738        let r = _mm_maskz_min_ss(0, a, b);
57739        let e = _mm_set_ps(0., 1., 2., 0.);
57740        assert_eq_m128(r, e);
57741        let r = _mm_maskz_min_ss(0b11111111, a, b);
57742        let e = _mm_set_ps(0., 1., 2., 3.);
57743        assert_eq_m128(r, e);
57744    }
57745
57746    #[simd_test(enable = "avx512f")]
57747    unsafe fn test_mm_mask_min_sd() {
57748        let a = _mm_set_pd(0., 1.);
57749        let b = _mm_set_pd(2., 3.);
57750        let r = _mm_mask_min_sd(a, 0, a, b);
57751        let e = _mm_set_pd(0., 1.);
57752        assert_eq_m128d(r, e);
57753        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57754        let e = _mm_set_pd(0., 1.);
57755        assert_eq_m128d(r, e);
57756    }
57757
57758    #[simd_test(enable = "avx512f")]
57759    unsafe fn test_mm_maskz_min_sd() {
57760        let a = _mm_set_pd(0., 1.);
57761        let b = _mm_set_pd(2., 3.);
57762        let r = _mm_maskz_min_sd(0, a, b);
57763        let e = _mm_set_pd(0., 0.);
57764        assert_eq_m128d(r, e);
57765        let r = _mm_maskz_min_sd(0b11111111, a, b);
57766        let e = _mm_set_pd(0., 1.);
57767        assert_eq_m128d(r, e);
57768    }
57769
57770    #[simd_test(enable = "avx512f")]
57771    unsafe fn test_mm_mask_sqrt_ss() {
57772        let src = _mm_set_ps(10., 11., 100., 110.);
57773        let a = _mm_set_ps(1., 2., 10., 20.);
57774        let b = _mm_set_ps(3., 4., 30., 4.);
57775        let r = _mm_mask_sqrt_ss(src, 0, a, b);
57776        let e = _mm_set_ps(1., 2., 10., 110.);
57777        assert_eq_m128(r, e);
57778        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57779        let e = _mm_set_ps(1., 2., 10., 2.);
57780        assert_eq_m128(r, e);
57781    }
57782
57783    #[simd_test(enable = "avx512f")]
57784    unsafe fn test_mm_maskz_sqrt_ss() {
57785        let a = _mm_set_ps(1., 2., 10., 20.);
57786        let b = _mm_set_ps(3., 4., 30., 4.);
57787        let r = _mm_maskz_sqrt_ss(0, a, b);
57788        let e = _mm_set_ps(1., 2., 10., 0.);
57789        assert_eq_m128(r, e);
57790        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57791        let e = _mm_set_ps(1., 2., 10., 2.);
57792        assert_eq_m128(r, e);
57793    }
57794
57795    #[simd_test(enable = "avx512f")]
57796    unsafe fn test_mm_mask_sqrt_sd() {
57797        let src = _mm_set_pd(10., 11.);
57798        let a = _mm_set_pd(1., 2.);
57799        let b = _mm_set_pd(3., 4.);
57800        let r = _mm_mask_sqrt_sd(src, 0, a, b);
57801        let e = _mm_set_pd(1., 11.);
57802        assert_eq_m128d(r, e);
57803        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57804        let e = _mm_set_pd(1., 2.);
57805        assert_eq_m128d(r, e);
57806    }
57807
57808    #[simd_test(enable = "avx512f")]
57809    unsafe fn test_mm_maskz_sqrt_sd() {
57810        let a = _mm_set_pd(1., 2.);
57811        let b = _mm_set_pd(3., 4.);
57812        let r = _mm_maskz_sqrt_sd(0, a, b);
57813        let e = _mm_set_pd(1., 0.);
57814        assert_eq_m128d(r, e);
57815        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57816        let e = _mm_set_pd(1., 2.);
57817        assert_eq_m128d(r, e);
57818    }
57819
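    // `rsqrt14`/`rcp14` are approximations with a relative error bounded by
    // 2^-14; the inputs here are powers of two, so the approximations are exact
    // and the results can be compared bit-for-bit.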
57820    #[simd_test(enable = "avx512f")]
57821    unsafe fn test_mm_rsqrt14_ss() {
57822        let a = _mm_set_ps(1., 2., 10., 20.);
57823        let b = _mm_set_ps(3., 4., 30., 4.);
57824        let r = _mm_rsqrt14_ss(a, b);
57825        let e = _mm_set_ps(1., 2., 10., 0.5);
57826        assert_eq_m128(r, e);
57827    }
57828
57829    #[simd_test(enable = "avx512f")]
57830    unsafe fn test_mm_mask_rsqrt14_ss() {
57831        let src = _mm_set_ps(10., 11., 100., 110.);
57832        let a = _mm_set_ps(1., 2., 10., 20.);
57833        let b = _mm_set_ps(3., 4., 30., 4.);
57834        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57835        let e = _mm_set_ps(1., 2., 10., 110.);
57836        assert_eq_m128(r, e);
57837        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57838        let e = _mm_set_ps(1., 2., 10., 0.5);
57839        assert_eq_m128(r, e);
57840    }
57841
57842    #[simd_test(enable = "avx512f")]
57843    unsafe fn test_mm_maskz_rsqrt14_ss() {
57844        let a = _mm_set_ps(1., 2., 10., 20.);
57845        let b = _mm_set_ps(3., 4., 30., 4.);
57846        let r = _mm_maskz_rsqrt14_ss(0, a, b);
57847        let e = _mm_set_ps(1., 2., 10., 0.);
57848        assert_eq_m128(r, e);
57849        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57850        let e = _mm_set_ps(1., 2., 10., 0.5);
57851        assert_eq_m128(r, e);
57852    }
57853
57854    #[simd_test(enable = "avx512f")]
57855    unsafe fn test_mm_rsqrt14_sd() {
57856        let a = _mm_set_pd(1., 2.);
57857        let b = _mm_set_pd(3., 4.);
57858        let r = _mm_rsqrt14_sd(a, b);
57859        let e = _mm_set_pd(1., 0.5);
57860        assert_eq_m128d(r, e);
57861    }
57862
57863    #[simd_test(enable = "avx512f")]
57864    unsafe fn test_mm_mask_rsqrt14_sd() {
57865        let src = _mm_set_pd(10., 11.);
57866        let a = _mm_set_pd(1., 2.);
57867        let b = _mm_set_pd(3., 4.);
57868        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57869        let e = _mm_set_pd(1., 11.);
57870        assert_eq_m128d(r, e);
57871        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57872        let e = _mm_set_pd(1., 0.5);
57873        assert_eq_m128d(r, e);
57874    }
57875
57876    #[simd_test(enable = "avx512f")]
57877    unsafe fn test_mm_maskz_rsqrt14_sd() {
57878        let a = _mm_set_pd(1., 2.);
57879        let b = _mm_set_pd(3., 4.);
57880        let r = _mm_maskz_rsqrt14_sd(0, a, b);
57881        let e = _mm_set_pd(1., 0.);
57882        assert_eq_m128d(r, e);
57883        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57884        let e = _mm_set_pd(1., 0.5);
57885        assert_eq_m128d(r, e);
57886    }
57887
57888    #[simd_test(enable = "avx512f")]
57889    unsafe fn test_mm_rcp14_ss() {
57890        let a = _mm_set_ps(1., 2., 10., 20.);
57891        let b = _mm_set_ps(3., 4., 30., 4.);
57892        let r = _mm_rcp14_ss(a, b);
57893        let e = _mm_set_ps(1., 2., 10., 0.25);
57894        assert_eq_m128(r, e);
57895    }
57896
57897    #[simd_test(enable = "avx512f")]
57898    unsafe fn test_mm_mask_rcp14_ss() {
57899        let src = _mm_set_ps(10., 11., 100., 110.);
57900        let a = _mm_set_ps(1., 2., 10., 20.);
57901        let b = _mm_set_ps(3., 4., 30., 4.);
57902        let r = _mm_mask_rcp14_ss(src, 0, a, b);
57903        let e = _mm_set_ps(1., 2., 10., 110.);
57904        assert_eq_m128(r, e);
57905        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57906        let e = _mm_set_ps(1., 2., 10., 0.25);
57907        assert_eq_m128(r, e);
57908    }
57909
57910    #[simd_test(enable = "avx512f")]
57911    unsafe fn test_mm_maskz_rcp14_ss() {
57912        let a = _mm_set_ps(1., 2., 10., 20.);
57913        let b = _mm_set_ps(3., 4., 30., 4.);
57914        let r = _mm_maskz_rcp14_ss(0, a, b);
57915        let e = _mm_set_ps(1., 2., 10., 0.);
57916        assert_eq_m128(r, e);
57917        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57918        let e = _mm_set_ps(1., 2., 10., 0.25);
57919        assert_eq_m128(r, e);
57920    }
57921
57922    #[simd_test(enable = "avx512f")]
57923    unsafe fn test_mm_rcp14_sd() {
57924        let a = _mm_set_pd(1., 2.);
57925        let b = _mm_set_pd(3., 4.);
57926        let r = _mm_rcp14_sd(a, b);
57927        let e = _mm_set_pd(1., 0.25);
57928        assert_eq_m128d(r, e);
57929    }
57930
57931    #[simd_test(enable = "avx512f")]
57932    unsafe fn test_mm_mask_rcp14_sd() {
57933        let src = _mm_set_pd(10., 11.);
57934        let a = _mm_set_pd(1., 2.);
57935        let b = _mm_set_pd(3., 4.);
57936        let r = _mm_mask_rcp14_sd(src, 0, a, b);
57937        let e = _mm_set_pd(1., 11.);
57938        assert_eq_m128d(r, e);
57939        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
57940        let e = _mm_set_pd(1., 0.25);
57941        assert_eq_m128d(r, e);
57942    }
57943
57944    #[simd_test(enable = "avx512f")]
57945    unsafe fn test_mm_maskz_rcp14_sd() {
57946        let a = _mm_set_pd(1., 2.);
57947        let b = _mm_set_pd(3., 4.);
57948        let r = _mm_maskz_rcp14_sd(0, a, b);
57949        let e = _mm_set_pd(1., 0.);
57950        assert_eq_m128d(r, e);
57951        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
57952        let e = _mm_set_pd(1., 0.25);
57953        assert_eq_m128d(r, e);
57954    }
57955
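    // `getexp` returns floor(log2(|x|)) as a float (getexp(3.0) == 1.0), and
    // `getmant` with `_MM_MANT_NORM_1_2` returns the mantissa normalized into
    // [1.0, 2.0) (10.0 == 1.25 * 2^3, so getmant(10.0) == 1.25), matching the
    // expected vectors below.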
57956    #[simd_test(enable = "avx512f")]
57957    unsafe fn test_mm_getexp_ss() {
57958        let a = _mm_set1_ps(2.);
57959        let b = _mm_set1_ps(3.);
57960        let r = _mm_getexp_ss(a, b);
57961        let e = _mm_set_ps(2., 2., 2., 1.);
57962        assert_eq_m128(r, e);
57963    }
57964
57965    #[simd_test(enable = "avx512f")]
57966    unsafe fn test_mm_mask_getexp_ss() {
57967        let a = _mm_set1_ps(2.);
57968        let b = _mm_set1_ps(3.);
57969        let r = _mm_mask_getexp_ss(a, 0, a, b);
57970        let e = _mm_set_ps(2., 2., 2., 2.);
57971        assert_eq_m128(r, e);
57972        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
57973        let e = _mm_set_ps(2., 2., 2., 1.);
57974        assert_eq_m128(r, e);
57975    }
57976
57977    #[simd_test(enable = "avx512f")]
57978    unsafe fn test_mm_maskz_getexp_ss() {
57979        let a = _mm_set1_ps(2.);
57980        let b = _mm_set1_ps(3.);
57981        let r = _mm_maskz_getexp_ss(0, a, b);
57982        let e = _mm_set_ps(2., 2., 2., 0.);
57983        assert_eq_m128(r, e);
57984        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
57985        let e = _mm_set_ps(2., 2., 2., 1.);
57986        assert_eq_m128(r, e);
57987    }
57988
57989    #[simd_test(enable = "avx512f")]
57990    unsafe fn test_mm_getexp_sd() {
57991        let a = _mm_set1_pd(2.);
57992        let b = _mm_set1_pd(3.);
57993        let r = _mm_getexp_sd(a, b);
57994        let e = _mm_set_pd(2., 1.);
57995        assert_eq_m128d(r, e);
57996    }
57997
57998    #[simd_test(enable = "avx512f")]
57999    unsafe fn test_mm_mask_getexp_sd() {
58000        let a = _mm_set1_pd(2.);
58001        let b = _mm_set1_pd(3.);
58002        let r = _mm_mask_getexp_sd(a, 0, a, b);
58003        let e = _mm_set_pd(2., 2.);
58004        assert_eq_m128d(r, e);
58005        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58006        let e = _mm_set_pd(2., 1.);
58007        assert_eq_m128d(r, e);
58008    }
58009
58010    #[simd_test(enable = "avx512f")]
58011    unsafe fn test_mm_maskz_getexp_sd() {
58012        let a = _mm_set1_pd(2.);
58013        let b = _mm_set1_pd(3.);
58014        let r = _mm_maskz_getexp_sd(0, a, b);
58015        let e = _mm_set_pd(2., 0.);
58016        assert_eq_m128d(r, e);
58017        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58018        let e = _mm_set_pd(2., 1.);
58019        assert_eq_m128d(r, e);
58020    }
58021
58022    #[simd_test(enable = "avx512f")]
58023    unsafe fn test_mm_getmant_ss() {
58024        let a = _mm_set1_ps(20.);
58025        let b = _mm_set1_ps(10.);
58026        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58027        let e = _mm_set_ps(20., 20., 20., 1.25);
58028        assert_eq_m128(r, e);
58029    }
58030
58031    #[simd_test(enable = "avx512f")]
58032    unsafe fn test_mm_mask_getmant_ss() {
58033        let a = _mm_set1_ps(20.);
58034        let b = _mm_set1_ps(10.);
58035        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58036        let e = _mm_set_ps(20., 20., 20., 20.);
58037        assert_eq_m128(r, e);
58038        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58039        let e = _mm_set_ps(20., 20., 20., 1.25);
58040        assert_eq_m128(r, e);
58041    }
58042
58043    #[simd_test(enable = "avx512f")]
58044    unsafe fn test_mm_maskz_getmant_ss() {
58045        let a = _mm_set1_ps(20.);
58046        let b = _mm_set1_ps(10.);
58047        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58048        let e = _mm_set_ps(20., 20., 20., 0.);
58049        assert_eq_m128(r, e);
58050        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58051        let e = _mm_set_ps(20., 20., 20., 1.25);
58052        assert_eq_m128(r, e);
58053    }
58054
58055    #[simd_test(enable = "avx512f")]
58056    unsafe fn test_mm_getmant_sd() {
58057        let a = _mm_set1_pd(20.);
58058        let b = _mm_set1_pd(10.);
58059        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58060        let e = _mm_set_pd(20., 1.25);
58061        assert_eq_m128d(r, e);
58062    }
58063
58064    #[simd_test(enable = "avx512f")]
58065    unsafe fn test_mm_mask_getmant_sd() {
58066        let a = _mm_set1_pd(20.);
58067        let b = _mm_set1_pd(10.);
58068        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58069        let e = _mm_set_pd(20., 20.);
58070        assert_eq_m128d(r, e);
58071        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58072        let e = _mm_set_pd(20., 1.25);
58073        assert_eq_m128d(r, e);
58074    }
58075
58076    #[simd_test(enable = "avx512f")]
58077    unsafe fn test_mm_maskz_getmant_sd() {
58078        let a = _mm_set1_pd(20.);
58079        let b = _mm_set1_pd(10.);
58080        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58081        let e = _mm_set_pd(20., 0.);
58082        assert_eq_m128d(r, e);
58083        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58084        let e = _mm_set_pd(20., 1.25);
58085        assert_eq_m128d(r, e);
58086    }
58087
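    // `roundscale::<IMM8>` keeps `IMM8 >> 4` fraction bits after rounding; with
    // IMM8 == 0 that is rounding to the nearest integer, so 1.1 rounds to 1.0
    // in the tests below.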
58088    #[simd_test(enable = "avx512f")]
58089    unsafe fn test_mm_roundscale_ss() {
58090        let a = _mm_set1_ps(2.2);
58091        let b = _mm_set1_ps(1.1);
58092        let r = _mm_roundscale_ss::<0>(a, b);
58093        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58094        assert_eq_m128(r, e);
58095    }
58096
58097    #[simd_test(enable = "avx512f")]
58098    unsafe fn test_mm_mask_roundscale_ss() {
58099        let a = _mm_set1_ps(2.2);
58100        let b = _mm_set1_ps(1.1);
58101        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58102        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58103        assert_eq_m128(r, e);
58104        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58105        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58106        assert_eq_m128(r, e);
58107    }
58108
58109    #[simd_test(enable = "avx512f")]
58110    unsafe fn test_mm_maskz_roundscale_ss() {
58111        let a = _mm_set1_ps(2.2);
58112        let b = _mm_set1_ps(1.1);
58113        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58114        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58115        assert_eq_m128(r, e);
58116        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58117        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58118        assert_eq_m128(r, e);
58119    }
58120
58121    #[simd_test(enable = "avx512f")]
58122    unsafe fn test_mm_roundscale_sd() {
58123        let a = _mm_set1_pd(2.2);
58124        let b = _mm_set1_pd(1.1);
58125        let r = _mm_roundscale_sd::<0>(a, b);
58126        let e = _mm_set_pd(2.2, 1.0);
58127        assert_eq_m128d(r, e);
58128    }
58129
58130    #[simd_test(enable = "avx512f")]
58131    unsafe fn test_mm_mask_roundscale_sd() {
58132        let a = _mm_set1_pd(2.2);
58133        let b = _mm_set1_pd(1.1);
58134        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58135        let e = _mm_set_pd(2.2, 2.2);
58136        assert_eq_m128d(r, e);
58137        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58138        let e = _mm_set_pd(2.2, 1.0);
58139        assert_eq_m128d(r, e);
58140    }
58141
58142    #[simd_test(enable = "avx512f")]
58143    unsafe fn test_mm_maskz_roundscale_sd() {
58144        let a = _mm_set1_pd(2.2);
58145        let b = _mm_set1_pd(1.1);
58146        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58147        let e = _mm_set_pd(2.2, 0.0);
58148        assert_eq_m128d(r, e);
58149        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58150        let e = _mm_set_pd(2.2, 1.0);
58151        assert_eq_m128d(r, e);
58152    }
58153
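    // `scalef` computes `a * 2^floor(b)`; with a == 1.0 and b == 3.0 the low
    // element of the expected results below is 8.0.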
58154    #[simd_test(enable = "avx512f")]
58155    unsafe fn test_mm_scalef_ss() {
58156        let a = _mm_set1_ps(1.);
58157        let b = _mm_set1_ps(3.);
58158        let r = _mm_scalef_ss(a, b);
58159        let e = _mm_set_ps(1., 1., 1., 8.);
58160        assert_eq_m128(r, e);
58161    }
58162
58163    #[simd_test(enable = "avx512f")]
58164    unsafe fn test_mm_mask_scalef_ss() {
58165        let a = _mm_set1_ps(1.);
58166        let b = _mm_set1_ps(3.);
58167        let r = _mm_mask_scalef_ss(a, 0, a, b);
58168        let e = _mm_set_ps(1., 1., 1., 1.);
58169        assert_eq_m128(r, e);
58170        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58171        let e = _mm_set_ps(1., 1., 1., 8.);
58172        assert_eq_m128(r, e);
58173    }
58174
58175    #[simd_test(enable = "avx512f")]
58176    unsafe fn test_mm_maskz_scalef_ss() {
58177        let a = _mm_set1_ps(1.);
58178        let b = _mm_set1_ps(3.);
58179        let r = _mm_maskz_scalef_ss(0, a, b);
58180        let e = _mm_set_ps(1., 1., 1., 0.);
58181        assert_eq_m128(r, e);
58182        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58183        let e = _mm_set_ps(1., 1., 1., 8.);
58184        assert_eq_m128(r, e);
58185    }
58186
58187    #[simd_test(enable = "avx512f")]
58188    unsafe fn test_mm_scalef_sd() {
58189        let a = _mm_set1_pd(1.);
58190        let b = _mm_set1_pd(3.);
58191        let r = _mm_scalef_sd(a, b);
58192        let e = _mm_set_pd(1., 8.);
58193        assert_eq_m128d(r, e);
58194    }
58195
58196    #[simd_test(enable = "avx512f")]
58197    unsafe fn test_mm_mask_scalef_sd() {
58198        let a = _mm_set1_pd(1.);
58199        let b = _mm_set1_pd(3.);
58200        let r = _mm_mask_scalef_sd(a, 0, a, b);
58201        let e = _mm_set_pd(1., 1.);
58202        assert_eq_m128d(r, e);
58203        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58204        let e = _mm_set_pd(1., 8.);
58205        assert_eq_m128d(r, e);
58206    }
58207
58208    #[simd_test(enable = "avx512f")]
58209    unsafe fn test_mm_maskz_scalef_sd() {
58210        let a = _mm_set1_pd(1.);
58211        let b = _mm_set1_pd(3.);
58212        let r = _mm_maskz_scalef_sd(0, a, b);
58213        let e = _mm_set_pd(1., 0.);
58214        assert_eq_m128d(r, e);
58215        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58216        let e = _mm_set_pd(1., 8.);
58217        assert_eq_m128d(r, e);
58218    }
58219
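    // In the fused multiply-add tests below, the `mask_` forms take unselected
    // lanes from the first operand (hence `assert_eq_m128(r, a)` with a zero
    // mask), while the `mask3_` forms take them from the third operand `c`
    // (hence `assert_eq_m128(r, c)`).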
58220    #[simd_test(enable = "avx512f")]
58221    unsafe fn test_mm_mask_fmadd_ss() {
58222        let a = _mm_set1_ps(1.);
58223        let b = _mm_set1_ps(2.);
58224        let c = _mm_set1_ps(3.);
58225        let r = _mm_mask_fmadd_ss(a, 0, b, c);
58226        assert_eq_m128(r, a);
58227        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58228        let e = _mm_set_ps(1., 1., 1., 5.);
58229        assert_eq_m128(r, e);
58230    }
58231
58232    #[simd_test(enable = "avx512f")]
58233    unsafe fn test_mm_maskz_fmadd_ss() {
58234        let a = _mm_set1_ps(1.);
58235        let b = _mm_set1_ps(2.);
58236        let c = _mm_set1_ps(3.);
58237        let r = _mm_maskz_fmadd_ss(0, a, b, c);
58238        let e = _mm_set_ps(1., 1., 1., 0.);
58239        assert_eq_m128(r, e);
58240        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58241        let e = _mm_set_ps(1., 1., 1., 5.);
58242        assert_eq_m128(r, e);
58243    }
58244
58245    #[simd_test(enable = "avx512f")]
58246    unsafe fn test_mm_mask3_fmadd_ss() {
58247        let a = _mm_set1_ps(1.);
58248        let b = _mm_set1_ps(2.);
58249        let c = _mm_set1_ps(3.);
58250        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58251        assert_eq_m128(r, c);
58252        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58253        let e = _mm_set_ps(3., 3., 3., 5.);
58254        assert_eq_m128(r, e);
58255    }
58256
58257    #[simd_test(enable = "avx512f")]
58258    unsafe fn test_mm_mask_fmadd_sd() {
58259        let a = _mm_set1_pd(1.);
58260        let b = _mm_set1_pd(2.);
58261        let c = _mm_set1_pd(3.);
58262        let r = _mm_mask_fmadd_sd(a, 0, b, c);
58263        assert_eq_m128d(r, a);
58264        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58265        let e = _mm_set_pd(1., 5.);
58266        assert_eq_m128d(r, e);
58267    }
58268
58269    #[simd_test(enable = "avx512f")]
58270    unsafe fn test_mm_maskz_fmadd_sd() {
58271        let a = _mm_set1_pd(1.);
58272        let b = _mm_set1_pd(2.);
58273        let c = _mm_set1_pd(3.);
58274        let r = _mm_maskz_fmadd_sd(0, a, b, c);
58275        let e = _mm_set_pd(1., 0.);
58276        assert_eq_m128d(r, e);
58277        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58278        let e = _mm_set_pd(1., 5.);
58279        assert_eq_m128d(r, e);
58280    }
58281
58282    #[simd_test(enable = "avx512f")]
58283    unsafe fn test_mm_mask3_fmadd_sd() {
58284        let a = _mm_set1_pd(1.);
58285        let b = _mm_set1_pd(2.);
58286        let c = _mm_set1_pd(3.);
58287        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58288        assert_eq_m128d(r, c);
58289        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58290        let e = _mm_set_pd(3., 5.);
58291        assert_eq_m128d(r, e);
58292    }
58293
58294    #[simd_test(enable = "avx512f")]
58295    unsafe fn test_mm_mask_fmsub_ss() {
58296        let a = _mm_set1_ps(1.);
58297        let b = _mm_set1_ps(2.);
58298        let c = _mm_set1_ps(3.);
58299        let r = _mm_mask_fmsub_ss(a, 0, b, c);
58300        assert_eq_m128(r, a);
58301        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58302        let e = _mm_set_ps(1., 1., 1., -1.);
58303        assert_eq_m128(r, e);
58304    }
58305
58306    #[simd_test(enable = "avx512f")]
58307    unsafe fn test_mm_maskz_fmsub_ss() {
58308        let a = _mm_set1_ps(1.);
58309        let b = _mm_set1_ps(2.);
58310        let c = _mm_set1_ps(3.);
58311        let r = _mm_maskz_fmsub_ss(0, a, b, c);
58312        let e = _mm_set_ps(1., 1., 1., 0.);
58313        assert_eq_m128(r, e);
58314        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58315        let e = _mm_set_ps(1., 1., 1., -1.);
58316        assert_eq_m128(r, e);
58317    }
58318
58319    #[simd_test(enable = "avx512f")]
58320    unsafe fn test_mm_mask3_fmsub_ss() {
58321        let a = _mm_set1_ps(1.);
58322        let b = _mm_set1_ps(2.);
58323        let c = _mm_set1_ps(3.);
58324        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58325        assert_eq_m128(r, c);
58326        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58327        let e = _mm_set_ps(3., 3., 3., -1.);
58328        assert_eq_m128(r, e);
58329    }
58330
58331    #[simd_test(enable = "avx512f")]
58332    unsafe fn test_mm_mask_fmsub_sd() {
58333        let a = _mm_set1_pd(1.);
58334        let b = _mm_set1_pd(2.);
58335        let c = _mm_set1_pd(3.);
58336        let r = _mm_mask_fmsub_sd(a, 0, b, c);
58337        assert_eq_m128d(r, a);
58338        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58339        let e = _mm_set_pd(1., -1.);
58340        assert_eq_m128d(r, e);
58341    }
58342
58343    #[simd_test(enable = "avx512f")]
58344    unsafe fn test_mm_maskz_fmsub_sd() {
58345        let a = _mm_set1_pd(1.);
58346        let b = _mm_set1_pd(2.);
58347        let c = _mm_set1_pd(3.);
58348        let r = _mm_maskz_fmsub_sd(0, a, b, c);
58349        let e = _mm_set_pd(1., 0.);
58350        assert_eq_m128d(r, e);
58351        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58352        let e = _mm_set_pd(1., -1.);
58353        assert_eq_m128d(r, e);
58354    }
58355
58356    #[simd_test(enable = "avx512f")]
58357    unsafe fn test_mm_mask3_fmsub_sd() {
58358        let a = _mm_set1_pd(1.);
58359        let b = _mm_set1_pd(2.);
58360        let c = _mm_set1_pd(3.);
58361        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58362        assert_eq_m128d(r, c);
58363        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58364        let e = _mm_set_pd(3., -1.);
58365        assert_eq_m128d(r, e);
58366    }
58367
58368    #[simd_test(enable = "avx512f")]
58369    unsafe fn test_mm_mask_fnmadd_ss() {
58370        let a = _mm_set1_ps(1.);
58371        let b = _mm_set1_ps(2.);
58372        let c = _mm_set1_ps(3.);
58373        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58374        assert_eq_m128(r, a);
58375        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58376        let e = _mm_set_ps(1., 1., 1., 1.);
58377        assert_eq_m128(r, e);
58378    }
58379
58380    #[simd_test(enable = "avx512f")]
58381    unsafe fn test_mm_maskz_fnmadd_ss() {
58382        let a = _mm_set1_ps(1.);
58383        let b = _mm_set1_ps(2.);
58384        let c = _mm_set1_ps(3.);
58385        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58386        let e = _mm_set_ps(1., 1., 1., 0.);
58387        assert_eq_m128(r, e);
58388        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58389        let e = _mm_set_ps(1., 1., 1., 1.);
58390        assert_eq_m128(r, e);
58391    }
58392
58393    #[simd_test(enable = "avx512f")]
58394    unsafe fn test_mm_mask3_fnmadd_ss() {
58395        let a = _mm_set1_ps(1.);
58396        let b = _mm_set1_ps(2.);
58397        let c = _mm_set1_ps(3.);
58398        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58399        assert_eq_m128(r, c);
58400        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58401        let e = _mm_set_ps(3., 3., 3., 1.);
58402        assert_eq_m128(r, e);
58403    }
58404
58405    #[simd_test(enable = "avx512f")]
58406    unsafe fn test_mm_mask_fnmadd_sd() {
58407        let a = _mm_set1_pd(1.);
58408        let b = _mm_set1_pd(2.);
58409        let c = _mm_set1_pd(3.);
58410        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58411        assert_eq_m128d(r, a);
58412        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58413        let e = _mm_set_pd(1., 1.);
58414        assert_eq_m128d(r, e);
58415    }
58416
58417    #[simd_test(enable = "avx512f")]
58418    unsafe fn test_mm_maskz_fnmadd_sd() {
58419        let a = _mm_set1_pd(1.);
58420        let b = _mm_set1_pd(2.);
58421        let c = _mm_set1_pd(3.);
58422        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58423        let e = _mm_set_pd(1., 0.);
58424        assert_eq_m128d(r, e);
58425        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58426        let e = _mm_set_pd(1., 1.);
58427        assert_eq_m128d(r, e);
58428    }
58429
58430    #[simd_test(enable = "avx512f")]
58431    unsafe fn test_mm_mask3_fnmadd_sd() {
58432        let a = _mm_set1_pd(1.);
58433        let b = _mm_set1_pd(2.);
58434        let c = _mm_set1_pd(3.);
58435        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58436        assert_eq_m128d(r, c);
58437        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58438        let e = _mm_set_pd(3., 1.);
58439        assert_eq_m128d(r, e);
58440    }
58441
58442    #[simd_test(enable = "avx512f")]
58443    unsafe fn test_mm_mask_fnmsub_ss() {
58444        let a = _mm_set1_ps(1.);
58445        let b = _mm_set1_ps(2.);
58446        let c = _mm_set1_ps(3.);
58447        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58448        assert_eq_m128(r, a);
58449        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58450        let e = _mm_set_ps(1., 1., 1., -5.);
58451        assert_eq_m128(r, e);
58452    }
58453
58454    #[simd_test(enable = "avx512f")]
58455    unsafe fn test_mm_maskz_fnmsub_ss() {
58456        let a = _mm_set1_ps(1.);
58457        let b = _mm_set1_ps(2.);
58458        let c = _mm_set1_ps(3.);
58459        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58460        let e = _mm_set_ps(1., 1., 1., 0.);
58461        assert_eq_m128(r, e);
58462        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58463        let e = _mm_set_ps(1., 1., 1., -5.);
58464        assert_eq_m128(r, e);
58465    }
58466
58467    #[simd_test(enable = "avx512f")]
58468    unsafe fn test_mm_mask3_fnmsub_ss() {
58469        let a = _mm_set1_ps(1.);
58470        let b = _mm_set1_ps(2.);
58471        let c = _mm_set1_ps(3.);
58472        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58473        assert_eq_m128(r, c);
58474        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58475        let e = _mm_set_ps(3., 3., 3., -5.);
58476        assert_eq_m128(r, e);
58477    }
58478
58479    #[simd_test(enable = "avx512f")]
58480    unsafe fn test_mm_mask_fnmsub_sd() {
58481        let a = _mm_set1_pd(1.);
58482        let b = _mm_set1_pd(2.);
58483        let c = _mm_set1_pd(3.);
58484        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58485        assert_eq_m128d(r, a);
58486        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58487        let e = _mm_set_pd(1., -5.);
58488        assert_eq_m128d(r, e);
58489    }
58490
58491    #[simd_test(enable = "avx512f")]
58492    unsafe fn test_mm_maskz_fnmsub_sd() {
58493        let a = _mm_set1_pd(1.);
58494        let b = _mm_set1_pd(2.);
58495        let c = _mm_set1_pd(3.);
58496        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58497        let e = _mm_set_pd(1., 0.);
58498        assert_eq_m128d(r, e);
58499        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58500        let e = _mm_set_pd(1., -5.);
58501        assert_eq_m128d(r, e);
58502    }
58503
58504    #[simd_test(enable = "avx512f")]
58505    unsafe fn test_mm_mask3_fnmsub_sd() {
58506        let a = _mm_set1_pd(1.);
58507        let b = _mm_set1_pd(2.);
58508        let c = _mm_set1_pd(3.);
58509        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58510        assert_eq_m128d(r, c);
58511        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58512        let e = _mm_set_pd(3., -5.);
58513        assert_eq_m128d(r, e);
58514    }
58515
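    // The `_round_` variants below pass `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`
    // as the rounding/SAE immediate; with the exact inputs used here the results
    // match the non-rounding variants tested above.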
58516    #[simd_test(enable = "avx512f")]
58517    unsafe fn test_mm_add_round_ss() {
58518        let a = _mm_set_ps(1., 2., 10., 20.);
58519        let b = _mm_set_ps(3., 4., 30., 40.);
58520        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58521        let e = _mm_set_ps(1., 2., 10., 60.);
58522        assert_eq_m128(r, e);
58523    }
58524
58525    #[simd_test(enable = "avx512f")]
58526    unsafe fn test_mm_mask_add_round_ss() {
58527        let src = _mm_set_ps(10., 11., 100., 110.);
58528        let a = _mm_set_ps(1., 2., 10., 20.);
58529        let b = _mm_set_ps(3., 4., 30., 40.);
58530        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58531        let e = _mm_set_ps(1., 2., 10., 110.);
58532        assert_eq_m128(r, e);
58533        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58534            src, 0b11111111, a, b,
58535        );
58536        let e = _mm_set_ps(1., 2., 10., 60.);
58537        assert_eq_m128(r, e);
58538    }
58539
58540    #[simd_test(enable = "avx512f")]
58541    unsafe fn test_mm_maskz_add_round_ss() {
58542        let a = _mm_set_ps(1., 2., 10., 20.);
58543        let b = _mm_set_ps(3., 4., 30., 40.);
58544        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58545        let e = _mm_set_ps(1., 2., 10., 0.);
58546        assert_eq_m128(r, e);
58547        let r =
58548            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58549        let e = _mm_set_ps(1., 2., 10., 60.);
58550        assert_eq_m128(r, e);
58551    }
58552
58553    #[simd_test(enable = "avx512f")]
58554    unsafe fn test_mm_add_round_sd() {
58555        let a = _mm_set_pd(1., 2.);
58556        let b = _mm_set_pd(3., 4.);
58557        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58558        let e = _mm_set_pd(1., 6.);
58559        assert_eq_m128d(r, e);
58560    }
58561
58562    #[simd_test(enable = "avx512f")]
58563    unsafe fn test_mm_mask_add_round_sd() {
58564        let src = _mm_set_pd(10., 11.);
58565        let a = _mm_set_pd(1., 2.);
58566        let b = _mm_set_pd(3., 4.);
58567        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58568        let e = _mm_set_pd(1., 11.);
58569        assert_eq_m128d(r, e);
58570        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58571            src, 0b11111111, a, b,
58572        );
58573        let e = _mm_set_pd(1., 6.);
58574        assert_eq_m128d(r, e);
58575    }
58576
58577    #[simd_test(enable = "avx512f")]
58578    unsafe fn test_mm_maskz_add_round_sd() {
58579        let a = _mm_set_pd(1., 2.);
58580        let b = _mm_set_pd(3., 4.);
58581        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58582        let e = _mm_set_pd(1., 0.);
58583        assert_eq_m128d(r, e);
58584        let r =
58585            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58586        let e = _mm_set_pd(1., 6.);
58587        assert_eq_m128d(r, e);
58588    }
58589
58590    #[simd_test(enable = "avx512f")]
58591    unsafe fn test_mm_sub_round_ss() {
58592        let a = _mm_set_ps(1., 2., 10., 20.);
58593        let b = _mm_set_ps(3., 4., 30., 40.);
58594        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58595        let e = _mm_set_ps(1., 2., 10., -20.);
58596        assert_eq_m128(r, e);
58597    }
58598
58599    #[simd_test(enable = "avx512f")]
58600    unsafe fn test_mm_mask_sub_round_ss() {
58601        let src = _mm_set_ps(10., 11., 100., 110.);
58602        let a = _mm_set_ps(1., 2., 10., 20.);
58603        let b = _mm_set_ps(3., 4., 30., 40.);
58604        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58605        let e = _mm_set_ps(1., 2., 10., 110.);
58606        assert_eq_m128(r, e);
58607        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58608            src, 0b11111111, a, b,
58609        );
58610        let e = _mm_set_ps(1., 2., 10., -20.);
58611        assert_eq_m128(r, e);
58612    }
58613
58614    #[simd_test(enable = "avx512f")]
58615    unsafe fn test_mm_maskz_sub_round_ss() {
58616        let a = _mm_set_ps(1., 2., 10., 20.);
58617        let b = _mm_set_ps(3., 4., 30., 40.);
58618        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58619        let e = _mm_set_ps(1., 2., 10., 0.);
58620        assert_eq_m128(r, e);
58621        let r =
58622            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58623        let e = _mm_set_ps(1., 2., 10., -20.);
58624        assert_eq_m128(r, e);
58625    }
58626
58627    #[simd_test(enable = "avx512f")]
58628    unsafe fn test_mm_sub_round_sd() {
58629        let a = _mm_set_pd(1., 2.);
58630        let b = _mm_set_pd(3., 4.);
58631        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58632        let e = _mm_set_pd(1., -2.);
58633        assert_eq_m128d(r, e);
58634    }
58635
58636    #[simd_test(enable = "avx512f")]
58637    unsafe fn test_mm_mask_sub_round_sd() {
58638        let src = _mm_set_pd(10., 11.);
58639        let a = _mm_set_pd(1., 2.);
58640        let b = _mm_set_pd(3., 4.);
58641        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58642        let e = _mm_set_pd(1., 11.);
58643        assert_eq_m128d(r, e);
58644        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58645            src, 0b11111111, a, b,
58646        );
58647        let e = _mm_set_pd(1., -2.);
58648        assert_eq_m128d(r, e);
58649    }
58650
58651    #[simd_test(enable = "avx512f")]
58652    unsafe fn test_mm_maskz_sub_round_sd() {
58653        let a = _mm_set_pd(1., 2.);
58654        let b = _mm_set_pd(3., 4.);
58655        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58656        let e = _mm_set_pd(1., 0.);
58657        assert_eq_m128d(r, e);
58658        let r =
58659            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58660        let e = _mm_set_pd(1., -2.);
58661        assert_eq_m128d(r, e);
58662    }
58663
58664    #[simd_test(enable = "avx512f")]
58665    unsafe fn test_mm_mul_round_ss() {
58666        let a = _mm_set_ps(1., 2., 10., 20.);
58667        let b = _mm_set_ps(3., 4., 30., 40.);
58668        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58669        let e = _mm_set_ps(1., 2., 10., 800.);
58670        assert_eq_m128(r, e);
58671    }
58672
58673    #[simd_test(enable = "avx512f")]
58674    unsafe fn test_mm_mask_mul_round_ss() {
58675        let src = _mm_set_ps(10., 11., 100., 110.);
58676        let a = _mm_set_ps(1., 2., 10., 20.);
58677        let b = _mm_set_ps(3., 4., 30., 40.);
58678        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58679        let e = _mm_set_ps(1., 2., 10., 110.);
58680        assert_eq_m128(r, e);
58681        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58682            src, 0b11111111, a, b,
58683        );
58684        let e = _mm_set_ps(1., 2., 10., 800.);
58685        assert_eq_m128(r, e);
58686    }
58687
58688    #[simd_test(enable = "avx512f")]
58689    unsafe fn test_mm_maskz_mul_round_ss() {
58690        let a = _mm_set_ps(1., 2., 10., 20.);
58691        let b = _mm_set_ps(3., 4., 30., 40.);
58692        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58693        let e = _mm_set_ps(1., 2., 10., 0.);
58694        assert_eq_m128(r, e);
58695        let r =
58696            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58697        let e = _mm_set_ps(1., 2., 10., 800.);
58698        assert_eq_m128(r, e);
58699    }
58700
58701    #[simd_test(enable = "avx512f")]
58702    unsafe fn test_mm_mul_round_sd() {
58703        let a = _mm_set_pd(1., 2.);
58704        let b = _mm_set_pd(3., 4.);
58705        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58706        let e = _mm_set_pd(1., 8.);
58707        assert_eq_m128d(r, e);
58708    }
58709
58710    #[simd_test(enable = "avx512f")]
58711    unsafe fn test_mm_mask_mul_round_sd() {
58712        let src = _mm_set_pd(10., 11.);
58713        let a = _mm_set_pd(1., 2.);
58714        let b = _mm_set_pd(3., 4.);
58715        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58716        let e = _mm_set_pd(1., 11.);
58717        assert_eq_m128d(r, e);
58718        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58719            src, 0b11111111, a, b,
58720        );
58721        let e = _mm_set_pd(1., 8.);
58722        assert_eq_m128d(r, e);
58723    }
58724
58725    #[simd_test(enable = "avx512f")]
58726    unsafe fn test_mm_maskz_mul_round_sd() {
58727        let a = _mm_set_pd(1., 2.);
58728        let b = _mm_set_pd(3., 4.);
58729        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58730        let e = _mm_set_pd(1., 0.);
58731        assert_eq_m128d(r, e);
58732        let r =
58733            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58734        let e = _mm_set_pd(1., 8.);
58735        assert_eq_m128d(r, e);
58736    }
58737
58738    #[simd_test(enable = "avx512f")]
58739    unsafe fn test_mm_div_round_ss() {
58740        let a = _mm_set_ps(1., 2., 10., 20.);
58741        let b = _mm_set_ps(3., 4., 30., 40.);
58742        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58743        let e = _mm_set_ps(1., 2., 10., 0.5);
58744        assert_eq_m128(r, e);
58745    }
58746
58747    #[simd_test(enable = "avx512f")]
58748    unsafe fn test_mm_mask_div_round_ss() {
58749        let src = _mm_set_ps(10., 11., 100., 110.);
58750        let a = _mm_set_ps(1., 2., 10., 20.);
58751        let b = _mm_set_ps(3., 4., 30., 40.);
58752        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58753        let e = _mm_set_ps(1., 2., 10., 110.);
58754        assert_eq_m128(r, e);
58755        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58756            src, 0b11111111, a, b,
58757        );
58758        let e = _mm_set_ps(1., 2., 10., 0.5);
58759        assert_eq_m128(r, e);
58760    }
58761
58762    #[simd_test(enable = "avx512f")]
58763    unsafe fn test_mm_maskz_div_round_ss() {
58764        let a = _mm_set_ps(1., 2., 10., 20.);
58765        let b = _mm_set_ps(3., 4., 30., 40.);
58766        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58767        let e = _mm_set_ps(1., 2., 10., 0.);
58768        assert_eq_m128(r, e);
58769        let r =
58770            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58771        let e = _mm_set_ps(1., 2., 10., 0.5);
58772        assert_eq_m128(r, e);
58773    }
58774
58775    #[simd_test(enable = "avx512f")]
58776    unsafe fn test_mm_div_round_sd() {
58777        let a = _mm_set_pd(1., 2.);
58778        let b = _mm_set_pd(3., 4.);
58779        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58780        let e = _mm_set_pd(1., 0.5);
58781        assert_eq_m128d(r, e);
58782    }
58783
58784    #[simd_test(enable = "avx512f")]
58785    unsafe fn test_mm_mask_div_round_sd() {
58786        let src = _mm_set_pd(10., 11.);
58787        let a = _mm_set_pd(1., 2.);
58788        let b = _mm_set_pd(3., 4.);
58789        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58790        let e = _mm_set_pd(1., 11.);
58791        assert_eq_m128d(r, e);
58792        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58793            src, 0b11111111, a, b,
58794        );
58795        let e = _mm_set_pd(1., 0.5);
58796        assert_eq_m128d(r, e);
58797    }
58798
58799    #[simd_test(enable = "avx512f")]
58800    unsafe fn test_mm_maskz_div_round_sd() {
58801        let a = _mm_set_pd(1., 2.);
58802        let b = _mm_set_pd(3., 4.);
58803        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58804        let e = _mm_set_pd(1., 0.);
58805        assert_eq_m128d(r, e);
58806        let r =
58807            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58808        let e = _mm_set_pd(1., 0.5);
58809        assert_eq_m128d(r, e);
58810    }
58811
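    // `_mm_max_round_ss`/`_mm_min_round_ss` (and the `_sd` forms) do not round their
    // result, so per Intel's docs the const argument is only an SAE control; the tests
    // use `_MM_FROUND_CUR_DIRECTION`, i.e. the current MXCSR state. Lane 0 takes
    // `max(a0, b0)` or `min(a0, b0)` and the upper lanes again come from `a`.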
58812    #[simd_test(enable = "avx512f")]
58813    unsafe fn test_mm_max_round_ss() {
58814        let a = _mm_set_ps(0., 1., 2., 3.);
58815        let b = _mm_set_ps(4., 5., 6., 7.);
58816        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58817        let e = _mm_set_ps(0., 1., 2., 7.);
58818        assert_eq_m128(r, e);
58819    }
58820
58821    #[simd_test(enable = "avx512f")]
58822    unsafe fn test_mm_mask_max_round_ss() {
58823        let a = _mm_set_ps(0., 1., 2., 3.);
58824        let b = _mm_set_ps(4., 5., 6., 7.);
58825        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58826        let e = _mm_set_ps(0., 1., 2., 3.);
58827        assert_eq_m128(r, e);
58828        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58829        let e = _mm_set_ps(0., 1., 2., 7.);
58830        assert_eq_m128(r, e);
58831    }
58832
58833    #[simd_test(enable = "avx512f")]
58834    unsafe fn test_mm_maskz_max_round_ss() {
58835        let a = _mm_set_ps(0., 1., 2., 3.);
58836        let b = _mm_set_ps(4., 5., 6., 7.);
58837        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58838        let e = _mm_set_ps(0., 1., 2., 0.);
58839        assert_eq_m128(r, e);
58840        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58841        let e = _mm_set_ps(0., 1., 2., 7.);
58842        assert_eq_m128(r, e);
58843    }
58844
58845    #[simd_test(enable = "avx512f")]
58846    unsafe fn test_mm_max_round_sd() {
58847        let a = _mm_set_pd(0., 1.);
58848        let b = _mm_set_pd(2., 3.);
58849        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58850        let e = _mm_set_pd(0., 3.);
58851        assert_eq_m128d(r, e);
58852    }
58853
58854    #[simd_test(enable = "avx512f")]
58855    unsafe fn test_mm_mask_max_round_sd() {
58856        let a = _mm_set_pd(0., 1.);
58857        let b = _mm_set_pd(2., 3.);
58858        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58859        let e = _mm_set_pd(0., 1.);
58860        assert_eq_m128d(r, e);
58861        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58862        let e = _mm_set_pd(0., 3.);
58863        assert_eq_m128d(r, e);
58864    }
58865
58866    #[simd_test(enable = "avx512f")]
58867    unsafe fn test_mm_maskz_max_round_sd() {
58868        let a = _mm_set_pd(0., 1.);
58869        let b = _mm_set_pd(2., 3.);
58870        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58871        let e = _mm_set_pd(0., 0.);
58872        assert_eq_m128d(r, e);
58873        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58874        let e = _mm_set_pd(0., 3.);
58875        assert_eq_m128d(r, e);
58876    }
58877
58878    #[simd_test(enable = "avx512f")]
58879    unsafe fn test_mm_min_round_ss() {
58880        let a = _mm_set_ps(0., 1., 2., 3.);
58881        let b = _mm_set_ps(4., 5., 6., 7.);
58882        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58883        let e = _mm_set_ps(0., 1., 2., 3.);
58884        assert_eq_m128(r, e);
58885    }
58886
58887    #[simd_test(enable = "avx512f")]
58888    unsafe fn test_mm_mask_min_round_ss() {
58889        let a = _mm_set_ps(0., 1., 2., 3.);
58890        let b = _mm_set_ps(4., 5., 6., 7.);
58891        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58892        let e = _mm_set_ps(0., 1., 2., 3.);
58893        assert_eq_m128(r, e);
58894        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58895        let e = _mm_set_ps(0., 1., 2., 3.);
58896        assert_eq_m128(r, e);
58897    }
58898
58899    #[simd_test(enable = "avx512f")]
58900    unsafe fn test_mm_maskz_min_round_ss() {
58901        let a = _mm_set_ps(0., 1., 2., 3.);
58902        let b = _mm_set_ps(4., 5., 6., 7.);
58903        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58904        let e = _mm_set_ps(0., 1., 2., 0.);
58905        assert_eq_m128(r, e);
58906        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58907        let e = _mm_set_ps(0., 1., 2., 3.);
58908        assert_eq_m128(r, e);
58909    }
58910
58911    #[simd_test(enable = "avx512f")]
58912    unsafe fn test_mm_min_round_sd() {
58913        let a = _mm_set_pd(0., 1.);
58914        let b = _mm_set_pd(2., 3.);
58915        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58916        let e = _mm_set_pd(0., 1.);
58917        assert_eq_m128d(r, e);
58918    }
58919
58920    #[simd_test(enable = "avx512f")]
58921    unsafe fn test_mm_mask_min_round_sd() {
58922        let a = _mm_set_pd(0., 1.);
58923        let b = _mm_set_pd(2., 3.);
58924        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58925        let e = _mm_set_pd(0., 1.);
58926        assert_eq_m128d(r, e);
58927        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58928        let e = _mm_set_pd(0., 1.);
58929        assert_eq_m128d(r, e);
58930    }
58931
58932    #[simd_test(enable = "avx512f")]
58933    unsafe fn test_mm_maskz_min_round_sd() {
58934        let a = _mm_set_pd(0., 1.);
58935        let b = _mm_set_pd(2., 3.);
58936        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58937        let e = _mm_set_pd(0., 0.);
58938        assert_eq_m128d(r, e);
58939        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58940        let e = _mm_set_pd(0., 1.);
58941        assert_eq_m128d(r, e);
58942    }
58943
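    // `_mm_sqrt_round_ss` takes the square root of lane 0 of `b` (here `sqrt(4.) == 2.`)
    // and copies the upper lanes from `a`; the masked forms behave like the other scalar
    // round intrinsics above.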
58944    #[simd_test(enable = "avx512f")]
58945    unsafe fn test_mm_sqrt_round_ss() {
58946        let a = _mm_set_ps(1., 2., 10., 20.);
58947        let b = _mm_set_ps(3., 4., 30., 4.);
58948        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58949        let e = _mm_set_ps(1., 2., 10., 2.);
58950        assert_eq_m128(r, e);
58951    }
58952
58953    #[simd_test(enable = "avx512f")]
58954    unsafe fn test_mm_mask_sqrt_round_ss() {
58955        let src = _mm_set_ps(10., 11., 100., 110.);
58956        let a = _mm_set_ps(1., 2., 10., 20.);
58957        let b = _mm_set_ps(3., 4., 30., 4.);
58958        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58959        let e = _mm_set_ps(1., 2., 10., 110.);
58960        assert_eq_m128(r, e);
58961        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58962            src, 0b11111111, a, b,
58963        );
58964        let e = _mm_set_ps(1., 2., 10., 2.);
58965        assert_eq_m128(r, e);
58966    }
58967
58968    #[simd_test(enable = "avx512f")]
58969    unsafe fn test_mm_maskz_sqrt_round_ss() {
58970        let a = _mm_set_ps(1., 2., 10., 20.);
58971        let b = _mm_set_ps(3., 4., 30., 4.);
58972        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58973        let e = _mm_set_ps(1., 2., 10., 0.);
58974        assert_eq_m128(r, e);
58975        let r =
58976            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58977        let e = _mm_set_ps(1., 2., 10., 2.);
58978        assert_eq_m128(r, e);
58979    }
58980
58981    #[simd_test(enable = "avx512f")]
58982    unsafe fn test_mm_sqrt_round_sd() {
58983        let a = _mm_set_pd(1., 2.);
58984        let b = _mm_set_pd(3., 4.);
58985        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58986        let e = _mm_set_pd(1., 2.);
58987        assert_eq_m128d(r, e);
58988    }
58989
58990    #[simd_test(enable = "avx512f")]
58991    unsafe fn test_mm_mask_sqrt_round_sd() {
58992        let src = _mm_set_pd(10., 11.);
58993        let a = _mm_set_pd(1., 2.);
58994        let b = _mm_set_pd(3., 4.);
58995        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58996        let e = _mm_set_pd(1., 11.);
58997        assert_eq_m128d(r, e);
58998        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58999            src, 0b11111111, a, b,
59000        );
59001        let e = _mm_set_pd(1., 2.);
59002        assert_eq_m128d(r, e);
59003    }
59004
59005    #[simd_test(enable = "avx512f")]
59006    unsafe fn test_mm_maskz_sqrt_round_sd() {
59007        let a = _mm_set_pd(1., 2.);
59008        let b = _mm_set_pd(3., 4.);
59009        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59010        let e = _mm_set_pd(1., 0.);
59011        assert_eq_m128d(r, e);
59012        let r =
59013            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59014        let e = _mm_set_pd(1., 2.);
59015        assert_eq_m128d(r, e);
59016    }
59017
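    // `getexp` extracts the unbiased exponent of lane 0 of `b` as a float, i.e. roughly
    // `floor(log2(|b0|))`. With `b0 = 3.0` that is `1.0`, which is what the expected
    // vectors below encode; the upper lanes come from `a` (all `2.0`).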
59018    #[simd_test(enable = "avx512f")]
59019    unsafe fn test_mm_getexp_round_ss() {
59020        let a = _mm_set1_ps(2.);
59021        let b = _mm_set1_ps(3.);
59022        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
59023        let e = _mm_set_ps(2., 2., 2., 1.);
59024        assert_eq_m128(r, e);
59025    }
59026
59027    #[simd_test(enable = "avx512f")]
59028    unsafe fn test_mm_mask_getexp_round_ss() {
59029        let a = _mm_set1_ps(2.);
59030        let b = _mm_set1_ps(3.);
59031        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59032        let e = _mm_set_ps(2., 2., 2., 2.);
59033        assert_eq_m128(r, e);
59034        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59035        let e = _mm_set_ps(2., 2., 2., 1.);
59036        assert_eq_m128(r, e);
59037    }
59038
59039    #[simd_test(enable = "avx512f")]
59040    unsafe fn test_mm_maskz_getexp_round_ss() {
59041        let a = _mm_set1_ps(2.);
59042        let b = _mm_set1_ps(3.);
59043        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59044        let e = _mm_set_ps(2., 2., 2., 0.);
59045        assert_eq_m128(r, e);
59046        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59047        let e = _mm_set_ps(2., 2., 2., 1.);
59048        assert_eq_m128(r, e);
59049    }
59050
59051    #[simd_test(enable = "avx512f")]
59052    unsafe fn test_mm_getexp_round_sd() {
59053        let a = _mm_set1_pd(2.);
59054        let b = _mm_set1_pd(3.);
59055        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59056        let e = _mm_set_pd(2., 1.);
59057        assert_eq_m128d(r, e);
59058    }
59059
59060    #[simd_test(enable = "avx512f")]
59061    unsafe fn test_mm_mask_getexp_round_sd() {
59062        let a = _mm_set1_pd(2.);
59063        let b = _mm_set1_pd(3.);
59064        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59065        let e = _mm_set_pd(2., 2.);
59066        assert_eq_m128d(r, e);
59067        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59068        let e = _mm_set_pd(2., 1.);
59069        assert_eq_m128d(r, e);
59070    }
59071
59072    #[simd_test(enable = "avx512f")]
59073    unsafe fn test_mm_maskz_getexp_round_sd() {
59074        let a = _mm_set1_pd(2.);
59075        let b = _mm_set1_pd(3.);
59076        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59077        let e = _mm_set_pd(2., 0.);
59078        assert_eq_m128d(r, e);
59079        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59080        let e = _mm_set_pd(2., 1.);
59081        assert_eq_m128d(r, e);
59082    }
59083
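    // `getmant` normalizes lane 0 of `b` into the interval selected by the first const
    // parameter (`_MM_MANT_NORM_1_2`, i.e. [1, 2)), keeping the sign of the source
    // (`_MM_MANT_SIGN_SRC`). Since `10.0 == 1.25 * 2^3`, the extracted mantissa is `1.25`.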
59084    #[simd_test(enable = "avx512f")]
59085    unsafe fn test_mm_getmant_round_ss() {
59086        let a = _mm_set1_ps(20.);
59087        let b = _mm_set1_ps(10.);
59088        let r =
59089            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59090                a, b,
59091            );
59092        let e = _mm_set_ps(20., 20., 20., 1.25);
59093        assert_eq_m128(r, e);
59094    }
59095
59096    #[simd_test(enable = "avx512f")]
59097    unsafe fn test_mm_mask_getmant_round_ss() {
59098        let a = _mm_set1_ps(20.);
59099        let b = _mm_set1_ps(10.);
59100        let r = _mm_mask_getmant_round_ss::<
59101            _MM_MANT_NORM_1_2,
59102            _MM_MANT_SIGN_SRC,
59103            _MM_FROUND_CUR_DIRECTION,
59104        >(a, 0, a, b);
59105        let e = _mm_set_ps(20., 20., 20., 20.);
59106        assert_eq_m128(r, e);
59107        let r = _mm_mask_getmant_round_ss::<
59108            _MM_MANT_NORM_1_2,
59109            _MM_MANT_SIGN_SRC,
59110            _MM_FROUND_CUR_DIRECTION,
59111        >(a, 0b11111111, a, b);
59112        let e = _mm_set_ps(20., 20., 20., 1.25);
59113        assert_eq_m128(r, e);
59114    }
59115
59116    #[simd_test(enable = "avx512f")]
59117    unsafe fn test_mm_maskz_getmant_round_ss() {
59118        let a = _mm_set1_ps(20.);
59119        let b = _mm_set1_ps(10.);
59120        let r = _mm_maskz_getmant_round_ss::<
59121            _MM_MANT_NORM_1_2,
59122            _MM_MANT_SIGN_SRC,
59123            _MM_FROUND_CUR_DIRECTION,
59124        >(0, a, b);
59125        let e = _mm_set_ps(20., 20., 20., 0.);
59126        assert_eq_m128(r, e);
59127        let r = _mm_maskz_getmant_round_ss::<
59128            _MM_MANT_NORM_1_2,
59129            _MM_MANT_SIGN_SRC,
59130            _MM_FROUND_CUR_DIRECTION,
59131        >(0b11111111, a, b);
59132        let e = _mm_set_ps(20., 20., 20., 1.25);
59133        assert_eq_m128(r, e);
59134    }
59135
59136    #[simd_test(enable = "avx512f")]
59137    unsafe fn test_mm_getmant_round_sd() {
59138        let a = _mm_set1_pd(20.);
59139        let b = _mm_set1_pd(10.);
59140        let r =
59141            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59142                a, b,
59143            );
59144        let e = _mm_set_pd(20., 1.25);
59145        assert_eq_m128d(r, e);
59146    }
59147
59148    #[simd_test(enable = "avx512f")]
59149    unsafe fn test_mm_mask_getmant_round_sd() {
59150        let a = _mm_set1_pd(20.);
59151        let b = _mm_set1_pd(10.);
59152        let r = _mm_mask_getmant_round_sd::<
59153            _MM_MANT_NORM_1_2,
59154            _MM_MANT_SIGN_SRC,
59155            _MM_FROUND_CUR_DIRECTION,
59156        >(a, 0, a, b);
59157        let e = _mm_set_pd(20., 20.);
59158        assert_eq_m128d(r, e);
59159        let r = _mm_mask_getmant_round_sd::<
59160            _MM_MANT_NORM_1_2,
59161            _MM_MANT_SIGN_SRC,
59162            _MM_FROUND_CUR_DIRECTION,
59163        >(a, 0b11111111, a, b);
59164        let e = _mm_set_pd(20., 1.25);
59165        assert_eq_m128d(r, e);
59166    }
59167
59168    #[simd_test(enable = "avx512f")]
59169    unsafe fn test_mm_maskz_getmant_round_sd() {
59170        let a = _mm_set1_pd(20.);
59171        let b = _mm_set1_pd(10.);
59172        let r = _mm_maskz_getmant_round_sd::<
59173            _MM_MANT_NORM_1_2,
59174            _MM_MANT_SIGN_SRC,
59175            _MM_FROUND_CUR_DIRECTION,
59176        >(0, a, b);
59177        let e = _mm_set_pd(20., 0.);
59178        assert_eq_m128d(r, e);
59179        let r = _mm_maskz_getmant_round_sd::<
59180            _MM_MANT_NORM_1_2,
59181            _MM_MANT_SIGN_SRC,
59182            _MM_FROUND_CUR_DIRECTION,
59183        >(0b11111111, a, b);
59184        let e = _mm_set_pd(20., 1.25);
59185        assert_eq_m128d(r, e);
59186    }
59187
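    // `roundscale` rounds lane 0 of `b` to a fixed number of fraction bits taken from the
    // upper nibble of the immediate (roughly `2^-M * round(2^M * x)` per Intel's
    // description). With an immediate of 0 the value is rounded to an integer, so `1.1`
    // becomes `1.0` below.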
59188    #[simd_test(enable = "avx512f")]
59189    unsafe fn test_mm_roundscale_round_ss() {
59190        let a = _mm_set1_ps(2.2);
59191        let b = _mm_set1_ps(1.1);
59192        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59193        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59194        assert_eq_m128(r, e);
59195    }
59196
59197    #[simd_test(enable = "avx512f")]
59198    unsafe fn test_mm_mask_roundscale_round_ss() {
59199        let a = _mm_set1_ps(2.2);
59200        let b = _mm_set1_ps(1.1);
59201        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59202        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59203        assert_eq_m128(r, e);
59204        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59205        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59206        assert_eq_m128(r, e);
59207    }
59208
59209    #[simd_test(enable = "avx512f")]
59210    unsafe fn test_mm_maskz_roundscale_round_ss() {
59211        let a = _mm_set1_ps(2.2);
59212        let b = _mm_set1_ps(1.1);
59213        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59214        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59215        assert_eq_m128(r, e);
59216        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59217        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59218        assert_eq_m128(r, e);
59219    }
59220
59221    #[simd_test(enable = "avx512f")]
59222    unsafe fn test_mm_roundscale_round_sd() {
59223        let a = _mm_set1_pd(2.2);
59224        let b = _mm_set1_pd(1.1);
59225        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59226        let e = _mm_set_pd(2.2, 1.0);
59227        assert_eq_m128d(r, e);
59228    }
59229
59230    #[simd_test(enable = "avx512f")]
59231    unsafe fn test_mm_mask_roundscale_round_sd() {
59232        let a = _mm_set1_pd(2.2);
59233        let b = _mm_set1_pd(1.1);
59234        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59235        let e = _mm_set_pd(2.2, 2.2);
59236        assert_eq_m128d(r, e);
59237        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59238        let e = _mm_set_pd(2.2, 1.0);
59239        assert_eq_m128d(r, e);
59240    }
59241
59242    #[simd_test(enable = "avx512f")]
59243    unsafe fn test_mm_maskz_roundscale_round_sd() {
59244        let a = _mm_set1_pd(2.2);
59245        let b = _mm_set1_pd(1.1);
59246        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59247        let e = _mm_set_pd(2.2, 0.0);
59248        assert_eq_m128d(r, e);
59249        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59250        let e = _mm_set_pd(2.2, 1.0);
59251        assert_eq_m128d(r, e);
59252    }
59253
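    // `scalef` computes `a * 2^floor(b)` on lane 0, so `1.0 * 2^3 == 8.0` here; the upper
    // lanes are copied from `a` as usual for the scalar intrinsics.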
59254    #[simd_test(enable = "avx512f")]
59255    unsafe fn test_mm_scalef_round_ss() {
59256        let a = _mm_set1_ps(1.);
59257        let b = _mm_set1_ps(3.);
59258        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59259        let e = _mm_set_ps(1., 1., 1., 8.);
59260        assert_eq_m128(r, e);
59261    }
59262
59263    #[simd_test(enable = "avx512f")]
59264    unsafe fn test_mm_mask_scalef_round_ss() {
59265        let a = _mm_set1_ps(1.);
59266        let b = _mm_set1_ps(3.);
59267        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59268            a, 0, a, b,
59269        );
59270        let e = _mm_set_ps(1., 1., 1., 1.);
59271        assert_eq_m128(r, e);
59272        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59273            a, 0b11111111, a, b,
59274        );
59275        let e = _mm_set_ps(1., 1., 1., 8.);
59276        assert_eq_m128(r, e);
59277    }
59278
59279    #[simd_test(enable = "avx512f")]
59280    unsafe fn test_mm_maskz_scalef_round_ss() {
59281        let a = _mm_set1_ps(1.);
59282        let b = _mm_set1_ps(3.);
59283        let r =
59284            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59285        let e = _mm_set_ps(1., 1., 1., 0.);
59286        assert_eq_m128(r, e);
59287        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59288            0b11111111, a, b,
59289        );
59290        let e = _mm_set_ps(1., 1., 1., 8.);
59291        assert_eq_m128(r, e);
59292    }
59293
59294    #[simd_test(enable = "avx512f")]
59295    unsafe fn test_mm_scalef_round_sd() {
59296        let a = _mm_set1_pd(1.);
59297        let b = _mm_set1_pd(3.);
59298        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59299        let e = _mm_set_pd(1., 8.);
59300        assert_eq_m128d(r, e);
59301    }
59302
59303    #[simd_test(enable = "avx512f")]
59304    unsafe fn test_mm_mask_scalef_round_sd() {
59305        let a = _mm_set1_pd(1.);
59306        let b = _mm_set1_pd(3.);
59307        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59308            a, 0, a, b,
59309        );
59310        let e = _mm_set_pd(1., 1.);
59311        assert_eq_m128d(r, e);
59312        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59313            a, 0b11111111, a, b,
59314        );
59315        let e = _mm_set_pd(1., 8.);
59316        assert_eq_m128d(r, e);
59317    }
59318
59319    #[simd_test(enable = "avx512f")]
59320    unsafe fn test_mm_maskz_scalef_round_sd() {
59321        let a = _mm_set1_pd(1.);
59322        let b = _mm_set1_pd(3.);
59323        let r =
59324            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59325        let e = _mm_set_pd(1., 0.);
59326        assert_eq_m128d(r, e);
59327        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59328            0b11111111, a, b,
59329        );
59330        let e = _mm_set_pd(1., 8.);
59331        assert_eq_m128d(r, e);
59332    }
59333
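    // The scalar fused multiply-add family below follows the usual sign conventions:
    //   fmadd:  a*b + c      fmsub:  a*b - c
    //   fnmadd: -(a*b) + c   fnmsub: -(a*b) - c
    // With a = 1, b = 2, c = 3 the lane-0 results are 5, -1, 1 and -5 respectively.
    // When mask bit 0 is clear, the `mask` forms fall back to `a`, the `maskz` forms to
    // zero, and the `mask3` forms to `c`, which is why the zero-mask assertions compare
    // against `a`, a zeroed lane, or `c`.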
59334    #[simd_test(enable = "avx512f")]
59335    unsafe fn test_mm_fmadd_round_ss() {
59336        let a = _mm_set1_ps(1.);
59337        let b = _mm_set1_ps(2.);
59338        let c = _mm_set1_ps(3.);
59339        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59340        let e = _mm_set_ps(1., 1., 1., 5.);
59341        assert_eq_m128(r, e);
59342    }
59343
59344    #[simd_test(enable = "avx512f")]
59345    unsafe fn test_mm_mask_fmadd_round_ss() {
59346        let a = _mm_set1_ps(1.);
59347        let b = _mm_set1_ps(2.);
59348        let c = _mm_set1_ps(3.);
59349        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59350            a, 0, b, c,
59351        );
59352        assert_eq_m128(r, a);
59353        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59354            a, 0b11111111, b, c,
59355        );
59356        let e = _mm_set_ps(1., 1., 1., 5.);
59357        assert_eq_m128(r, e);
59358    }
59359
59360    #[simd_test(enable = "avx512f")]
59361    unsafe fn test_mm_maskz_fmadd_round_ss() {
59362        let a = _mm_set1_ps(1.);
59363        let b = _mm_set1_ps(2.);
59364        let c = _mm_set1_ps(3.);
59365        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366            0, a, b, c,
59367        );
59368        let e = _mm_set_ps(1., 1., 1., 0.);
59369        assert_eq_m128(r, e);
59370        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59371            0b11111111, a, b, c,
59372        );
59373        let e = _mm_set_ps(1., 1., 1., 5.);
59374        assert_eq_m128(r, e);
59375    }
59376
59377    #[simd_test(enable = "avx512f")]
59378    unsafe fn test_mm_mask3_fmadd_round_ss() {
59379        let a = _mm_set1_ps(1.);
59380        let b = _mm_set1_ps(2.);
59381        let c = _mm_set1_ps(3.);
59382        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59383            a, b, c, 0,
59384        );
59385        assert_eq_m128(r, c);
59386        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59387            a, b, c, 0b11111111,
59388        );
59389        let e = _mm_set_ps(3., 3., 3., 5.);
59390        assert_eq_m128(r, e);
59391    }
59392
59393    #[simd_test(enable = "avx512f")]
59394    unsafe fn test_mm_fmadd_round_sd() {
59395        let a = _mm_set1_pd(1.);
59396        let b = _mm_set1_pd(2.);
59397        let c = _mm_set1_pd(3.);
59398        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59399        let e = _mm_set_pd(1., 5.);
59400        assert_eq_m128d(r, e);
59401    }
59402
59403    #[simd_test(enable = "avx512f")]
59404    unsafe fn test_mm_mask_fmadd_round_sd() {
59405        let a = _mm_set1_pd(1.);
59406        let b = _mm_set1_pd(2.);
59407        let c = _mm_set1_pd(3.);
59408        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59409            a, 0, b, c,
59410        );
59411        assert_eq_m128d(r, a);
59412        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59413            a, 0b11111111, b, c,
59414        );
59415        let e = _mm_set_pd(1., 5.);
59416        assert_eq_m128d(r, e);
59417    }
59418
59419    #[simd_test(enable = "avx512f")]
59420    unsafe fn test_mm_maskz_fmadd_round_sd() {
59421        let a = _mm_set1_pd(1.);
59422        let b = _mm_set1_pd(2.);
59423        let c = _mm_set1_pd(3.);
59424        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59425            0, a, b, c,
59426        );
59427        let e = _mm_set_pd(1., 0.);
59428        assert_eq_m128d(r, e);
59429        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59430            0b11111111, a, b, c,
59431        );
59432        let e = _mm_set_pd(1., 5.);
59433        assert_eq_m128d(r, e);
59434    }
59435
59436    #[simd_test(enable = "avx512f")]
59437    unsafe fn test_mm_mask3_fmadd_round_sd() {
59438        let a = _mm_set1_pd(1.);
59439        let b = _mm_set1_pd(2.);
59440        let c = _mm_set1_pd(3.);
59441        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59442            a, b, c, 0,
59443        );
59444        assert_eq_m128d(r, c);
59445        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59446            a, b, c, 0b11111111,
59447        );
59448        let e = _mm_set_pd(3., 5.);
59449        assert_eq_m128d(r, e);
59450    }
59451
59452    #[simd_test(enable = "avx512f")]
59453    unsafe fn test_mm_fmsub_round_ss() {
59454        let a = _mm_set1_ps(1.);
59455        let b = _mm_set1_ps(2.);
59456        let c = _mm_set1_ps(3.);
59457        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59458        let e = _mm_set_ps(1., 1., 1., -1.);
59459        assert_eq_m128(r, e);
59460    }
59461
59462    #[simd_test(enable = "avx512f")]
59463    unsafe fn test_mm_mask_fmsub_round_ss() {
59464        let a = _mm_set1_ps(1.);
59465        let b = _mm_set1_ps(2.);
59466        let c = _mm_set1_ps(3.);
59467        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59468            a, 0, b, c,
59469        );
59470        assert_eq_m128(r, a);
59471        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59472            a, 0b11111111, b, c,
59473        );
59474        let e = _mm_set_ps(1., 1., 1., -1.);
59475        assert_eq_m128(r, e);
59476    }
59477
59478    #[simd_test(enable = "avx512f")]
59479    unsafe fn test_mm_maskz_fmsub_round_ss() {
59480        let a = _mm_set1_ps(1.);
59481        let b = _mm_set1_ps(2.);
59482        let c = _mm_set1_ps(3.);
59483        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59484            0, a, b, c,
59485        );
59486        let e = _mm_set_ps(1., 1., 1., 0.);
59487        assert_eq_m128(r, e);
59488        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59489            0b11111111, a, b, c,
59490        );
59491        let e = _mm_set_ps(1., 1., 1., -1.);
59492        assert_eq_m128(r, e);
59493    }
59494
59495    #[simd_test(enable = "avx512f")]
59496    unsafe fn test_mm_mask3_fmsub_round_ss() {
59497        let a = _mm_set1_ps(1.);
59498        let b = _mm_set1_ps(2.);
59499        let c = _mm_set1_ps(3.);
59500        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59501            a, b, c, 0,
59502        );
59503        assert_eq_m128(r, c);
59504        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59505            a, b, c, 0b11111111,
59506        );
59507        let e = _mm_set_ps(3., 3., 3., -1.);
59508        assert_eq_m128(r, e);
59509    }
59510
59511    #[simd_test(enable = "avx512f")]
59512    unsafe fn test_mm_fmsub_round_sd() {
59513        let a = _mm_set1_pd(1.);
59514        let b = _mm_set1_pd(2.);
59515        let c = _mm_set1_pd(3.);
59516        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59517        let e = _mm_set_pd(1., -1.);
59518        assert_eq_m128d(r, e);
59519    }
59520
59521    #[simd_test(enable = "avx512f")]
59522    unsafe fn test_mm_mask_fmsub_round_sd() {
59523        let a = _mm_set1_pd(1.);
59524        let b = _mm_set1_pd(2.);
59525        let c = _mm_set1_pd(3.);
59526        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59527            a, 0, b, c,
59528        );
59529        assert_eq_m128d(r, a);
59530        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59531            a, 0b11111111, b, c,
59532        );
59533        let e = _mm_set_pd(1., -1.);
59534        assert_eq_m128d(r, e);
59535    }
59536
59537    #[simd_test(enable = "avx512f")]
59538    unsafe fn test_mm_maskz_fmsub_round_sd() {
59539        let a = _mm_set1_pd(1.);
59540        let b = _mm_set1_pd(2.);
59541        let c = _mm_set1_pd(3.);
59542        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59543            0, a, b, c,
59544        );
59545        let e = _mm_set_pd(1., 0.);
59546        assert_eq_m128d(r, e);
59547        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59548            0b11111111, a, b, c,
59549        );
59550        let e = _mm_set_pd(1., -1.);
59551        assert_eq_m128d(r, e);
59552    }
59553
59554    #[simd_test(enable = "avx512f")]
59555    unsafe fn test_mm_mask3_fmsub_round_sd() {
59556        let a = _mm_set1_pd(1.);
59557        let b = _mm_set1_pd(2.);
59558        let c = _mm_set1_pd(3.);
59559        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59560            a, b, c, 0,
59561        );
59562        assert_eq_m128d(r, c);
59563        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59564            a, b, c, 0b11111111,
59565        );
59566        let e = _mm_set_pd(3., -1.);
59567        assert_eq_m128d(r, e);
59568    }
59569
59570    #[simd_test(enable = "avx512f")]
59571    unsafe fn test_mm_fnmadd_round_ss() {
59572        let a = _mm_set1_ps(1.);
59573        let b = _mm_set1_ps(2.);
59574        let c = _mm_set1_ps(3.);
59575        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59576        let e = _mm_set_ps(1., 1., 1., 1.);
59577        assert_eq_m128(r, e);
59578    }
59579
59580    #[simd_test(enable = "avx512f")]
59581    unsafe fn test_mm_mask_fnmadd_round_ss() {
59582        let a = _mm_set1_ps(1.);
59583        let b = _mm_set1_ps(2.);
59584        let c = _mm_set1_ps(3.);
59585        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59586            a, 0, b, c,
59587        );
59588        assert_eq_m128(r, a);
59589        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59590            a, 0b11111111, b, c,
59591        );
59592        let e = _mm_set_ps(1., 1., 1., 1.);
59593        assert_eq_m128(r, e);
59594    }
59595
59596    #[simd_test(enable = "avx512f")]
59597    unsafe fn test_mm_maskz_fnmadd_round_ss() {
59598        let a = _mm_set1_ps(1.);
59599        let b = _mm_set1_ps(2.);
59600        let c = _mm_set1_ps(3.);
59601        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59602            0, a, b, c,
59603        );
59604        let e = _mm_set_ps(1., 1., 1., 0.);
59605        assert_eq_m128(r, e);
59606        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59607            0b11111111, a, b, c,
59608        );
59609        let e = _mm_set_ps(1., 1., 1., 1.);
59610        assert_eq_m128(r, e);
59611    }
59612
59613    #[simd_test(enable = "avx512f")]
59614    unsafe fn test_mm_mask3_fnmadd_round_ss() {
59615        let a = _mm_set1_ps(1.);
59616        let b = _mm_set1_ps(2.);
59617        let c = _mm_set1_ps(3.);
59618        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59619            a, b, c, 0,
59620        );
59621        assert_eq_m128(r, c);
59622        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59623            a, b, c, 0b11111111,
59624        );
59625        let e = _mm_set_ps(3., 3., 3., 1.);
59626        assert_eq_m128(r, e);
59627    }
59628
59629    #[simd_test(enable = "avx512f")]
59630    unsafe fn test_mm_fnmadd_round_sd() {
59631        let a = _mm_set1_pd(1.);
59632        let b = _mm_set1_pd(2.);
59633        let c = _mm_set1_pd(3.);
59634        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59635        let e = _mm_set_pd(1., 1.);
59636        assert_eq_m128d(r, e);
59637    }
59638
59639    #[simd_test(enable = "avx512f")]
59640    unsafe fn test_mm_mask_fnmadd_round_sd() {
59641        let a = _mm_set1_pd(1.);
59642        let b = _mm_set1_pd(2.);
59643        let c = _mm_set1_pd(3.);
59644        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59645            a, 0, b, c,
59646        );
59647        assert_eq_m128d(r, a);
59648        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59649            a, 0b11111111, b, c,
59650        );
59651        let e = _mm_set_pd(1., 1.);
59652        assert_eq_m128d(r, e);
59653    }
59654
59655    #[simd_test(enable = "avx512f")]
59656    unsafe fn test_mm_maskz_fnmadd_round_sd() {
59657        let a = _mm_set1_pd(1.);
59658        let b = _mm_set1_pd(2.);
59659        let c = _mm_set1_pd(3.);
59660        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59661            0, a, b, c,
59662        );
59663        let e = _mm_set_pd(1., 0.);
59664        assert_eq_m128d(r, e);
59665        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59666            0b11111111, a, b, c,
59667        );
59668        let e = _mm_set_pd(1., 1.);
59669        assert_eq_m128d(r, e);
59670    }
59671
59672    #[simd_test(enable = "avx512f")]
59673    unsafe fn test_mm_mask3_fnmadd_round_sd() {
59674        let a = _mm_set1_pd(1.);
59675        let b = _mm_set1_pd(2.);
59676        let c = _mm_set1_pd(3.);
59677        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59678            a, b, c, 0,
59679        );
59680        assert_eq_m128d(r, c);
59681        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59682            a, b, c, 0b11111111,
59683        );
59684        let e = _mm_set_pd(3., 1.);
59685        assert_eq_m128d(r, e);
59686    }
59687
59688    #[simd_test(enable = "avx512f")]
59689    unsafe fn test_mm_fnmsub_round_ss() {
59690        let a = _mm_set1_ps(1.);
59691        let b = _mm_set1_ps(2.);
59692        let c = _mm_set1_ps(3.);
59693        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59694        let e = _mm_set_ps(1., 1., 1., -5.);
59695        assert_eq_m128(r, e);
59696    }
59697
59698    #[simd_test(enable = "avx512f")]
59699    unsafe fn test_mm_mask_fnmsub_round_ss() {
59700        let a = _mm_set1_ps(1.);
59701        let b = _mm_set1_ps(2.);
59702        let c = _mm_set1_ps(3.);
59703        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59704            a, 0, b, c,
59705        );
59706        assert_eq_m128(r, a);
59707        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59708            a, 0b11111111, b, c,
59709        );
59710        let e = _mm_set_ps(1., 1., 1., -5.);
59711        assert_eq_m128(r, e);
59712    }
59713
59714    #[simd_test(enable = "avx512f")]
59715    unsafe fn test_mm_maskz_fnmsub_round_ss() {
59716        let a = _mm_set1_ps(1.);
59717        let b = _mm_set1_ps(2.);
59718        let c = _mm_set1_ps(3.);
59719        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59720            0, a, b, c,
59721        );
59722        let e = _mm_set_ps(1., 1., 1., 0.);
59723        assert_eq_m128(r, e);
59724        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59725            0b11111111, a, b, c,
59726        );
59727        let e = _mm_set_ps(1., 1., 1., -5.);
59728        assert_eq_m128(r, e);
59729    }
59730
59731    #[simd_test(enable = "avx512f")]
59732    unsafe fn test_mm_mask3_fnmsub_round_ss() {
59733        let a = _mm_set1_ps(1.);
59734        let b = _mm_set1_ps(2.);
59735        let c = _mm_set1_ps(3.);
59736        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59737            a, b, c, 0,
59738        );
59739        assert_eq_m128(r, c);
59740        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59741            a, b, c, 0b11111111,
59742        );
59743        let e = _mm_set_ps(3., 3., 3., -5.);
59744        assert_eq_m128(r, e);
59745    }
59746
59747    #[simd_test(enable = "avx512f")]
59748    unsafe fn test_mm_fnmsub_round_sd() {
59749        let a = _mm_set1_pd(1.);
59750        let b = _mm_set1_pd(2.);
59751        let c = _mm_set1_pd(3.);
59752        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59753        let e = _mm_set_pd(1., -5.);
59754        assert_eq_m128d(r, e);
59755    }
59756
59757    #[simd_test(enable = "avx512f")]
59758    unsafe fn test_mm_mask_fnmsub_round_sd() {
59759        let a = _mm_set1_pd(1.);
59760        let b = _mm_set1_pd(2.);
59761        let c = _mm_set1_pd(3.);
59762        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59763            a, 0, b, c,
59764        );
59765        assert_eq_m128d(r, a);
59766        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59767            a, 0b11111111, b, c,
59768        );
59769        let e = _mm_set_pd(1., -5.);
59770        assert_eq_m128d(r, e);
59771    }
59772
59773    #[simd_test(enable = "avx512f")]
59774    unsafe fn test_mm_maskz_fnmsub_round_sd() {
59775        let a = _mm_set1_pd(1.);
59776        let b = _mm_set1_pd(2.);
59777        let c = _mm_set1_pd(3.);
59778        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59779            0, a, b, c,
59780        );
59781        let e = _mm_set_pd(1., 0.);
59782        assert_eq_m128d(r, e);
59783        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59784            0b11111111, a, b, c,
59785        );
59786        let e = _mm_set_pd(1., -5.);
59787        assert_eq_m128d(r, e);
59788    }
59789
59790    #[simd_test(enable = "avx512f")]
59791    unsafe fn test_mm_mask3_fnmsub_round_sd() {
59792        let a = _mm_set1_pd(1.);
59793        let b = _mm_set1_pd(2.);
59794        let c = _mm_set1_pd(3.);
59795        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59796            a, b, c, 0,
59797        );
59798        assert_eq_m128d(r, c);
59799        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59800            a, b, c, 0b11111111,
59801        );
59802        let e = _mm_set_pd(3., -5.);
59803        assert_eq_m128d(r, e);
59804    }
59805
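    // `fixupimm` classifies lane 0 of `b` (here `f32::MAX`/`f64::MAX`, a positive normal
    // value) and uses the matching 4-bit entry of the lookup table passed in `c` to pick
    // the result; per Intel's pseudocode the immediate only selects which cases
    // additionally signal a fault. With `c = i32::MAX` the entry for that token is `0x7`,
    // which means "return -0.0", so the NaN carried in lane 0 of `a` is replaced by
    // `-0.0` in the expected vectors below.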
59806    #[simd_test(enable = "avx512f")]
59807    unsafe fn test_mm_fixupimm_ss() {
59808        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59809        let b = _mm_set1_ps(f32::MAX);
59810        let c = _mm_set1_epi32(i32::MAX);
59811        let r = _mm_fixupimm_ss::<5>(a, b, c);
59812        let e = _mm_set_ps(0., 0., 0., -0.0);
59813        assert_eq_m128(r, e);
59814    }
59815
59816    #[simd_test(enable = "avx512f")]
59817    unsafe fn test_mm_mask_fixupimm_ss() {
59818        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59819        let b = _mm_set1_ps(f32::MAX);
59820        let c = _mm_set1_epi32(i32::MAX);
59821        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59822        let e = _mm_set_ps(0., 0., 0., -0.0);
59823        assert_eq_m128(r, e);
59824    }
59825
59826    #[simd_test(enable = "avx512f")]
59827    unsafe fn test_mm_maskz_fixupimm_ss() {
59828        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59829        let b = _mm_set1_ps(f32::MAX);
59830        let c = _mm_set1_epi32(i32::MAX);
59831        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59832        let e = _mm_set_ps(0., 0., 0., 0.0);
59833        assert_eq_m128(r, e);
59834        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59835        let e = _mm_set_ps(0., 0., 0., -0.0);
59836        assert_eq_m128(r, e);
59837    }
59838
59839    #[simd_test(enable = "avx512f")]
59840    unsafe fn test_mm_fixupimm_sd() {
59841        let a = _mm_set_pd(0., f64::NAN);
59842        let b = _mm_set1_pd(f64::MAX);
59843        let c = _mm_set1_epi64x(i32::MAX as i64);
59844        let r = _mm_fixupimm_sd::<5>(a, b, c);
59845        let e = _mm_set_pd(0., -0.0);
59846        assert_eq_m128d(r, e);
59847    }
59848
59849    #[simd_test(enable = "avx512f")]
59850    unsafe fn test_mm_mask_fixupimm_sd() {
59851        let a = _mm_set_pd(0., f64::NAN);
59852        let b = _mm_set1_pd(f64::MAX);
59853        let c = _mm_set1_epi64x(i32::MAX as i64);
59854        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59855        let e = _mm_set_pd(0., -0.0);
59856        assert_eq_m128d(r, e);
59857    }
59858
59859    #[simd_test(enable = "avx512f")]
59860    unsafe fn test_mm_maskz_fixupimm_sd() {
59861        let a = _mm_set_pd(0., f64::NAN);
59862        let b = _mm_set1_pd(f64::MAX);
59863        let c = _mm_set1_epi64x(i32::MAX as i64);
59864        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59865        let e = _mm_set_pd(0., 0.0);
59866        assert_eq_m128d(r, e);
59867        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59868        let e = _mm_set_pd(0., -0.0);
59869        assert_eq_m128d(r, e);
59870    }
59871
59872    #[simd_test(enable = "avx512f")]
59873    unsafe fn test_mm_fixupimm_round_ss() {
59874        let a = _mm_set_ps(1., 0., 0., f32::NAN);
59875        let b = _mm_set1_ps(f32::MAX);
59876        let c = _mm_set1_epi32(i32::MAX);
59877        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59878        let e = _mm_set_ps(1., 0., 0., -0.0);
59879        assert_eq_m128(r, e);
59880    }
59881
59882    #[simd_test(enable = "avx512f")]
59883    unsafe fn test_mm_mask_fixupimm_round_ss() {
59884        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59885        let b = _mm_set1_ps(f32::MAX);
59886        let c = _mm_set1_epi32(i32::MAX);
59887        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59888        let e = _mm_set_ps(0., 0., 0., -0.0);
59889        assert_eq_m128(r, e);
59890    }
59891
59892    #[simd_test(enable = "avx512f")]
59893    unsafe fn test_mm_maskz_fixupimm_round_ss() {
59894        let a = _mm_set_ps(0., 0., 0., f32::NAN);
59895        let b = _mm_set1_ps(f32::MAX);
59896        let c = _mm_set1_epi32(i32::MAX);
59897        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59898        let e = _mm_set_ps(0., 0., 0., 0.0);
59899        assert_eq_m128(r, e);
59900        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59901        let e = _mm_set_ps(0., 0., 0., -0.0);
59902        assert_eq_m128(r, e);
59903    }
59904
59905    #[simd_test(enable = "avx512f")]
59906    unsafe fn test_mm_fixupimm_round_sd() {
59907        let a = _mm_set_pd(0., f64::NAN);
59908        let b = _mm_set1_pd(f64::MAX);
59909        let c = _mm_set1_epi64x(i32::MAX as i64);
59910        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59911        let e = _mm_set_pd(0., -0.0);
59912        assert_eq_m128d(r, e);
59913    }
59914
59915    #[simd_test(enable = "avx512f")]
59916    unsafe fn test_mm_mask_fixupimm_round_sd() {
59917        let a = _mm_set_pd(0., f64::NAN);
59918        let b = _mm_set1_pd(f64::MAX);
59919        let c = _mm_set1_epi64x(i32::MAX as i64);
59920        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59921        let e = _mm_set_pd(0., -0.0);
59922        assert_eq_m128d(r, e);
59923    }
59924
59925    #[simd_test(enable = "avx512f")]
59926    unsafe fn test_mm_maskz_fixupimm_round_sd() {
59927        let a = _mm_set_pd(0., f64::NAN);
59928        let b = _mm_set1_pd(f64::MAX);
59929        let c = _mm_set1_epi64x(i32::MAX as i64);
59930        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59931        let e = _mm_set_pd(0., 0.0);
59932        assert_eq_m128d(r, e);
59933        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59934        let e = _mm_set_pd(0., -0.0);
59935        assert_eq_m128d(r, e);
59936    }
59937
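    // The remaining tests cover the masked scalar precision conversions: lane 0 of `b` is
    // converted (f32 -> f64 for `cvtss_sd`, f64 -> f32 for `cvtsd_ss`) and merged into
    // `a`'s upper lane(s), with the writemask/zeromask fallbacks as before. The
    // `_mm_cvt_round*` forms additionally take the rounding/SAE argument; narrowing to
    // f32 can round, so `_mm_cvt_roundsd_ss` accepts a full rounding mode, while the
    // widening `_mm_cvt_roundss_sd` is exact and uses `_MM_FROUND_CUR_DIRECTION` here.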
59938    #[simd_test(enable = "avx512f")]
59939    unsafe fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_si32() {
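        // _MM_FROUND_TO_ZERO truncates toward zero, so -1.5 converts to -1.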
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundss_u32() {
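        // -1.5 cannot be represented as an unsigned integer, so the conversion raises the
        // (masked) invalid exception and returns the unsigned integer indefinite value, u32::MAX.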
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtss_i32() {
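        // Without an explicit rounding override the default round-to-nearest-even applies,
        // so -1.5 rounds to -2 (the truncating variants above give -1 instead).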
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundi32_ss() {
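        // 9 is exactly representable as f32, so the requested rounding mode has no visible effect.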
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_si32() {
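        // The cvtt variants always truncate; the const argument here only controls
        // exception suppression (_MM_FROUND_NO_EXC), so -1.5 becomes -1.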
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_ss() {
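        // Predicate 0 is _CMP_EQ_OQ; 2.2 == 1.1 is false, so the comparison returns 0.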
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_shuffle_pd() {
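        // Each bit of the immediate picks element 0 or 1 within a 128-bit lane: even
        // destination lanes come from `a`, odd ones from `b`. With all bits set this
        // yields (a[1], b[1], a[3], b[3], ...).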
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
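        // Expand-load reads consecutive values from memory and places them, in order,
        // into the lanes whose mask bit is set; lanes with a clear bit keep `src` (42).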
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
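        // Only the low two bits of the mask matter for a 2-lane vector; both are clear
        // here, so nothing is loaded and the result is entirely `src`.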
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}