1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19 unsafe { vpmadd52huq_512(a, b, c) }
20}
21
22#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
35 unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
36}
37
38#[inline]
47#[target_feature(enable = "avx512ifma")]
48#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
49#[cfg_attr(test, assert_instr(vpmadd52huq))]
50pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
51 unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
52}
53
54#[inline]
62#[target_feature(enable = "avx512ifma")]
63#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
64#[cfg_attr(test, assert_instr(vpmadd52luq))]
65pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
66 unsafe { vpmadd52luq_512(a, b, c) }
67}
68
69#[inline]
78#[target_feature(enable = "avx512ifma")]
79#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
80#[cfg_attr(test, assert_instr(vpmadd52luq))]
81pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
82 unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
83}
84
85#[inline]
94#[target_feature(enable = "avx512ifma")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpmadd52luq))]
97pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
98 unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
99}
100
101#[inline]
109#[target_feature(enable = "avxifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(
112 all(test, any(target_os = "linux", target_env = "msvc")),
113 assert_instr(vpmadd52huq)
114)]
115pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
116 unsafe { vpmadd52huq_256(a, b, c) }
117}
118
119#[inline]
127#[target_feature(enable = "avx512ifma,avx512vl")]
128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
129#[cfg_attr(test, assert_instr(vpmadd52huq))]
130pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
131 unsafe { vpmadd52huq_256(a, b, c) }
132}
133
134#[inline]
143#[target_feature(enable = "avx512ifma,avx512vl")]
144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
145#[cfg_attr(test, assert_instr(vpmadd52huq))]
146pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
147 unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
148}
149
150#[inline]
159#[target_feature(enable = "avx512ifma,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpmadd52huq))]
162pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
163 unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
164}
165
166#[inline]
174#[target_feature(enable = "avxifma")]
175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
176#[cfg_attr(
177 all(test, any(target_os = "linux", target_env = "msvc")),
178 assert_instr(vpmadd52luq)
179)]
180pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
181 unsafe { vpmadd52luq_256(a, b, c) }
182}
183
184#[inline]
192#[target_feature(enable = "avx512ifma,avx512vl")]
193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
194#[cfg_attr(test, assert_instr(vpmadd52luq))]
195pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
196 unsafe { vpmadd52luq_256(a, b, c) }
197}
198
199#[inline]
208#[target_feature(enable = "avx512ifma,avx512vl")]
209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
210#[cfg_attr(test, assert_instr(vpmadd52luq))]
211pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
212 unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
213}
214
215#[inline]
224#[target_feature(enable = "avx512ifma,avx512vl")]
225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
226#[cfg_attr(test, assert_instr(vpmadd52luq))]
227pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
228 unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
229}
230
231#[inline]
239#[target_feature(enable = "avxifma")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(
242 all(test, any(target_os = "linux", target_env = "msvc")),
243 assert_instr(vpmadd52huq)
244)]
245pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
246 unsafe { vpmadd52huq_128(a, b, c) }
247}
248
249#[inline]
257#[target_feature(enable = "avx512ifma,avx512vl")]
258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
259#[cfg_attr(test, assert_instr(vpmadd52huq))]
260pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
261 unsafe { vpmadd52huq_128(a, b, c) }
262}
263
264#[inline]
273#[target_feature(enable = "avx512ifma,avx512vl")]
274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
275#[cfg_attr(test, assert_instr(vpmadd52huq))]
276pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
277 unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
278}
279
280#[inline]
289#[target_feature(enable = "avx512ifma,avx512vl")]
290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
291#[cfg_attr(test, assert_instr(vpmadd52huq))]
292pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
293 unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
294}
295
296#[inline]
304#[target_feature(enable = "avxifma")]
305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
306#[cfg_attr(
307 all(test, any(target_os = "linux", target_env = "msvc")),
308 assert_instr(vpmadd52luq)
309)]
310pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
311 unsafe { vpmadd52luq_128(a, b, c) }
312}
313
314#[inline]
322#[target_feature(enable = "avx512ifma,avx512vl")]
323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
324#[cfg_attr(test, assert_instr(vpmadd52luq))]
325pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
326 unsafe { vpmadd52luq_128(a, b, c) }
327}
328
329#[inline]
338#[target_feature(enable = "avx512ifma,avx512vl")]
339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
340#[cfg_attr(test, assert_instr(vpmadd52luq))]
341pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
342 unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
343}
344
345#[inline]
354#[target_feature(enable = "avx512ifma,avx512vl")]
355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
356#[cfg_attr(test, assert_instr(vpmadd52luq))]
357pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
358 unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
359}
360
361#[allow(improper_ctypes)]
362unsafe extern "C" {
363 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
364 fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
365 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
366 fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
367 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
368 fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
369 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
370 fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
371 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
372 fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
373 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
374 fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
375}
376
#[cfg(test)]
mod tests {
    // Every test splats the same three scalars across all lanes, runs one intrinsic, and compares
    // against a precomputed per-lane sum. Masked tests build the expected vector by blending that
    // sum with the fallback (`a` or zero) under the same mask `K`.

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// Mask shared by all `mask`/`maskz` tests.
    const K: __mmask8 = 0b01101101;

    /// Accumulator value placed in every lane of `a`.
    const A: i64 = 10 << 40;
    /// First multiplicand, placed in every lane of `b`.
    const B: i64 = (11 << 40) + 4;
    /// Second multiplicand, placed in every lane of `c`.
    const C: i64 = (12 << 40) + 3;

    /// Expected lane value for the `madd52hi` intrinsics: `A` plus the high 52 bits of `B * C`.
    const HI_SUM: i64 = 11030549757952;
    /// Expected lane value for the `madd52lo` intrinsics: `A` plus the low 52 bits of `B * C`.
    const LO_SUM: i64 = 100055558127628;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        assert_eq_m512i(_mm512_set1_epi64(HI_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        let full = _mm512_set1_epi64(HI_SUM);
        let expected = _mm512_mask_blend_epi64(K, a, full);
        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        let full = _mm512_set1_epi64(HI_SUM);
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), full);
        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        assert_eq_m512i(_mm512_set1_epi64(LO_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        let full = _mm512_set1_epi64(LO_SUM);
        let expected = _mm512_mask_blend_epi64(K, a, full);
        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(A);
        let b = _mm512_set1_epi64(B);
        let c = _mm512_set1_epi64(C);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        let full = _mm512_set1_epi64(LO_SUM);
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), full);
        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        assert_eq_m256i(_mm256_set1_epi64x(HI_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        assert_eq_m256i(_mm256_set1_epi64x(HI_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        let full = _mm256_set1_epi64x(HI_SUM);
        let expected = _mm256_mask_blend_epi64(K, a, full);
        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        let full = _mm256_set1_epi64x(HI_SUM);
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), full);
        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        assert_eq_m256i(_mm256_set1_epi64x(LO_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        assert_eq_m256i(_mm256_set1_epi64x(LO_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        let full = _mm256_set1_epi64x(LO_SUM);
        let expected = _mm256_mask_blend_epi64(K, a, full);
        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(A);
        let b = _mm256_set1_epi64x(B);
        let c = _mm256_set1_epi64x(C);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        let full = _mm256_set1_epi64x(LO_SUM);
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), full);
        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        assert_eq_m128i(_mm_set1_epi64x(HI_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_madd52hi_epu64(a, b, c);

        assert_eq_m128i(_mm_set1_epi64x(HI_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        let full = _mm_set1_epi64x(HI_SUM);
        let expected = _mm_mask_blend_epi64(K, a, full);
        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        let full = _mm_set1_epi64x(HI_SUM);
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), full);
        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        assert_eq_m128i(_mm_set1_epi64x(LO_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_madd52lo_epu64(a, b, c);

        assert_eq_m128i(_mm_set1_epi64x(LO_SUM), actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        let full = _mm_set1_epi64x(LO_SUM);
        let expected = _mm_mask_blend_epi64(K, a, full);
        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(A);
        let b = _mm_set1_epi64x(B);
        let c = _mm_set1_epi64x(C);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        let full = _mm_set1_epi64x(LO_SUM);
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), full);
        assert_eq_m128i(expected, actual);
    }
}