// highway/x86/v4x64u.rs
1#![allow(unsafe_code)]
2use core::arch::x86_64::*;
3use core::ops::{
4    Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, SubAssign,
5};
6
/// A 256-bit AVX2 register viewed as four unsigned 64-bit lanes
/// (see `as_arr`, which extracts `[u64; 4]`).
#[derive(Clone, Copy)]
pub struct V4x64U(pub __m256i);
9
10impl Default for V4x64U {
11    #[inline]
12    fn default() -> Self {
13        unsafe { V4x64U::zeroed() }
14    }
15}
16
17impl core::fmt::Debug for V4x64U {
18    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
19        write!(f, "V4x64U: {:?}", unsafe { self.as_arr() })
20    }
21}
22
// Rust equivalent of C's `_MM_SHUFFLE(z, y, x, w)`: packs four 2-bit lane
// selectors into the 8-bit immediate consumed by the shuffle intrinsics.
macro_rules! _mm_shuffle {
    ($z:expr, $y:expr, $x:expr, $w:expr) => {
        ($z << 6) | ($y << 4) | ($x << 2) | $w
    };
}
28
29impl V4x64U {
30    #[inline]
31    #[target_feature(enable = "avx2")]
32    pub unsafe fn zeroed() -> Self {
33        V4x64U(_mm256_setzero_si256())
34    }
35
36    #[inline]
37    #[target_feature(enable = "avx2")]
38    pub unsafe fn new(highest: u64, high: u64, low: u64, lowest: u64) -> Self {
39        V4x64U(_mm256_set_epi64x(
40            highest as i64,
41            high as i64,
42            low as i64,
43            lowest as i64,
44        ))
45    }
46
47    #[target_feature(enable = "avx2")]
48    pub unsafe fn as_arr(&self) -> [u64; 4] {
49        let mut arr: [u64; 4] = [0; 4];
50        _mm256_storeu_si256(arr.as_mut_ptr().cast::<__m256i>(), self.0);
51        arr
52    }
53
54    #[inline]
55    #[target_feature(enable = "avx2")]
56    pub unsafe fn rotate_by_32(&self) -> Self {
57        V4x64U(_mm256_shuffle_epi32(self.0, _mm_shuffle!(2, 3, 0, 1)))
58    }
59
60    #[inline]
61    #[target_feature(enable = "avx2")]
62    pub unsafe fn shr_by_32(&self) -> Self {
63        V4x64U(_mm256_srli_epi64(self.0, 32))
64    }
65
66    #[inline]
67    #[target_feature(enable = "avx2")]
68    pub unsafe fn shuffle(&self, mask: &V4x64U) -> Self {
69        V4x64U::from(_mm256_shuffle_epi8(self.0, mask.0))
70    }
71
72    #[inline]
73    #[target_feature(enable = "avx2")]
74    pub unsafe fn mul_low32(&self, x: &V4x64U) -> Self {
75        V4x64U::from(_mm256_mul_epu32(self.0, x.0))
76    }
77
78    #[inline]
79    #[target_feature(enable = "avx2")]
80    pub unsafe fn and_not(&self, neg_mask: &V4x64U) -> Self {
81        V4x64U::from(_mm256_andnot_si256(neg_mask.0, self.0))
82    }
83
84    #[inline]
85    #[target_feature(enable = "avx2")]
86    unsafe fn add_assign(&mut self, other: Self) {
87        self.0 = _mm256_add_epi64(self.0, other.0);
88    }
89
90    #[inline]
91    #[target_feature(enable = "avx2")]
92    unsafe fn sub_assign(&mut self, other: Self) {
93        self.0 = _mm256_sub_epi64(self.0, other.0);
94    }
95
96    #[inline]
97    #[target_feature(enable = "avx2")]
98    unsafe fn bitand_assign(&mut self, other: Self) {
99        self.0 = _mm256_and_si256(self.0, other.0);
100    }
101
102    #[inline]
103    #[target_feature(enable = "avx2")]
104    unsafe fn bitor_assign(&mut self, other: Self) {
105        self.0 = _mm256_or_si256(self.0, other.0);
106    }
107
108    #[inline]
109    #[target_feature(enable = "avx2")]
110    unsafe fn bitxor_assign(&mut self, other: Self) {
111        self.0 = _mm256_xor_si256(self.0, other.0);
112    }
113}
114
115impl From<__m256i> for V4x64U {
116    #[inline]
117    fn from(v: __m256i) -> Self {
118        V4x64U(v)
119    }
120}
121
122impl AddAssign for V4x64U {
123    #[inline]
124    fn add_assign(&mut self, other: Self) {
125        unsafe { self.add_assign(other) }
126    }
127}
128
129impl SubAssign for V4x64U {
130    #[inline]
131    fn sub_assign(&mut self, other: Self) {
132        unsafe { self.sub_assign(other) }
133    }
134}
135
136impl BitAndAssign for V4x64U {
137    #[inline]
138    fn bitand_assign(&mut self, other: Self) {
139        unsafe { self.bitand_assign(other) }
140    }
141}
142
143impl BitAnd for V4x64U {
144    type Output = Self;
145    #[inline]
146    fn bitand(self, other: Self) -> Self {
147        let mut new = V4x64U(self.0);
148        new &= other;
149        new
150    }
151}
152
153impl BitOrAssign for V4x64U {
154    #[inline]
155    fn bitor_assign(&mut self, other: Self) {
156        unsafe { self.bitor_assign(other) }
157    }
158}
159
160impl BitOr for V4x64U {
161    type Output = Self;
162    #[inline]
163    fn bitor(self, other: Self) -> Self {
164        let mut new = V4x64U(self.0);
165        new |= other;
166        new
167    }
168}
169
170impl BitXorAssign for V4x64U {
171    #[inline]
172    fn bitxor_assign(&mut self, other: Self) {
173        unsafe { self.bitxor_assign(other) }
174    }
175}
176
177impl Add for V4x64U {
178    type Output = Self;
179
180    #[inline]
181    fn add(self, other: Self) -> Self {
182        let mut new = V4x64U(self.0);
183        new += other;
184        new
185    }
186}
187
188impl BitXor for V4x64U {
189    type Output = Self;
190
191    #[inline]
192    fn bitxor(self, other: Self) -> Self {
193        let mut new = V4x64U(self.0);
194        new ^= other;
195        new
196    }
197}