// highway/x86/v2x64u.rs
1#![allow(unsafe_code)]
2use core::arch::x86_64::*;
3use core::ops::{
4    Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, ShlAssign,
5    ShrAssign, SubAssign,
6};
7
/// A wrapper over the 128-bit SIMD register `__m128i`, treated by this
/// module as a vector of two unsigned 64-bit lanes.
#[derive(Clone, Copy)]
pub struct V2x64U(pub __m128i);
10
11impl Default for V2x64U {
12    #[inline]
13    fn default() -> Self {
14        unsafe { V2x64U::zeroed() }
15    }
16}
17
18impl core::fmt::Debug for V2x64U {
19    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
20        write!(f, "V2x64U: {:?}", unsafe { self.as_arr() })
21    }
22}
23
24impl V2x64U {
25    #[inline]
26    #[target_feature(enable = "sse4.1")]
27    unsafe fn zeroed() -> Self {
28        V2x64U(_mm_setzero_si128())
29    }
30
31    #[inline]
32    #[target_feature(enable = "sse4.1")]
33    pub unsafe fn new(hi: u64, low: u64) -> Self {
34        V2x64U(_mm_set_epi64x(hi as i64, low as i64))
35    }
36
37    #[target_feature(enable = "sse4.1")]
38    pub unsafe fn as_arr(&self) -> [u64; 2] {
39        let mut arr: [u64; 2] = [0, 0];
40        _mm_storeu_si128(arr.as_mut_ptr().cast::<__m128i>(), self.0);
41        arr
42    }
43
44    #[inline]
45    #[target_feature(enable = "sse4.1")]
46    pub unsafe fn rotate_by_32(&self) -> Self {
47        V2x64U(_mm_shuffle_epi32(self.0, _mm_shuffle!(2, 3, 0, 1)))
48    }
49
50    #[inline]
51    #[target_feature(enable = "sse4.1")]
52    pub unsafe fn shuffle(&self, mask: &V2x64U) -> Self {
53        V2x64U::from(_mm_shuffle_epi8(self.0, mask.0))
54    }
55
56    #[inline]
57    #[target_feature(enable = "sse4.1")]
58    pub unsafe fn and_not(&self, neg_mask: &V2x64U) -> Self {
59        V2x64U::from(_mm_andnot_si128(neg_mask.0, self.0))
60    }
61
62    #[inline]
63    #[target_feature(enable = "sse4.1")]
64    unsafe fn add_assign(&mut self, other: Self) {
65        self.0 = _mm_add_epi64(self.0, other.0);
66    }
67
68    #[inline]
69    #[target_feature(enable = "sse4.1")]
70    unsafe fn sub_assign(&mut self, other: Self) {
71        self.0 = _mm_sub_epi64(self.0, other.0);
72    }
73
74    #[inline]
75    #[target_feature(enable = "sse4.1")]
76    unsafe fn bitand_assign(&mut self, other: Self) {
77        self.0 = _mm_and_si128(self.0, other.0);
78    }
79
80    #[inline]
81    #[target_feature(enable = "sse4.1")]
82    unsafe fn bitor_assign(&mut self, other: Self) {
83        self.0 = _mm_or_si128(self.0, other.0);
84    }
85
86    #[inline]
87    #[target_feature(enable = "sse4.1")]
88    unsafe fn bitxor_assign(&mut self, other: Self) {
89        self.0 = _mm_xor_si128(self.0, other.0);
90    }
91
92    #[inline]
93    #[target_feature(enable = "sse4.1")]
94    unsafe fn shl_assign(&mut self, count: __m128i) {
95        self.0 = _mm_sll_epi64(self.0, count);
96    }
97
98    #[inline]
99    #[target_feature(enable = "sse4.1")]
100    unsafe fn shr_assign(&mut self, count: __m128i) {
101        self.0 = _mm_srl_epi64(self.0, count);
102    }
103}
104
105impl From<__m128i> for V2x64U {
106    #[inline]
107    fn from(v: __m128i) -> Self {
108        V2x64U(v)
109    }
110}
111
112impl AddAssign for V2x64U {
113    #[inline]
114    fn add_assign(&mut self, other: Self) {
115        unsafe { self.add_assign(other) }
116    }
117}
118
119impl SubAssign for V2x64U {
120    #[inline]
121    fn sub_assign(&mut self, other: Self) {
122        unsafe { self.sub_assign(other) }
123    }
124}
125
126impl BitAndAssign for V2x64U {
127    #[inline]
128    fn bitand_assign(&mut self, other: Self) {
129        unsafe { self.bitand_assign(other) }
130    }
131}
132
133impl BitAnd for V2x64U {
134    type Output = Self;
135    #[inline]
136    fn bitand(self, other: Self) -> Self {
137        let mut new = V2x64U(self.0);
138        new &= other;
139        new
140    }
141}
142
143impl BitOrAssign for V2x64U {
144    #[inline]
145    fn bitor_assign(&mut self, other: Self) {
146        unsafe { self.bitor_assign(other) }
147    }
148}
149
150impl BitOr for V2x64U {
151    type Output = Self;
152    #[inline]
153    fn bitor(self, other: Self) -> Self {
154        let mut new = V2x64U(self.0);
155        new |= other;
156        new
157    }
158}
159
160impl BitXorAssign for V2x64U {
161    #[inline]
162    fn bitxor_assign(&mut self, other: Self) {
163        unsafe { self.bitxor_assign(other) }
164    }
165}
166
167impl Add for V2x64U {
168    type Output = Self;
169
170    #[inline]
171    fn add(self, other: Self) -> Self {
172        let mut new = V2x64U(self.0);
173        new += other;
174        new
175    }
176}
177
178impl BitXor for V2x64U {
179    type Output = Self;
180
181    #[inline]
182    fn bitxor(self, other: Self) -> Self {
183        let mut new = V2x64U(self.0);
184        new ^= other;
185        new
186    }
187}
188
189impl ShlAssign<__m128i> for V2x64U {
190    #[inline]
191    fn shl_assign(&mut self, count: __m128i) {
192        unsafe { self.shl_assign(count) }
193    }
194}
195
196impl ShrAssign<__m128i> for V2x64U {
197    #[inline]
198    fn shr_assign(&mut self, count: __m128i) {
199        unsafe { self.shr_assign(count) }
200    }
201}
202
#[cfg(test)]
pub mod tests {
    use super::*;

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_as_arr() {
        unsafe {
            let v = V2x64U::new(55, 1);
            // Lanes are extracted in little-endian order: [low, hi].
            assert_eq!(v.as_arr(), [1, 55]);
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_rotate_by_32() {
        unsafe {
            let v = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let rotated = v.rotate_by_32();
            // Each 64-bit lane has its 32-bit halves swapped.
            assert_eq!(
                rotated.as_arr(),
                [0xEBB3_172D_0B28_E3EF, 0xCD8A_70E0_0264_432C]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_add() {
        unsafe {
            let a = V2x64U::new(55, 1);
            let b = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let sum = a + b;
            assert_eq!(
                sum.as_arr(),
                [0x0B28_E3EF_EBB3_172E, 0x0264_432C_CD8A_7117]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_srli_epi64() {
        unsafe {
            let v = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let shifted = V2x64U::from(_mm_srli_epi64(v.0, 33));
            assert_eq!(
                shifted.as_arr(),
                [0x0000_0000_0594_71F7, 0x0000_0000_0132_2196]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_mul_epu32() {
        unsafe {
            let a = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let b = V2x64U::new(0x0B28_E3EF_EBB3_172D, 0x0264_432C_CD8A_70E0);
            let product = V2x64U::from(_mm_mul_epu32(a.0, b.0));
            assert_eq!(
                product.as_arr(),
                [0xBD3D_E006_1E19_F760, 0xBD3D_E006_1E19_F760]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_slli_si128_8() {
        unsafe {
            let v = V2x64U::new(0, 0xFFFF_FFFF);
            // Byte-shifting the register left by 8 moves the low lane into
            // the high lane.
            let shifted = V2x64U::from(_mm_slli_si128(v.0, 8));
            assert_eq!(shifted.as_arr(), [0, 0xFFFF_FFFF]);
        }
    }
}