1#![allow(unsafe_code)]
2use core::arch::x86_64::*;
3use core::ops::{
4 Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, ShlAssign,
5 ShrAssign, SubAssign,
6};
7
/// A newtype over a 128-bit SSE register, treated as a vector of two
/// unsigned 64-bit lanes (see `as_arr`, which reads the lanes back as
/// `[u64; 2]`). All arithmetic/bitwise operator impls below delegate to
/// SSE intrinsics on the wrapped `__m128i`.
#[derive(Clone, Copy)]
pub struct V2x64U(pub __m128i);
10
11impl Default for V2x64U {
12 #[inline]
13 fn default() -> Self {
14 unsafe { V2x64U::zeroed() }
15 }
16}
17
18impl core::fmt::Debug for V2x64U {
19 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
20 write!(f, "V2x64U: {:?}", unsafe { self.as_arr() })
21 }
22}
23
24impl V2x64U {
25 #[inline]
26 #[target_feature(enable = "sse4.1")]
27 unsafe fn zeroed() -> Self {
28 V2x64U(_mm_setzero_si128())
29 }
30
31 #[inline]
32 #[target_feature(enable = "sse4.1")]
33 pub unsafe fn new(hi: u64, low: u64) -> Self {
34 V2x64U(_mm_set_epi64x(hi as i64, low as i64))
35 }
36
37 #[target_feature(enable = "sse4.1")]
38 pub unsafe fn as_arr(&self) -> [u64; 2] {
39 let mut arr: [u64; 2] = [0, 0];
40 _mm_storeu_si128(arr.as_mut_ptr().cast::<__m128i>(), self.0);
41 arr
42 }
43
44 #[inline]
45 #[target_feature(enable = "sse4.1")]
46 pub unsafe fn rotate_by_32(&self) -> Self {
47 V2x64U(_mm_shuffle_epi32(self.0, _mm_shuffle!(2, 3, 0, 1)))
48 }
49
50 #[inline]
51 #[target_feature(enable = "sse4.1")]
52 pub unsafe fn shuffle(&self, mask: &V2x64U) -> Self {
53 V2x64U::from(_mm_shuffle_epi8(self.0, mask.0))
54 }
55
56 #[inline]
57 #[target_feature(enable = "sse4.1")]
58 pub unsafe fn and_not(&self, neg_mask: &V2x64U) -> Self {
59 V2x64U::from(_mm_andnot_si128(neg_mask.0, self.0))
60 }
61
62 #[inline]
63 #[target_feature(enable = "sse4.1")]
64 unsafe fn add_assign(&mut self, other: Self) {
65 self.0 = _mm_add_epi64(self.0, other.0);
66 }
67
68 #[inline]
69 #[target_feature(enable = "sse4.1")]
70 unsafe fn sub_assign(&mut self, other: Self) {
71 self.0 = _mm_sub_epi64(self.0, other.0);
72 }
73
74 #[inline]
75 #[target_feature(enable = "sse4.1")]
76 unsafe fn bitand_assign(&mut self, other: Self) {
77 self.0 = _mm_and_si128(self.0, other.0);
78 }
79
80 #[inline]
81 #[target_feature(enable = "sse4.1")]
82 unsafe fn bitor_assign(&mut self, other: Self) {
83 self.0 = _mm_or_si128(self.0, other.0);
84 }
85
86 #[inline]
87 #[target_feature(enable = "sse4.1")]
88 unsafe fn bitxor_assign(&mut self, other: Self) {
89 self.0 = _mm_xor_si128(self.0, other.0);
90 }
91
92 #[inline]
93 #[target_feature(enable = "sse4.1")]
94 unsafe fn shl_assign(&mut self, count: __m128i) {
95 self.0 = _mm_sll_epi64(self.0, count);
96 }
97
98 #[inline]
99 #[target_feature(enable = "sse4.1")]
100 unsafe fn shr_assign(&mut self, count: __m128i) {
101 self.0 = _mm_srl_epi64(self.0, count);
102 }
103}
104
105impl From<__m128i> for V2x64U {
106 #[inline]
107 fn from(v: __m128i) -> Self {
108 V2x64U(v)
109 }
110}
111
112impl AddAssign for V2x64U {
113 #[inline]
114 fn add_assign(&mut self, other: Self) {
115 unsafe { self.add_assign(other) }
116 }
117}
118
119impl SubAssign for V2x64U {
120 #[inline]
121 fn sub_assign(&mut self, other: Self) {
122 unsafe { self.sub_assign(other) }
123 }
124}
125
126impl BitAndAssign for V2x64U {
127 #[inline]
128 fn bitand_assign(&mut self, other: Self) {
129 unsafe { self.bitand_assign(other) }
130 }
131}
132
133impl BitAnd for V2x64U {
134 type Output = Self;
135 #[inline]
136 fn bitand(self, other: Self) -> Self {
137 let mut new = V2x64U(self.0);
138 new &= other;
139 new
140 }
141}
142
143impl BitOrAssign for V2x64U {
144 #[inline]
145 fn bitor_assign(&mut self, other: Self) {
146 unsafe { self.bitor_assign(other) }
147 }
148}
149
150impl BitOr for V2x64U {
151 type Output = Self;
152 #[inline]
153 fn bitor(self, other: Self) -> Self {
154 let mut new = V2x64U(self.0);
155 new |= other;
156 new
157 }
158}
159
160impl BitXorAssign for V2x64U {
161 #[inline]
162 fn bitxor_assign(&mut self, other: Self) {
163 unsafe { self.bitxor_assign(other) }
164 }
165}
166
167impl Add for V2x64U {
168 type Output = Self;
169
170 #[inline]
171 fn add(self, other: Self) -> Self {
172 let mut new = V2x64U(self.0);
173 new += other;
174 new
175 }
176}
177
178impl BitXor for V2x64U {
179 type Output = Self;
180
181 #[inline]
182 fn bitxor(self, other: Self) -> Self {
183 let mut new = V2x64U(self.0);
184 new ^= other;
185 new
186 }
187}
188
189impl ShlAssign<__m128i> for V2x64U {
190 #[inline]
191 fn shl_assign(&mut self, count: __m128i) {
192 unsafe { self.shl_assign(count) }
193 }
194}
195
196impl ShrAssign<__m128i> for V2x64U {
197 #[inline]
198 fn shr_assign(&mut self, count: __m128i) {
199 unsafe { self.shr_assign(count) }
200 }
201}
202
#[cfg(test)]
pub mod tests {
    use super::*;

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_as_arr() {
        unsafe {
            let v = V2x64U::new(55, 1);
            // Lane 0 (the low lane) comes back first.
            assert_eq!(v.as_arr(), [1, 55]);
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_rotate_by_32() {
        unsafe {
            let v = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let rotated = v.rotate_by_32();
            // The 32-bit halves of each lane swap places.
            assert_eq!(
                rotated.as_arr(),
                [0xEBB3_172D_0B28_E3EF, 0xCD8A_70E0_0264_432C]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_add() {
        unsafe {
            let a = V2x64U::new(55, 1);
            let b = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let sum = a + b;
            // Lane-wise: low + 1, hi + 55.
            assert_eq!(sum.as_arr(), [0x0B28_E3EF_EBB3_172E, 0x0264_432C_CD8A_7117]);
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_srli_epi64() {
        unsafe {
            let v = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let shifted = V2x64U::from(_mm_srli_epi64(v.0, 33));
            assert_eq!(
                shifted.as_arr(),
                [0x0000_0000_0594_71F7, 0x0000_0000_0132_2196]
            );
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_mul_epu32() {
        unsafe {
            let a = V2x64U::new(0x0264_432C_CD8A_70E0, 0x0B28_E3EF_EBB3_172D);
            let b = V2x64U::new(0x0B28_E3EF_EBB3_172D, 0x0264_432C_CD8A_70E0);
            // Multiplies the low 32 bits of each 64-bit lane; both lanes
            // pair the same factors here, so the products match.
            let prod = V2x64U::from(_mm_mul_epu32(a.0, b.0));
            assert_eq!(prod.as_arr(), [0xBD3D_E006_1E19_F760, 0xBD3D_E006_1E19_F760]);
        }
    }

    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_mm_slli_si128_8() {
        unsafe {
            let v = V2x64U::new(0, 0xFFFF_FFFF);
            // A whole-register shift left by 8 bytes moves lane 0 into lane 1.
            let shifted = V2x64U::from(_mm_slli_si128(v.0, 8));
            assert_eq!(shifted.as_arr(), [0, 0xFFFF_FFFF]);
        }
    }
}