Initial commit.
[BoarSSL] / Crypto / GHASH.cs
1 /*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 using System;
26
27 namespace Crypto {
28
29 /*
30 * GHASH implementation using integer multiplications (32->64). It is
31 * constant-time, provided that the platform multiplication opcode is
32 * constant-time.
33 */
34
35 public sealed class GHASH {
36
37 /*
38 * Compute GHASH over the provided data. The y[] array is
39 * updated, using the h[] secret value. If the data length
40 * is not a multiple of 16 bytes, then it is right-padded
41 * with zeros.
42 */
43 public static void Run(byte[] y, byte[] h, byte[] data)
44 {
45 Run(y, h, data, 0, data.Length);
46 }
47
48 /*
49 * Compute GHASH over the provided data. The y[] array is
50 * updated, using the h[] secret value. If the data length
51 * is not a multiple of 16 bytes, then it is right-padded
52 * with zeros.
53 */
54 public static void Run(byte[] y, byte[] h,
55 byte[] data, int off, int len)
56 {
57 uint y0, y1, y2, y3;
58 uint h0, h1, h2, h3;
59 y3 = Dec32be(y, 0);
60 y2 = Dec32be(y, 4);
61 y1 = Dec32be(y, 8);
62 y0 = Dec32be(y, 12);
63 h3 = Dec32be(h, 0);
64 h2 = Dec32be(h, 4);
65 h1 = Dec32be(h, 8);
66 h0 = Dec32be(h, 12);
67
68 while (len > 0) {
69 /*
70 * Decode the next block and add it (XOR) into the
71 * current state.
72 */
73 if (len >= 16) {
74 y3 ^= Dec32be(data, off);
75 y2 ^= Dec32be(data, off + 4);
76 y1 ^= Dec32be(data, off + 8);
77 y0 ^= Dec32be(data, off + 12);
78 } else {
79 y3 ^= Dec32bePartial(data, off + 0, len - 0);
80 y2 ^= Dec32bePartial(data, off + 4, len - 4);
81 y1 ^= Dec32bePartial(data, off + 8, len - 8);
82 y0 ^= Dec32bePartial(data, off + 12, len - 12);
83 }
84 off += 16;
85 len -= 16;
86
87 /*
88 * We multiply two 128-bit field elements with
89 * two Karatsuba levels, to get down to nine
90 * 32->64 multiplications.
91 */
92 uint a0 = y0;
93 uint b0 = h0;
94 uint a1 = y1;
95 uint b1 = h1;
96 uint a2 = a0 ^ a1;
97 uint b2 = b0 ^ b1;
98
99 uint a3 = y2;
100 uint b3 = h2;
101 uint a4 = y3;
102 uint b4 = h3;
103 uint a5 = a3 ^ a4;
104 uint b5 = b3 ^ b4;
105
106 uint a6 = a0 ^ a3;
107 uint b6 = b0 ^ b3;
108 uint a7 = a1 ^ a4;
109 uint b7 = b1 ^ b4;
110 uint a8 = a6 ^ a7;
111 uint b8 = b6 ^ b7;
112
113 ulong z0 = BMul(a0, b0);
114 ulong z1 = BMul(a1, b1);
115 ulong z2 = BMul(a2, b2);
116 ulong z3 = BMul(a3, b3);
117 ulong z4 = BMul(a4, b4);
118 ulong z5 = BMul(a5, b5);
119 ulong z6 = BMul(a6, b6);
120 ulong z7 = BMul(a7, b7);
121 ulong z8 = BMul(a8, b8);
122
123 z2 ^= z0 ^ z1;
124 z0 ^= z2 << 32;
125 z1 ^= z2 >> 32;
126
127 z5 ^= z3 ^ z4;
128 z3 ^= z5 << 32;
129 z4 ^= z5 >> 32;
130
131 z8 ^= z6 ^ z7;
132 z6 ^= z8 << 32;
133 z7 ^= z8 >> 32;
134
135 z6 ^= z0 ^ z3;
136 z7 ^= z1 ^ z4;
137 z1 ^= z6;
138 z3 ^= z7;
139
140 /*
141 * 255-bit product is now in z4:z3:z1:z0. Since
142 * the GHASH specification uses a "reversed"
143 * notation, our 255-bit result must be shifted
144 * by 1 bit to the left.
145 */
146 z4 = (z4 << 1) | (z3 >> 63);
147 z3 = (z3 << 1) | (z1 >> 63);
148 z1 = (z1 << 1) | (z0 >> 63);
149 z0 = (z0 << 1);
150
151 /*
152 * Apply reduction modulo the degree-128
153 * polynomial that defines the field.
154 */
155 z3 ^= z0 ^ (z0 >> 1) ^ (z0 >> 2) ^ (z0 >> 7);
156 z1 ^= (z0 << 63) ^ (z0 << 62) ^ (z0 << 57);
157 z4 ^= z1 ^ (z1 >> 1) ^ (z1 >> 2) ^ (z1 >> 7);
158 z3 ^= (z1 << 63) ^ (z1 << 62) ^ (z1 << 57);
159
160 /*
161 * The reduced result is the new "y" state.
162 */
163 y0 = (uint)z3;
164 y1 = (uint)(z3 >> 32);
165 y2 = (uint)z4;
166 y3 = (uint)(z4 >> 32);
167 }
168
169 Enc32be(y3, y, 0);
170 Enc32be(y2, y, 4);
171 Enc32be(y1, y, 8);
172 Enc32be(y0, y, 12);
173 }
174
175 static ulong BMul(uint x, uint y)
176 {
177 ulong x0, x1, x2, x3;
178 ulong y0, y1, y2, y3;
179 ulong z0, z1, z2, z3;
180 x0 = (ulong)(x & 0x11111111);
181 x1 = (ulong)(x & 0x22222222);
182 x2 = (ulong)(x & 0x44444444);
183 x3 = (ulong)(x & 0x88888888);
184 y0 = (ulong)(y & 0x11111111);
185 y1 = (ulong)(y & 0x22222222);
186 y2 = (ulong)(y & 0x44444444);
187 y3 = (ulong)(y & 0x88888888);
188 z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
189 z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
190 z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
191 z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
192 z0 &= 0x1111111111111111;
193 z1 &= 0x2222222222222222;
194 z2 &= 0x4444444444444444;
195 z3 &= 0x8888888888888888;
196 return z0 | z1 | z2 | z3;
197 }
198
199 static uint Dec32be(byte[] buf, int off)
200 {
201 return ((uint)buf[off + 0] << 24)
202 | ((uint)buf[off + 1] << 16)
203 | ((uint)buf[off + 2] << 8)
204 | (uint)buf[off + 3];
205 }
206
207 static void Enc32be(uint x, byte[] buf, int off)
208 {
209 buf[off + 0] = (byte)(x >> 24);
210 buf[off + 1] = (byte)(x >> 16);
211 buf[off + 2] = (byte)(x >> 8);
212 buf[off + 3] = (byte)x;
213 }
214
215 static uint Dec32bePartial(byte[] buf, int off, int len)
216 {
217 if (len >= 4) {
218 return ((uint)buf[off + 0] << 24)
219 | ((uint)buf[off + 1] << 16)
220 | ((uint)buf[off + 2] << 8)
221 | (uint)buf[off + 3];
222 } else if (len >= 3) {
223 return ((uint)buf[off + 0] << 24)
224 | ((uint)buf[off + 1] << 16)
225 | ((uint)buf[off + 2] << 8);
226 } else if (len >= 2) {
227 return ((uint)buf[off + 0] << 24)
228 | ((uint)buf[off + 1] << 16);
229 } else if (len >= 1) {
230 return ((uint)buf[off + 0] << 24);
231 } else {
232 return 0;
233 }
234 }
235 }
236
237 }