2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * GHASH implementation using integer multiplications (32->64). It is
31 * constant-time, provided that the platform multiplication opcode is
35 public sealed class GHASH {
38 * Compute GHASH over the provided data. The y[] array is
39 * updated, using the h[] secret value. If the data length
40 * is not a multiple of 16 bytes, then it is right-padded
43 public static void Run(byte[] y, byte[] h, byte[] data)
45 Run(y, h, data, 0, data.Length);
49 * Compute GHASH over the provided data. The y[] array is
50 * updated, using the h[] secret value. If the data length
51 * is not a multiple of 16 bytes, then it is right-padded
54 public static void Run(byte[] y, byte[] h,
55 byte[] data, int off, int len)
70 * Decode the next block and add it (XOR) into the
74 y3 ^= Dec32be(data, off);
75 y2 ^= Dec32be(data, off + 4);
76 y1 ^= Dec32be(data, off + 8);
77 y0 ^= Dec32be(data, off + 12);
79 y3 ^= Dec32bePartial(data, off + 0, len - 0);
80 y2 ^= Dec32bePartial(data, off + 4, len - 4);
81 y1 ^= Dec32bePartial(data, off + 8, len - 8);
82 y0 ^= Dec32bePartial(data, off + 12, len - 12);
88 * We multiply two 128-bit field elements with
89 * two Karatsuba levels, to get down to nine
90 * 32->64 multiplications.
113 ulong z0 = BMul(a0, b0);
114 ulong z1 = BMul(a1, b1);
115 ulong z2 = BMul(a2, b2);
116 ulong z3 = BMul(a3, b3);
117 ulong z4 = BMul(a4, b4);
118 ulong z5 = BMul(a5, b5);
119 ulong z6 = BMul(a6, b6);
120 ulong z7 = BMul(a7, b7);
121 ulong z8 = BMul(a8, b8);
141 * 255-bit product is now in z4:z3:z1:z0. Since
142 * the GHASH specification uses a "reversed"
143 * notation, our 255-bit result must be shifted
144 * by 1 bit to the left.
146 z4 = (z4 << 1) | (z3 >> 63);
147 z3 = (z3 << 1) | (z1 >> 63);
148 z1 = (z1 << 1) | (z0 >> 63);
152 * Apply reduction modulo the degree-128
153 * polynomial that defines the field.
155 z3 ^= z0 ^ (z0 >> 1) ^ (z0 >> 2) ^ (z0 >> 7);
156 z1 ^= (z0 << 63) ^ (z0 << 62) ^ (z0 << 57);
157 z4 ^= z1 ^ (z1 >> 1) ^ (z1 >> 2) ^ (z1 >> 7);
158 z3 ^= (z1 << 63) ^ (z1 << 62) ^ (z1 << 57);
161 * The reduced result is the new "y" state.
164 y1 = (uint)(z3 >> 32);
166 y3 = (uint)(z4 >> 32);
175 static ulong BMul(uint x, uint y)
177 ulong x0, x1, x2, x3;
178 ulong y0, y1, y2, y3;
179 ulong z0, z1, z2, z3;
180 x0 = (ulong)(x & 0x11111111);
181 x1 = (ulong)(x & 0x22222222);
182 x2 = (ulong)(x & 0x44444444);
183 x3 = (ulong)(x & 0x88888888);
184 y0 = (ulong)(y & 0x11111111);
185 y1 = (ulong)(y & 0x22222222);
186 y2 = (ulong)(y & 0x44444444);
187 y3 = (ulong)(y & 0x88888888);
188 z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
189 z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
190 z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
191 z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
192 z0 &= 0x1111111111111111;
193 z1 &= 0x2222222222222222;
194 z2 &= 0x4444444444444444;
195 z3 &= 0x8888888888888888;
196 return z0 | z1 | z2 | z3;
199 static uint Dec32be(byte[] buf, int off)
201 return ((uint)buf[off + 0] << 24)
202 | ((uint)buf[off + 1] << 16)
203 | ((uint)buf[off + 2] << 8)
204 | (uint)buf[off + 3];
207 static void Enc32be(uint x, byte[] buf, int off)
209 buf[off + 0] = (byte)(x >> 24);
210 buf[off + 1] = (byte)(x >> 16);
211 buf[off + 2] = (byte)(x >> 8);
212 buf[off + 3] = (byte)x;
215 static uint Dec32bePartial(byte[] buf, int off, int len)
218 return ((uint)buf[off + 0] << 24)
219 | ((uint)buf[off + 1] << 16)
220 | ((uint)buf[off + 2] << 8)
221 | (uint)buf[off + 3];
222 } else if (len >= 3) {
223 return ((uint)buf[off + 0] << 24)
224 | ((uint)buf[off + 1] << 16)
225 | ((uint)buf[off + 2] << 8);
226 } else if (len >= 2) {
227 return ((uint)buf[off + 0] << 24)
228 | ((uint)buf[off + 1] << 16);
229 } else if (len >= 1) {
230 return ((uint)buf[off + 0] << 24);