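+ * If there is a borrow, then we need to add p. We (virtually)
+ * add 2^256, then subtract 2^256 - p. When t = 1, the four words
+ * subtracted below (least significant first) are the 64-bit limbs
+ * of 2^224 - 2^192 - 2^96 + 1; when t = 0 they are all zero.
+ */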
+ t = cc;
+ cc = _subborrow_u64(0, d[0], t, &d[0]);
+ cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
+ cc = _subborrow_u64(cc, d[2], -t, &d[2]);
+ cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+ /*
+ * If there still is a borrow, then we need to add p again.