Branch data Line data Source code
1 : : /* gf128mul.h - GF(2^128) multiplication functions 2 : : * 3 : : * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. 4 : : * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> 5 : : * 6 : : * Based on Dr Brian Gladman's (GPL'd) work published at 7 : : * http://fp.gladman.plus.com/cryptography_technology/index.htm 8 : : * See the original copyright notice below. 9 : : * 10 : : * This program is free software; you can redistribute it and/or modify it 11 : : * under the terms of the GNU General Public License as published by the Free 12 : : * Software Foundation; either version 2 of the License, or (at your option) 13 : : * any later version. 14 : : */ 15 : : /* 16 : : --------------------------------------------------------------------------- 17 : : Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 18 : : 19 : : LICENSE TERMS 20 : : 21 : : The free distribution and use of this software in both source and binary 22 : : form is allowed (with or without changes) provided that: 23 : : 24 : : 1. distributions of this source code include the above copyright 25 : : notice, this list of conditions and the following disclaimer; 26 : : 27 : : 2. distributions in binary form include the above copyright 28 : : notice, this list of conditions and the following disclaimer 29 : : in the documentation and/or other associated materials; 30 : : 31 : : 3. the copyright holder's name is not used to endorse products 32 : : built using this software without specific written permission. 33 : : 34 : : ALTERNATIVELY, provided that this notice is retained in full, this product 35 : : may be distributed under the terms of the GNU General Public License (GPL), 36 : : in which case the provisions of the GPL apply INSTEAD OF those given above. 37 : : 38 : : DISCLAIMER 39 : : 40 : : This software is provided 'as is' with no explicit or implied warranties 41 : : in respect of its properties, including, but not limited to, correctness 42 : : and/or fitness for purpose. 43 : : --------------------------------------------------------------------------- 44 : : Issue Date: 31/01/2006 45 : : 46 : : An implementation of field multiplication in Galois Field GF(2^128) 47 : : */ 48 : : 49 : : #ifndef _CRYPTO_GF128MUL_H 50 : : #define _CRYPTO_GF128MUL_H 51 : : 52 : : #include <asm/byteorder.h> 53 : : #include <crypto/b128ops.h> 54 : : #include <linux/slab.h> 55 : : 56 : : /* Comment by Rik: 57 : : * 58 : : * For some background on GF(2^128) see for example: 59 : : * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf 60 : : * 61 : : * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can 62 : : * be mapped to computer memory in a variety of ways. Let's examine 63 : : * three common cases. 64 : : * 65 : : * Take a look at the 16 binary octets below in memory order. The msb's 66 : : * are left and the lsb's are right. char b[16] is an array and b[0] is 67 : : * the first octet. 68 : : * 69 : : * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 70 : : * b[0] b[1] b[2] b[3] b[13] b[14] b[15] 71 : : * 72 : : * Every bit is a coefficient of some power of X. We can store the bits 73 : : * in every byte in little-endian order and the bytes themselves also in 74 : : * little endian order. I will call this lle (little-little-endian). 75 : : * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks 76 : : * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. 77 : : * This format was originally implemented in gf128mul and is used 78 : : * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). 79 : : * 80 : : * Another convention says: store the bits in bigendian order and the 81 : : * bytes also. This is bbe (big-big-endian). Now the buffer above 82 : : * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, 83 : : * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe 84 : : * is partly implemented. 85 : : * 86 : : * Both of the above formats are easy to implement on big-endian 87 : : * machines. 88 : : * 89 : : * XTS and EME (the latter of which is patent encumbered) use the ble 90 : : * format (bits are stored in big endian order and the bytes in little 91 : : * endian). The above buffer represents X^7 in this case and the 92 : : * primitive polynomial is b[0] = 0x87. 93 : : * 94 : : * The common machine word-size is smaller than 128 bits, so to make 95 : : * an efficient implementation we must split into machine word sizes. 96 : : * This implementation uses 64-bit words for the moment. Machine 97 : : * endianness comes into play. The lle format in relation to machine 98 : : * endianness is discussed below by the original author of gf128mul Dr 99 : : * Brian Gladman. 100 : : * 101 : : * Let's look at the bbe and ble format on a little endian machine. 102 : : * 103 : : * bbe on a little endian machine u32 x[4]: 104 : : * 105 : : * MS x[0] LS MS x[1] LS 106 : : * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 107 : : * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 108 : : * 109 : : * MS x[2] LS MS x[3] LS 110 : : * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 111 : : * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 112 : : * 113 : : * ble on a little endian machine 114 : : * 115 : : * MS x[0] LS MS x[1] LS 116 : : * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 117 : : * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 118 : : * 119 : : * MS x[2] LS MS x[3] LS 120 : : * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 121 : : * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 122 : : * 123 : : * Multiplications in GF(2^128) are mostly bit-shifts, so you see why 124 : : * ble (and lbe also) are easier to implement on a little-endian 125 : : * machine than on a big-endian machine. The converse holds for bbe 126 : : * and lle. 127 : : * 128 : : * Note: to have good alignment, it seems to me that it is sufficient 129 : : * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize 130 : : * machines this will automatically aligned to wordsize and on a 64-bit 131 : : * machine also. 132 : : */ 133 : : /* Multiply a GF(2^128) field element by x. Field elements are 134 : : held in arrays of bytes in which field bits 8n..8n + 7 are held in 135 : : byte[n], with lower indexed bits placed in the more numerically 136 : : significant bit positions within bytes. 137 : : 138 : : On little endian machines the bit indexes translate into the bit 139 : : positions within four 32-bit words in the following way 140 : : 141 : : MS x[0] LS MS x[1] LS 142 : : ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 143 : : 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 144 : : 145 : : MS x[2] LS MS x[3] LS 146 : : ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 147 : : 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 148 : : 149 : : On big endian machines the bit indexes translate into the bit 150 : : positions within four 32-bit words in the following way 151 : : 152 : : MS x[0] LS MS x[1] LS 153 : : ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 154 : : 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 155 : : 156 : : MS x[2] LS MS x[3] LS 157 : : ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 158 : : 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 159 : : */ 160 : : 161 : : /* A slow generic version of gf_mul, implemented for lle and bbe 162 : : * It multiplies a and b and puts the result in a */ 163 : : void gf128mul_lle(be128 *a, const be128 *b); 164 : : 165 : : void gf128mul_bbe(be128 *a, const be128 *b); 166 : : 167 : : /* 168 : : * The following functions multiply a field element by x in 169 : : * the polynomial field representation. They use 64-bit word operations 170 : : * to gain speed but compensate for machine endianness and hence work 171 : : * correctly on both styles of machine. 172 : : * 173 : : * They are defined here for performance. 174 : : */ 175 : : 176 : : static inline u64 gf128mul_mask_from_bit(u64 x, int which) 177 : : { 178 : : /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ 179 : 0 : return ((s64)(x << (63 - which)) >> 63); 180 : : } 181 : : 182 : : static inline void gf128mul_x_lle(be128 *r, const be128 *x) 183 : : { 184 : : u64 a = be64_to_cpu(x->a); 185 : : u64 b = be64_to_cpu(x->b); 186 : : 187 : : /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 188 : : * (see crypto/gf128mul.c): */ 189 : : u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); 190 : : 191 : : r->b = cpu_to_be64((b >> 1) | (a << 63)); 192 : : r->a = cpu_to_be64((a >> 1) ^ _tt); 193 : : } 194 : : 195 : : static inline void gf128mul_x_bbe(be128 *r, const be128 *x) 196 : : { 197 : : u64 a = be64_to_cpu(x->a); 198 : : u64 b = be64_to_cpu(x->b); 199 : : 200 : : /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ 201 : : u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 202 : : 203 : : r->a = cpu_to_be64((a << 1) | (b >> 63)); 204 : : r->b = cpu_to_be64((b << 1) ^ _tt); 205 : : } 206 : : 207 : : /* needed by XTS */ 208 : : static inline void gf128mul_x_ble(le128 *r, const le128 *x) 209 : : { 210 : 0 : u64 a = le64_to_cpu(x->a); 211 : 0 : u64 b = le64_to_cpu(x->b); 212 : : 213 : : /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ 214 : 0 : u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 215 : : 216 : 0 : r->a = cpu_to_le64((a << 1) | (b >> 63)); 217 : 0 : r->b = cpu_to_le64((b << 1) ^ _tt); 218 : : } 219 : : 220 : : /* 4k table optimization */ 221 : : 222 : : struct gf128mul_4k { 223 : : be128 t[256]; 224 : : }; 225 : : 226 : : struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); 227 : : struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); 228 : : void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); 229 : : void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); 230 : : void gf128mul_x8_ble(le128 *r, const le128 *x); 231 : : static inline void gf128mul_free_4k(struct gf128mul_4k *t) 232 : : { 233 : : kzfree(t); 234 : : } 235 : : 236 : : 237 : : /* 64k table optimization, implemented for bbe */ 238 : : 239 : : struct gf128mul_64k { 240 : : struct gf128mul_4k *t[16]; 241 : : }; 242 : : 243 : : /* First initialize with the constant factor with which you 244 : : * want to multiply and then call gf128mul_64k_bbe with the other 245 : : * factor in the first argument, and the table in the second. 246 : : * Afterwards, the result is stored in *a. 247 : : */ 248 : : struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); 249 : : void gf128mul_free_64k(struct gf128mul_64k *t); 250 : : void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); 251 : : 252 : : #endif /* _CRYPTO_GF128MUL_H */