2 chacha-merged.c version 20080118
/*
 * Width helpers.
 *   U32C(v) - appends the U suffix to an integer literal.
 *   U8V(v)  - converts v to unsigned char and masks it with U8C(0xFF)
 *             (U8C is not defined in this block).
 *   U32V(v) - converts v to uint32_t and masks it to 32 bits.
 */
#define U32C(v) (v##U)
#define U8V(v) ((unsigned char)(v) & U8C(0xFF))
#define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF))

/*
 * ROTL32(v, n) - rotate the low 32 bits of v left by n bit positions.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (or 32) yields v
 * unchanged instead of shifting by the full width of the type, which is
 * undefined behaviour in C. v is masked to 32 bits before either shift so
 * that bits above bit 31 of a wider argument cannot leak into the result.
 * v is evaluated twice; pass an expression without side effects.
 */
#define ROTL32(v, n) \
  (U32V(U32V(v) << ((unsigned)(n) & 31u)) | \
   (U32V(v) >> ((32u - (unsigned)(n)) & 31u)))
18 #if (USE_UNALIGNED == 1)
19 #define U8TO32_LITTLE(p) \
21 #define U32TO8_LITTLE(p, v) \
23 *((uint32_t *)(p)) = v; \
/*
 * U8TO32_LITTLE(p) - assemble a 32-bit value from the four bytes p[0..3],
 * with p[0] as the least significant byte and p[3] as the most significant.
 *
 * Each byte is converted to unsigned char before it is widened, so a
 * negative plain-char byte cannot sign-extend into the upper bits (p may be
 * a `const char *`, as with the `constants` pointer in chacha_keysetup).
 * p is evaluated four times; pass an expression without side effects.
 */
#define U8TO32_LITTLE(p) \
  (((uint32_t)(unsigned char)((p)[0])      ) | \
   ((uint32_t)(unsigned char)((p)[1]) <<  8) | \
   ((uint32_t)(unsigned char)((p)[2]) << 16) | \
   ((uint32_t)(unsigned char)((p)[3]) << 24))
31 #define U32TO8_LITTLE(p, v) \
34 (p)[1] = U8V((v) >> 8); \
35 (p)[2] = U8V((v) >> 16); \
36 (p)[3] = U8V((v) >> 24); \
/*
 * Word-level primitives used by the round function.
 *   ROTATE(word, bits) - forwards to ROTL32(word, bits).
 *   XOR(lhs, rhs)      - bitwise exclusive OR of the two operands.
 *   PLUS(lhs, rhs)     - sum of the two operands passed through U32V.
 *   PLUSONE(word)      - PLUS(word, 1).
 */
#define ROTATE(word, bits) (ROTL32(word, bits))
#define XOR(lhs, rhs) ((lhs) ^ (rhs))
#define PLUS(lhs, rhs) (U32V((lhs) + (rhs)))
#define PLUSONE(word) (PLUS((word), 1))
/*
 * QUARTERROUND(a, b, c, d) - update the four words a, b, c, d in place.
 *
 * Four add / xor / rotate steps, with rotation amounts 16, 12, 8 and 7.
 * The arguments must be plain lvalues: each is read and assigned several
 * times. The expansion is a bare sequence of statements that ends with its
 * own semicolon, so it is invoked without a trailing ';' and must sit
 * inside a braced block.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = XOR(d,a); d = ROTATE(d,16); \
  c = PLUS(c,d); b = XOR(b,c); b = ROTATE(b,12); \
  a = PLUS(a,b); d = XOR(d,a); d = ROTATE(d, 8); \
  c = PLUS(c,d); b = XOR(b,c); b = ROTATE(b, 7);
/*
 * 16-byte constant blocks with no terminating NUL.
 *   sigma spells "expand 32-byte k"
 *   tau   spells "expand 16-byte k"
 *
 * The bytes are listed individually rather than taken from a 16-character
 * string literal: initialising char[16] from such a literal silently drops
 * the NUL, which is rejected by C++ compilers and reported by GCC's
 * -Wunterminated-string-initialization. The stored bytes are unchanged.
 */
static const char sigma[16] = {
  'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
  '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
static const char tau[16] = {
  'e', 'x', 'p', 'a', 'n', 'd', ' ', '1',
  '6', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
55 chacha_keysetup(struct chacha_ctx *x, const unsigned char *k, uint32_t kbits) {
56 const char *constants;
58 x->input[4] = U8TO32_LITTLE(k + 0);
59 x->input[5] = U8TO32_LITTLE(k + 4);
60 x->input[6] = U8TO32_LITTLE(k + 8);
61 x->input[7] = U8TO32_LITTLE(k + 12);
63 if(kbits == 256) { /* recommended */
66 } else { /* kbits == 128 */
70 x->input[8] = U8TO32_LITTLE(k + 0);
71 x->input[9] = U8TO32_LITTLE(k + 4);
72 x->input[10] = U8TO32_LITTLE(k + 8);
73 x->input[11] = U8TO32_LITTLE(k + 12);
74 x->input[0] = U8TO32_LITTLE(constants + 0);
75 x->input[1] = U8TO32_LITTLE(constants + 4);
76 x->input[2] = U8TO32_LITTLE(constants + 8);
77 x->input[3] = U8TO32_LITTLE(constants + 12);
81 chacha_ivsetup(struct chacha_ctx *x, const unsigned char *iv, const unsigned char *counter) {
82 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
83 //x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
84 x->input[13] = U8TO32_LITTLE(iv + 0);
85 x->input[14] = U8TO32_LITTLE(iv + 4);
86 x->input[15] = U8TO32_LITTLE(iv + 8);
90 chacha_encrypt_bytes(struct chacha_ctx *x, const unsigned char *m, unsigned char *c, uint32_t bytes) {
91 uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
92 uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
93 unsigned char *ctarget = NULL;
94 unsigned char tmp[64];
120 #if (USE_MEMCPY == 1)
121 memcpy(tmp, m, bytes);
124 for(i = 0; i < bytes; ++i) {
151 for(i = 20; i > 0; i -= 2) {
152 QUARTERROUND(x0, x4, x8, x12)
153 QUARTERROUND(x1, x5, x9, x13)
154 QUARTERROUND(x2, x6, x10, x14)
155 QUARTERROUND(x3, x7, x11, x15)
156 QUARTERROUND(x0, x5, x10, x15)
157 QUARTERROUND(x1, x6, x11, x12)
158 QUARTERROUND(x2, x7, x8, x13)
159 QUARTERROUND(x3, x4, x9, x14)
172 x10 = PLUS(x10, j10);
173 x11 = PLUS(x11, j11);
174 x12 = PLUS(x12, j12);
175 x13 = PLUS(x13, j13);
176 x14 = PLUS(x14, j14);
177 x15 = PLUS(x15, j15);
179 x0 = XOR(x0, U8TO32_LITTLE(m + 0));
180 x1 = XOR(x1, U8TO32_LITTLE(m + 4));
181 x2 = XOR(x2, U8TO32_LITTLE(m + 8));
182 x3 = XOR(x3, U8TO32_LITTLE(m + 12));
183 x4 = XOR(x4, U8TO32_LITTLE(m + 16));
184 x5 = XOR(x5, U8TO32_LITTLE(m + 20));
185 x6 = XOR(x6, U8TO32_LITTLE(m + 24));
186 x7 = XOR(x7, U8TO32_LITTLE(m + 28));
187 x8 = XOR(x8, U8TO32_LITTLE(m + 32));
188 x9 = XOR(x9, U8TO32_LITTLE(m + 36));
189 x10 = XOR(x10, U8TO32_LITTLE(m + 40));
190 x11 = XOR(x11, U8TO32_LITTLE(m + 44));
191 x12 = XOR(x12, U8TO32_LITTLE(m + 48));
192 x13 = XOR(x13, U8TO32_LITTLE(m + 52));
193 x14 = XOR(x14, U8TO32_LITTLE(m + 56));
194 x15 = XOR(x15, U8TO32_LITTLE(m + 60));
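/* stopping at 2^70 bytes per nonce is user's responsibility.
 * NOTE(review): chacha_ivsetup loads only input[12] from the counter and
 * input[13..15] from the IV, so the per-nonce limit here looks like
 * 2^32 blocks (2^38 bytes) rather than 2^70 bytes -- confirm. */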
203 U32TO8_LITTLE(c + 0, x0);
204 U32TO8_LITTLE(c + 4, x1);
205 U32TO8_LITTLE(c + 8, x2);
206 U32TO8_LITTLE(c + 12, x3);
207 U32TO8_LITTLE(c + 16, x4);
208 U32TO8_LITTLE(c + 20, x5);
209 U32TO8_LITTLE(c + 24, x6);
210 U32TO8_LITTLE(c + 28, x7);
211 U32TO8_LITTLE(c + 32, x8);
212 U32TO8_LITTLE(c + 36, x9);
213 U32TO8_LITTLE(c + 40, x10);
214 U32TO8_LITTLE(c + 44, x11);
215 U32TO8_LITTLE(c + 48, x12);
216 U32TO8_LITTLE(c + 52, x13);
217 U32TO8_LITTLE(c + 56, x14);
218 U32TO8_LITTLE(c + 60, x15);
222 #if (USE_MEMCPY == 1)
223 memcpy(ctarget, c, bytes);
226 for(i = 0; i < bytes; ++i) {