From: Guus Sliepen Date: Mon, 9 Aug 2021 20:30:42 +0000 (+0200) Subject: Update the built-in Chacha20-Poly1305 code to an RFC 7539 compliant version. X-Git-Url: https://tinc-vpn.org/git/browse?a=commitdiff_plain;h=63df2139835bb532125562c73ec3efbc437a3634;p=tinc Update the built-in Chacha20-Poly1305 code to an RFC 7539 compliant version. This is necessary so our copy of Chacha20-Poly1305 is compatible with that of many other crypto libraries. This code is made by Grigori Goronzy, but is heavily based on the code from D.J. Bernstein's ref10 implementation used before. --- diff --git a/src/chacha-poly1305/chacha-poly1305.c b/src/chacha-poly1305/chacha-poly1305.c deleted file mode 100644 index 77d531ad..00000000 --- a/src/chacha-poly1305/chacha-poly1305.c +++ /dev/null @@ -1,102 +0,0 @@ -#include "../system.h" -#include "../xalloc.h" - -#include "chacha.h" -#include "chacha-poly1305.h" -#include "poly1305.h" - -struct chacha_poly1305_ctx { - struct chacha_ctx main_ctx, header_ctx; -}; - -chacha_poly1305_ctx_t *chacha_poly1305_init(void) { - return xzalloc(sizeof(chacha_poly1305_ctx_t)); -} - -void chacha_poly1305_exit(chacha_poly1305_ctx_t *ctx) { - xzfree(ctx, sizeof(chacha_poly1305_ctx_t)); -} - -bool chacha_poly1305_set_key(chacha_poly1305_ctx_t *ctx, const uint8_t *key) { - chacha_keysetup(&ctx->main_ctx, key, 256); - chacha_keysetup(&ctx->header_ctx, key + 32, 256); - return true; -} - -static void put_u64(void *vp, uint64_t v) { - uint8_t *p = (uint8_t *) vp; - - p[0] = (uint8_t)(v >> 56) & 0xff; - p[1] = (uint8_t)(v >> 48) & 0xff; - p[2] = (uint8_t)(v >> 40) & 0xff; - p[3] = (uint8_t)(v >> 32) & 0xff; - p[4] = (uint8_t)(v >> 24) & 0xff; - p[5] = (uint8_t)(v >> 16) & 0xff; - p[6] = (uint8_t)(v >> 8) & 0xff; - p[7] = (uint8_t) v & 0xff; -} - -bool chacha_poly1305_encrypt(chacha_poly1305_ctx_t *ctx, uint64_t seqnr, const void *indata, size_t inlen, void *voutdata, size_t *outlen) { - uint8_t seqbuf[8]; - const uint8_t one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 }; /* NB 
little-endian */ - uint8_t poly_key[POLY1305_KEYLEN]; - uint8_t *outdata = voutdata; - - /* - * Run ChaCha20 once to generate the Poly1305 key. The IV is the - * packet sequence number. - */ - memset(poly_key, 0, sizeof(poly_key)); - put_u64(seqbuf, seqnr); - chacha_ivsetup(&ctx->main_ctx, seqbuf, NULL); - chacha_encrypt_bytes(&ctx->main_ctx, poly_key, poly_key, sizeof(poly_key)); - - /* Set Chacha's block counter to 1 */ - chacha_ivsetup(&ctx->main_ctx, seqbuf, one); - - chacha_encrypt_bytes(&ctx->main_ctx, indata, outdata, inlen); - poly1305_auth(outdata + inlen, outdata, inlen, poly_key); - - if(outlen) { - *outlen = inlen + POLY1305_TAGLEN; - } - - return true; -} - -bool chacha_poly1305_decrypt(chacha_poly1305_ctx_t *ctx, uint64_t seqnr, const void *vindata, size_t inlen, void *outdata, size_t *outlen) { - uint8_t seqbuf[8]; - const uint8_t one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 }; /* NB little-endian */ - uint8_t expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN]; - const uint8_t *indata = vindata; - - /* - * Run ChaCha20 once to generate the Poly1305 key. The IV is the - * packet sequence number. 
- */ - memset(poly_key, 0, sizeof(poly_key)); - put_u64(seqbuf, seqnr); - chacha_ivsetup(&ctx->main_ctx, seqbuf, NULL); - chacha_encrypt_bytes(&ctx->main_ctx, poly_key, poly_key, sizeof(poly_key)); - - /* Set Chacha's block counter to 1 */ - chacha_ivsetup(&ctx->main_ctx, seqbuf, one); - - /* Check tag before anything else */ - inlen -= POLY1305_TAGLEN; - const uint8_t *tag = indata + inlen; - - poly1305_auth(expected_tag, indata, inlen, poly_key); - - if(memcmp(expected_tag, tag, POLY1305_TAGLEN)) { - return false; - } - - chacha_encrypt_bytes(&ctx->main_ctx, indata, outdata, inlen); - - if(outlen) { - *outlen = inlen; - } - - return true; -} diff --git a/src/chacha-poly1305/chacha-poly1305.h b/src/chacha-poly1305/chacha-poly1305.h deleted file mode 100644 index e75d984f..00000000 --- a/src/chacha-poly1305/chacha-poly1305.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CHACHA_POLY1305_H -#define CHACHA_POLY1305_H - -#define CHACHA_POLY1305_KEYLEN 64 - -typedef struct chacha_poly1305_ctx chacha_poly1305_ctx_t; - -extern void chacha_poly1305_exit(chacha_poly1305_ctx_t *); -extern chacha_poly1305_ctx_t *chacha_poly1305_init(void) ATTR_DEALLOCATOR(chacha_poly1305_exit); -extern bool chacha_poly1305_set_key(chacha_poly1305_ctx_t *ctx, const uint8_t *key); - -extern bool chacha_poly1305_encrypt(chacha_poly1305_ctx_t *ctx, uint64_t seqnr, const void *indata, size_t inlen, void *outdata, size_t *outlen); -extern bool chacha_poly1305_decrypt(chacha_poly1305_ctx_t *ctx, uint64_t seqnr, const void *indata, size_t inlen, void *outdata, size_t *outlen); - -#endif //CHACHA_POLY1305_H diff --git a/src/chacha-poly1305/chacha.c b/src/chacha-poly1305/chacha.c index 696f44a5..4452aca2 100644 --- a/src/chacha-poly1305/chacha.c +++ b/src/chacha-poly1305/chacha.c @@ -4,27 +4,30 @@ D. J. Bernstein Public domain. 
*/ -#include "../system.h" - #include "chacha.h" -typedef struct chacha_ctx chacha_ctx; - #define U8C(v) (v##U) #define U32C(v) (v##U) -#define U8V(v) ((uint8_t)(v) & U8C(0xFF)) +#define U8V(v) ((unsigned char)(v) & U8C(0xFF)) #define U32V(v) ((uint32_t)(v) & U32C(0xFFFFFFFF)) #define ROTL32(v, n) \ (U32V((v) << (n)) | ((v) >> (32 - (n)))) +#if (USE_UNALIGNED == 1) +#define U8TO32_LITTLE(p) \ + (*((uint32_t *)(p))) +#define U32TO8_LITTLE(p, v) \ + do { \ + *((uint32_t *)(p)) = v; \ + } while (0) +#else #define U8TO32_LITTLE(p) \ (((uint32_t)((p)[0]) ) | \ ((uint32_t)((p)[1]) << 8) | \ ((uint32_t)((p)[2]) << 16) | \ ((uint32_t)((p)[3]) << 24)) - #define U32TO8_LITTLE(p, v) \ do { \ (p)[0] = U8V((v) ); \ @@ -32,6 +35,7 @@ typedef struct chacha_ctx chacha_ctx; (p)[2] = U8V((v) >> 16); \ (p)[3] = U8V((v) >> 24); \ } while (0) +#endif #define ROTATE(v,c) (ROTL32(v,c)) #define XOR(v,w) ((v) ^ (w)) @@ -47,7 +51,8 @@ typedef struct chacha_ctx chacha_ctx; static const char sigma[16] = "expand 32-byte k"; static const char tau[16] = "expand 16-byte k"; -void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits) { +void +chacha_keysetup(struct chacha_ctx *x, const unsigned char *k, uint32_t kbits) { const char *constants; x->input[4] = U8TO32_LITTLE(k + 0); @@ -55,10 +60,10 @@ void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits) { x->input[6] = U8TO32_LITTLE(k + 8); x->input[7] = U8TO32_LITTLE(k + 12); - if(kbits == 256) { /* recommended */ + if(kbits == 256) { /* recommended */ k += 16; constants = sigma; - } else { /* kbits == 128 */ + } else { /* kbits == 128 */ constants = tau; } @@ -72,19 +77,21 @@ void chacha_keysetup(chacha_ctx *x, const uint8_t *k, uint32_t kbits) { x->input[3] = U8TO32_LITTLE(constants + 12); } -void chacha_ivsetup(chacha_ctx *x, const uint8_t *iv, const uint8_t *counter) { +void +chacha_ivsetup(struct chacha_ctx *x, const unsigned char *iv, const unsigned char *counter) { x->input[12] = counter == NULL ? 
0 : U8TO32_LITTLE(counter + 0); - x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); - x->input[14] = U8TO32_LITTLE(iv + 0); - x->input[15] = U8TO32_LITTLE(iv + 4); + //x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); + x->input[13] = U8TO32_LITTLE(iv + 0); + x->input[14] = U8TO32_LITTLE(iv + 4); + x->input[15] = U8TO32_LITTLE(iv + 8); } void -chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes) { +chacha_encrypt_bytes(struct chacha_ctx *x, const unsigned char *m, unsigned char *c, uint32_t bytes) { uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; - uint8_t *ctarget = NULL; - uint8_t tmp[64]; + unsigned char *ctarget = NULL; + unsigned char tmp[64]; uint32_t i; if(!bytes) { @@ -110,10 +117,15 @@ chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes for(;;) { if(bytes < 64) { +#if (USE_MEMCPY == 1) + memcpy(tmp, m, bytes); +#else + for(i = 0; i < bytes; ++i) { tmp[i] = m[i]; } +#endif m = tmp; ctarget = c; c = tmp; @@ -207,9 +219,15 @@ chacha_encrypt_bytes(chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes if(bytes <= 64) { if(bytes < 64) { +#if (USE_MEMCPY == 1) + memcpy(ctarget, c, bytes); +#else + for(i = 0; i < bytes; ++i) { ctarget[i] = c[i]; } + +#endif } x->input[12] = j12; diff --git a/src/chacha-poly1305/chacha.h b/src/chacha-poly1305/chacha.h index 103c3d81..a137ab6b 100644 --- a/src/chacha-poly1305/chacha.h +++ b/src/chacha-poly1305/chacha.h @@ -7,18 +7,28 @@ Public domain. 
#ifndef CHACHA_H #define CHACHA_H +#include +#include +#include +#include + +#define CHACHA_BLOCKLEN 64 + +/* use memcpy() to copy blocks of memory (typically faster) */ +#define USE_MEMCPY 1 +/* use unaligned little-endian load/store (can be faster) */ +#define USE_UNALIGNED 0 + struct chacha_ctx { uint32_t input[16]; }; -#define CHACHA_MINKEYLEN 16 -#define CHACHA_NONCELEN 8 -#define CHACHA_CTRLEN 8 -#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN) -#define CHACHA_BLOCKLEN 64 +void chacha_keysetup(struct chacha_ctx *x, const unsigned char *k, + uint32_t kbits); +void chacha_ivsetup(struct chacha_ctx *x, const unsigned char *iv, + const unsigned char *ctr); +void chacha_encrypt_bytes(struct chacha_ctx *x, const unsigned char *m, + unsigned char *c, uint32_t bytes); -void chacha_keysetup(struct chacha_ctx *x, const uint8_t *k, uint32_t kbits); -void chacha_ivsetup(struct chacha_ctx *x, const uint8_t *iv, const uint8_t *ctr); -void chacha_encrypt_bytes(struct chacha_ctx *x, const uint8_t *m, uint8_t *c, uint32_t bytes); +#endif /* CHACHA_H */ -#endif /* CHACHA_H */ diff --git a/src/chacha-poly1305/chachapoly.c b/src/chacha-poly1305/chachapoly.c new file mode 100644 index 00000000..9a6620ce --- /dev/null +++ b/src/chacha-poly1305/chachapoly.c @@ -0,0 +1,182 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Grigori Goronzy + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "chachapoly.h" + +/** + * Constant-time memory compare. This should help to protect against + * side-channel attacks. + * + * \param av input 1 + * \param bv input 2 + * \param n bytes to compare + * \return 0 if inputs are equal + */ +static int memcmp_eq(const void *av, const void *bv, int n) { + const unsigned char *a = (const unsigned char *) av; + const unsigned char *b = (const unsigned char *) bv; + unsigned char res = 0; + int i; + + for(i = 0; i < n; i++) { + res |= *a ^ *b; + a++; + b++; + } + + return res; +} + +/** + * Poly1305 tag generation. This concatenates a string according to the rules + * outlined in RFC 7539 and calculates the tag. 
+ * + * \param poly_key 32 byte secret one-time key for poly1305 + * \param ad associated data + * \param ad_len associated data length in bytes + * \param ct ciphertext + * \param ct_len ciphertext length in bytes + * \param tag pointer to 16 bytes for tag storage + */ +static void poly1305_get_tag(unsigned char *poly_key, const void *ad, + int ad_len, const void *ct, int ct_len, unsigned char *tag) { + struct poly1305_context poly; + unsigned left_over; + uint64_t len; + unsigned char pad[16]; + + poly1305_init(&poly, poly_key); + memset(&pad, 0, sizeof(pad)); + + /* associated data and padding */ + poly1305_update(&poly, ad, ad_len); + left_over = ad_len % 16; + + if(left_over) { + poly1305_update(&poly, pad, 16 - left_over); + } + + /* payload and padding */ + poly1305_update(&poly, ct, ct_len); + left_over = ct_len % 16; + + if(left_over) { + poly1305_update(&poly, pad, 16 - left_over); + } + + /* lengths */ + len = ad_len; + poly1305_update(&poly, (unsigned char *)&len, 8); + len = ct_len; + poly1305_update(&poly, (unsigned char *)&len, 8); + + poly1305_finish(&poly, tag); +} + +int chachapoly_init(struct chachapoly_ctx *ctx, const void *key, int key_len) { + assert(key_len == 128 || key_len == 256); + + memset(ctx, 0, sizeof(*ctx)); + chacha_keysetup(&ctx->cha_ctx, key, key_len); + return CHACHAPOLY_OK; +} + +int chachapoly_crypt(struct chachapoly_ctx *ctx, const void *nonce, + const void *ad, int ad_len, void *input, int input_len, + void *output, void *tag, int tag_len, int encrypt) { + unsigned char poly_key[CHACHA_BLOCKLEN]; + unsigned char calc_tag[POLY1305_TAGLEN]; + const unsigned char one[4] = { 1, 0, 0, 0 }; + + /* initialize keystream and generate poly1305 key */ + memset(poly_key, 0, sizeof(poly_key)); + chacha_ivsetup(&ctx->cha_ctx, nonce, NULL); + chacha_encrypt_bytes(&ctx->cha_ctx, poly_key, poly_key, sizeof(poly_key)); + + /* check tag if decrypting */ + if(encrypt == 0 && tag_len) { + poly1305_get_tag(poly_key, ad, ad_len, input, input_len, 
calc_tag); + + if(memcmp_eq(calc_tag, tag, tag_len) != 0) { + return CHACHAPOLY_INVALID_MAC; + } + } + + /* crypt data */ + chacha_ivsetup(&ctx->cha_ctx, nonce, one); + chacha_encrypt_bytes(&ctx->cha_ctx, (unsigned char *)input, + (unsigned char *)output, input_len); + + /* add tag if encrypting */ + if(encrypt && tag_len) { + poly1305_get_tag(poly_key, ad, ad_len, output, input_len, calc_tag); + memcpy(tag, calc_tag, tag_len); + } + + return CHACHAPOLY_OK; +} + +int chachapoly_crypt_short(struct chachapoly_ctx *ctx, const void *nonce, + const void *ad, int ad_len, void *input, int input_len, + void *output, void *tag, int tag_len, int encrypt) { + unsigned char keystream[CHACHA_BLOCKLEN]; + unsigned char calc_tag[POLY1305_TAGLEN]; + int i; + + assert(input_len <= 32); + + /* initialize keystream and generate poly1305 key */ + memset(keystream, 0, sizeof(keystream)); + chacha_ivsetup(&ctx->cha_ctx, nonce, NULL); + chacha_encrypt_bytes(&ctx->cha_ctx, keystream, keystream, + sizeof(keystream)); + + /* check tag if decrypting */ + if(encrypt == 0 && tag_len) { + poly1305_get_tag(keystream, ad, ad_len, input, input_len, calc_tag); + + if(memcmp_eq(calc_tag, tag, tag_len) != 0) { + return CHACHAPOLY_INVALID_MAC; + } + } + + /* crypt data */ + for(i = 0; i < input_len; i++) { + ((unsigned char *)output)[i] = + ((unsigned char *)input)[i] ^ keystream[32 + i]; + } + + /* add tag if encrypting */ + if(encrypt && tag_len) { + poly1305_get_tag(keystream, ad, ad_len, output, input_len, calc_tag); + memcpy(tag, calc_tag, tag_len); + } + + return CHACHAPOLY_OK; +} diff --git a/src/chacha-poly1305/chachapoly.h b/src/chacha-poly1305/chachapoly.h new file mode 100644 index 00000000..ffc9576d --- /dev/null +++ b/src/chacha-poly1305/chachapoly.h @@ -0,0 +1,82 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Grigori Goronzy + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CHACHAPOLY_H +#define CHACHAPOLY_H + +#include "chacha.h" +#include "poly1305.h" + +#define CHACHAPOLY_OK 0 +#define CHACHAPOLY_INVALID_MAC -1 + +struct chachapoly_ctx { + struct chacha_ctx cha_ctx; +}; + +/** + * Initialize ChaCha20-Poly1305 AEAD. + * For RFC 7539 conformant AEAD, 256 bit keys must be used. + * + * \param ctx context data + * \param key 16 or 32 bytes of key material + * \param key_len key length, 128 or 256 bits + * \return success if 0 + */ +int chachapoly_init(struct chachapoly_ctx *ctx, const void *key, int key_len); + +/** + * Encrypt or decrypt with ChaCha20-Poly1305. The AEAD construction conforms + * to RFC 7539. 
+ * + * \param ctx context data + * \param nonce nonce (12 bytes) + * \param ad associated data + * \param ad_len associated data length in bytes + * \param input plaintext/ciphertext input + * \param input_len input length in bytes; + * \param output plaintext/ciphertext output + * \param tag tag output + * \param tag_len tag length in bytes (0-16); + if 0, authentication is skipped + * \param encrypt decrypt if 0, else encrypt + * \return CHACHAPOLY_OK if no error, CHACHAPOLY_INVALID_MAC if auth + * failed when decrypting + */ +int chachapoly_crypt(struct chachapoly_ctx *ctx, const void *nonce, + const void *ad, int ad_len, void *input, int input_len, + void *output, void *tag, int tag_len, int encrypt); + +/** + * Encrypt or decrypt with Chacha20-Poly1305 for short messages. + * The AEAD construction is different from chachapoly_crypt, but more + * efficient for small messages. Up to 32 bytes can be encrypted. The size + * of associated data is not restricted. The interface is similar to + * chachapoly_crypt. 
+ */ +int chachapoly_crypt_short(struct chachapoly_ctx *ctx, const void *nonce, + const void *ad, int ad_len, void *input, int input_len, + void *output, void *tag, int tag_len, int encrypt); + +#endif diff --git a/src/chacha-poly1305/meson.build b/src/chacha-poly1305/meson.build index d8fd74cc..60a20ab3 100644 --- a/src/chacha-poly1305/meson.build +++ b/src/chacha-poly1305/meson.build @@ -1,5 +1,5 @@ src_chacha_poly = files( - 'chacha-poly1305.c', + 'chachapoly.c', 'chacha.c', 'poly1305.c', ) diff --git a/src/chacha-poly1305/poly1305.c b/src/chacha-poly1305/poly1305.c index 4d99b8c3..0c90564c 100644 --- a/src/chacha-poly1305/poly1305.c +++ b/src/chacha-poly1305/poly1305.c @@ -1,205 +1,302 @@ /* - * Public Domain poly1305 from Andrew Moon - * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna - */ - -#include "../system.h" +poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition +public domain +*/ #include "poly1305.h" -#define mul32x32_64(a,b) ((uint64_t)(a) * (b)) - -#define U8TO32_LE(p) \ - (((uint32_t)((p)[0])) | \ - ((uint32_t)((p)[1]) << 8) | \ - ((uint32_t)((p)[2]) << 16) | \ - ((uint32_t)((p)[3]) << 24)) - -#define U32TO8_LE(p, v) \ +#if (USE_UNALIGNED == 1) +#define U8TO32(p) \ + (*((uint32_t *)(p))) +#define U32TO8(p, v) \ do { \ - (p)[0] = (uint8_t)((v)); \ - (p)[1] = (uint8_t)((v) >> 8); \ - (p)[2] = (uint8_t)((v) >> 16); \ - (p)[3] = (uint8_t)((v) >> 24); \ + *((uint32_t *)(p)) = v; \ } while (0) +#else +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static uint32_t +U8TO32(const unsigned char *p) { + return + (((uint32_t)(p[0] & 0xff)) | + ((uint32_t)(p[1] & 0xff) << 8) | + ((uint32_t)(p[2] & 0xff) << 16) | + ((uint32_t)(p[3] & 0xff) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, uint32_t v) { + p[0] = (v) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 
16) & 0xff; + p[3] = (v >> 24) & 0xff; +} +#endif void -poly1305_auth(unsigned char out[POLY1305_TAGLEN], const unsigned char *m, size_t inlen, const unsigned char key[POLY1305_KEYLEN]) { - uint32_t t0, t1, t2, t3; - uint32_t h0, h1, h2, h3, h4; +poly1305_init(struct poly1305_context *st, const unsigned char key[32]) { + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (U8TO32(&key[ 0])) & 0x3ffffff; + st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03; + st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff; + st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff; + st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = U8TO32(&key[16]); + st->pad[1] = U8TO32(&key[20]); + st->pad[2] = U8TO32(&key[24]); + st->pad[3] = U8TO32(&key[28]); + + st->leftover = 0; + st->final = 0; +} + +static void +poly1305_blocks(struct poly1305_context *st, const unsigned char *m, size_t bytes) { + const uint32_t hibit = (st->final) ? 
0 : (1 << 24); /* 1 << 128 */ uint32_t r0, r1, r2, r3, r4; uint32_t s1, s2, s3, s4; - uint32_t b, nb; - size_t j; - uint64_t t[5]; - uint64_t f0, f1, f2, f3; - uint32_t g0, g1, g2, g3, g4; - uint64_t c; - unsigned char mp[16]; - - /* clamp key */ - t0 = U8TO32_LE(key + 0); - t1 = U8TO32_LE(key + 4); - t2 = U8TO32_LE(key + 8); - t3 = U8TO32_LE(key + 12); - - /* precompute multipliers */ - r0 = t0 & 0x3ffffff; - t0 >>= 26; - t0 |= t1 << 6; - r1 = t0 & 0x3ffff03; - t1 >>= 20; - t1 |= t2 << 12; - r2 = t1 & 0x3ffc0ff; - t2 >>= 14; - t2 |= t3 << 18; - r3 = t2 & 0x3f03fff; - t3 >>= 8; - r4 = t3 & 0x00fffff; + uint32_t h0, h1, h2, h3, h4; + uint64_t d0, d1, d2, d3, d4; + uint32_t c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; s1 = r1 * 5; s2 = r2 * 5; s3 = r3 * 5; s4 = r4 * 5; - /* init state */ - h0 = 0; - h1 = 0; - h2 = 0; - h3 = 0; - h4 = 0; - - /* full blocks */ - if(inlen < 16) { - goto poly1305_donna_atmost15bytes; - } + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; -poly1305_donna_16bytes: - m += 16; - inlen -= 16; - - t0 = U8TO32_LE(m - 16); - t1 = U8TO32_LE(m - 12); - t2 = U8TO32_LE(m - 8); - t3 = U8TO32_LE(m - 4); - - h0 += t0 & 0x3ffffff; - h1 += ((((uint64_t) t1 << 32) | t0) >> 26) & 0x3ffffff; - h2 += ((((uint64_t) t2 << 32) | t1) >> 20) & 0x3ffffff; - h3 += ((((uint64_t) t3 << 32) | t2) >> 14) & 0x3ffffff; - h4 += (t3 >> 8) | (1 << 24); - -poly1305_donna_mul: - t[0] = mul32x32_64(h0, r0) + mul32x32_64(h1, s4) + mul32x32_64(h2, s3) + mul32x32_64(h3, s2) + mul32x32_64(h4, s1); - t[1] = mul32x32_64(h0, r1) + mul32x32_64(h1, r0) + mul32x32_64(h2, s4) + mul32x32_64(h3, s3) + mul32x32_64(h4, s2); - t[2] = mul32x32_64(h0, r2) + mul32x32_64(h1, r1) + mul32x32_64(h2, r0) + mul32x32_64(h3, s4) + mul32x32_64(h4, s3); - t[3] = mul32x32_64(h0, r3) + mul32x32_64(h1, r2) + mul32x32_64(h2, r1) + mul32x32_64(h3, r0) + mul32x32_64(h4, s4); - t[4] = mul32x32_64(h0, r4) + mul32x32_64(h1, r3) + 
mul32x32_64(h2, r2) + mul32x32_64(h3, r1) + mul32x32_64(h4, r0); - - h0 = (uint32_t) t[0] & 0x3ffffff; - c = (t[0] >> 26); - t[1] += c; - h1 = (uint32_t) t[1] & 0x3ffffff; - b = (uint32_t)(t[1] >> 26); - t[2] += b; - h2 = (uint32_t) t[2] & 0x3ffffff; - b = (uint32_t)(t[2] >> 26); - t[3] += b; - h3 = (uint32_t) t[3] & 0x3ffffff; - b = (uint32_t)(t[3] >> 26); - t[4] += b; - h4 = (uint32_t) t[4] & 0x3ffffff; - b = (uint32_t)(t[4] >> 26); - h0 += b * 5; - - if(inlen >= 16) { - goto poly1305_donna_16bytes; - } + while(bytes >= POLY1305_BLOCK_SIZE) { + /* h += m[i] */ + h0 += (U8TO32(m + 0)) & 0x3ffffff; + h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m + 12) >> 8) | hibit; - /* final bytes */ -poly1305_donna_atmost15bytes: + /* h *= r */ + d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1); + d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2); + d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3); + d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4); + d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0); - if(!inlen) { - goto poly1305_donna_finish; - } + /* (partial) h %= p */ + c = (uint32_t)(d0 >> 26); + h0 = (uint32_t)d0 & 0x3ffffff; + d1 += c; + c = (uint32_t)(d1 >> 26); + h1 = (uint32_t)d1 & 0x3ffffff; + d2 += c; + c = (uint32_t)(d2 >> 26); + h2 = (uint32_t)d2 & 0x3ffffff; + d3 += c; + c = (uint32_t)(d3 >> 26); + h3 = (uint32_t)d3 & 0x3ffffff; + d4 += c; + c = (uint32_t)(d4 >> 26); + h4 = (uint32_t)d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; - for(j = 0; j < inlen; j++) { - mp[j] = m[j]; + m += POLY1305_BLOCK_SIZE; + bytes -= 
POLY1305_BLOCK_SIZE; } - mp[j++] = 1; + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} - for(; j < 16; j++) { - mp[j] = 0; - } +void +poly1305_finish(struct poly1305_context *st, unsigned char mac[16]) { + uint32_t h0, h1, h2, h3, h4, c; + uint32_t g0, g1, g2, g3, g4; + uint64_t f; + uint32_t mask; - inlen = 0; + /* process the remaining block */ + if(st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; - t0 = U8TO32_LE(mp + 0); - t1 = U8TO32_LE(mp + 4); - t2 = U8TO32_LE(mp + 8); - t3 = U8TO32_LE(mp + 12); + for(; i < POLY1305_BLOCK_SIZE; i++) { + st->buffer[i] = 0; + } - h0 += t0 & 0x3ffffff; - h1 += ((((uint64_t) t1 << 32) | t0) >> 26) & 0x3ffffff; - h2 += ((((uint64_t) t2 << 32) | t1) >> 20) & 0x3ffffff; - h3 += ((((uint64_t) t3 << 32) | t2) >> 14) & 0x3ffffff; - h4 += (t3 >> 8); + st->final = 1; + poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE); + } - goto poly1305_donna_mul; + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; -poly1305_donna_finish: - b = h0 >> 26; - h0 = h0 & 0x3ffffff; - h1 += b; - b = h1 >> 26; + c = h1 >> 26; h1 = h1 & 0x3ffffff; - h2 += b; - b = h2 >> 26; + h2 += c; + c = h2 >> 26; h2 = h2 & 0x3ffffff; - h3 += b; - b = h3 >> 26; + h3 += c; + c = h3 >> 26; h3 = h3 & 0x3ffffff; - h4 += b; - b = h4 >> 26; + h4 += c; + c = h4 >> 26; h4 = h4 & 0x3ffffff; - h0 += b * 5; - b = h0 >> 26; + h0 += c * 5; + c = h0 >> 26; h0 = h0 & 0x3ffffff; - h1 += b; + h1 += c; + /* compute h + -p */ g0 = h0 + 5; - b = g0 >> 26; + c = g0 >> 26; g0 &= 0x3ffffff; - g1 = h1 + b; - b = g1 >> 26; + g1 = h1 + c; + c = g1 >> 26; g1 &= 0x3ffffff; - g2 = h2 + b; - b = g2 >> 26; + g2 = h2 + c; + c = g2 >> 26; g2 &= 0x3ffffff; - g3 = h3 + b; - b = g3 >> 26; + g3 = h3 + c; + c = g3 >> 26; g3 &= 0x3ffffff; - g4 = h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - h0 = (h0 & nb) | (g0 & b); - h1 = (h1 & nb) | (g1 & b); - h2 = (h2 & nb) | (g2 & b); - h3 = (h3 & 
nb) | (g3 & b); - h4 = (h4 & nb) | (g4 & b); - - f0 = ((h0) | (h1 << 26)) + (uint64_t) U8TO32_LE(&key[16]); - f1 = ((h1 >> 6) | (h2 << 20)) + (uint64_t) U8TO32_LE(&key[20]); - f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t) U8TO32_LE(&key[24]); - f3 = ((h3 >> 18) | (h4 << 8)) + (uint64_t) U8TO32_LE(&key[28]); - - U32TO8_LE(&out[0], f0); - f1 += (f0 >> 32); - U32TO8_LE(&out[4], f1); - f2 += (f1 >> 32); - U32TO8_LE(&out[8], f2); - f3 += (f2 >> 32); - U32TO8_LE(&out[12], f3); + g4 = h4 + c - (1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (uint64_t)h0 + st->pad[0] ; + h0 = (uint32_t)f; + f = (uint64_t)h1 + st->pad[1] + (f >> 32); + h1 = (uint32_t)f; + f = (uint64_t)h2 + st->pad[2] + (f >> 32); + h2 = (uint32_t)f; + f = (uint64_t)h3 + st->pad[3] + (f >> 32); + h3 = (uint32_t)f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} + + +void +poly1305_update(struct poly1305_context *st, const unsigned char *m, size_t bytes) { + size_t i; + + /* handle leftover */ + if(st->leftover) { + size_t want = (POLY1305_BLOCK_SIZE - st->leftover); + + if(want > bytes) { + want = bytes; + } + + for(i = 0; i < want; i++) { + st->buffer[st->leftover + i] = m[i]; + } + + bytes 
-= want; + m += want; + st->leftover += want; + + if(st->leftover < POLY1305_BLOCK_SIZE) { + return; + } + + poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE); + st->leftover = 0; + } + + /* process full blocks */ + if(bytes >= POLY1305_BLOCK_SIZE) { + size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if(bytes) { +#if (USE_MEMCPY == 1) + memcpy(st->buffer + st->leftover, m, bytes); +#else + + for(i = 0; i < bytes; i++) { + st->buffer[st->leftover + i] = m[i]; + } + +#endif + st->leftover += bytes; + } +} + +void +poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]) { + struct poly1305_context ctx; + poly1305_init(&ctx, key); + poly1305_update(&ctx, m, bytes); + poly1305_finish(&ctx, mac); } diff --git a/src/chacha-poly1305/poly1305.h b/src/chacha-poly1305/poly1305.h index 4ece415c..624a19a9 100644 --- a/src/chacha-poly1305/poly1305.h +++ b/src/chacha-poly1305/poly1305.h @@ -1,16 +1,32 @@ -/* $OpenBSD: poly1305.h,v 1.2 2013/12/19 22:57:13 djm Exp $ */ - -/* - * Public Domain poly1305 from Andrew Moon - * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna - */ - #ifndef POLY1305_H #define POLY1305_H -#define POLY1305_KEYLEN 32 -#define POLY1305_TAGLEN 16 +#include +#include +#include + +#define POLY1305_KEYLEN 32 +#define POLY1305_TAGLEN 16 +#define POLY1305_BLOCK_SIZE 16 + +/* use memcpy() to copy blocks of memory (typically faster) */ +#define USE_MEMCPY 1 +/* use unaligned little-endian load/store (can be faster) */ +#define USE_UNALIGNED 0 + +struct poly1305_context { + uint32_t r[5]; + uint32_t h[5]; + uint32_t pad[4]; + size_t leftover; + unsigned char buffer[POLY1305_BLOCK_SIZE]; + unsigned char final; +}; + +void poly1305_init(struct poly1305_context *ctx, const unsigned char key[32]); +void poly1305_update(struct poly1305_context *ctx, const unsigned char *m, size_t bytes); +void 
poly1305_finish(struct poly1305_context *ctx, unsigned char mac[16]); +void poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]); -void poly1305_auth(uint8_t out[POLY1305_TAGLEN], const uint8_t *m, size_t inlen, const uint8_t key[POLY1305_KEYLEN]); +#endif /* POLY1305_H */ -#endif /* POLY1305_H */ diff --git a/src/sptps.c b/src/sptps.c index 500bf83d..de4ef6ee 100644 --- a/src/sptps.c +++ b/src/sptps.c @@ -1,6 +1,6 @@ /* sptps.c -- Simple Peer-to-Peer Security - Copyright (C) 2011-2015 Guus Sliepen , + Copyright (C) 2011-2021 Guus Sliepen , 2010 Brandon L. Black This program is free software; you can redistribute it and/or modify @@ -20,7 +20,7 @@ #include "system.h" -#include "chacha-poly1305/chacha-poly1305.h" +#include "chacha-poly1305/chachapoly.h" #include "ecdh.h" #include "ecdsa.h" #include "prf.h" @@ -32,6 +32,8 @@ #include #endif +#define CIPHER_KEYLEN 64 + unsigned int sptps_replaywin = 16; /* @@ -113,14 +115,14 @@ static void free_sptps_key(sptps_key_t *key) { } static bool cipher_init(uint8_t suite, void **ctx, const sptps_key_t *keys, bool key_half) { - const uint8_t *key = key_half ? keys->key1 : keys->key0; + const uint8_t *key = key_half ? 
keys->key1 : keys->key0; switch(suite) { #ifndef HAVE_OPENSSL case SPTPS_CHACHA_POLY1305: - *ctx = chacha_poly1305_init(); - return ctx && chacha_poly1305_set_key(*ctx, key); + *ctx = malloc(sizeof(struct chachapoly_ctx)); + return *ctx && chachapoly_init(*ctx, key, 256) == CHACHAPOLY_OK; #else @@ -157,7 +159,7 @@ static void cipher_exit(uint8_t suite, void *ctx) { #ifndef HAVE_OPENSSL case SPTPS_CHACHA_POLY1305: - chacha_poly1305_exit(ctx); + free(ctx); break; #else @@ -177,9 +179,17 @@ static bool cipher_encrypt(uint8_t suite, void *ctx, uint32_t seqno, const uint8 switch(suite) { #ifndef HAVE_OPENSSL - case SPTPS_CHACHA_POLY1305: - chacha_poly1305_encrypt(ctx, seqno, in, inlen, out, outlen); + case SPTPS_CHACHA_POLY1305: { + if(chachapoly_crypt(ctx, nonce, NULL, 0, (void *)in, inlen, out, out + inlen, 16, 1) != CHACHAPOLY_OK) { + return false; + } + + if(outlen) { + *outlen = inlen + 16; + } + return true; + } #else @@ -224,22 +234,30 @@ static bool cipher_encrypt(uint8_t suite, void *ctx, uint32_t seqno, const uint8 } static bool cipher_decrypt(uint8_t suite, void *ctx, uint32_t seqno, const uint8_t *in, size_t inlen, uint8_t *out, size_t *outlen) { + if(inlen < 16) { + return false; + } + + inlen -= 16; + switch(suite) { #ifndef HAVE_OPENSSL case SPTPS_CHACHA_POLY1305: - return chacha_poly1305_decrypt(ctx, seqno, in, inlen, out, outlen); + if(chachapoly_crypt(ctx, nonce, NULL, 0, (void *)in, inlen, out, (void *)(in + inlen), 16, 0) != CHACHAPOLY_OK) { + return false; + } + + if(outlen) { + *outlen = inlen; + } + + return true; #else case SPTPS_CHACHA_POLY1305: case SPTPS_AES256_GCM: { - if(inlen < 16) { - return false; - } - - inlen -= 16; - uint8_t nonce[12] = {seqno, seqno >> 8, seqno >> 16, seqno >> 24}; if(!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, nonce)) { diff --git a/src/sptps.h b/src/sptps.h index e21804a4..6d01891a 100644 --- a/src/sptps.h +++ b/src/sptps.h @@ -3,7 +3,7 @@ /* sptps.h -- Simple Peer-to-Peer Security - Copyright (C) 2011-2014 Guus 
Sliepen + Copyright (C) 2011-2021 Guus Sliepen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ #include "system.h" -#include "chacha-poly1305/chacha-poly1305.h" +#include "chacha-poly1305/chachapoly.h" #include "ecdh.h" #include "ecdsa.h" @@ -62,12 +62,15 @@ typedef struct sptps_kex_t sptps_kex_t; STATIC_ASSERT(sizeof(sptps_kex_t) == 68, "sptps_kex_t has invalid size"); +// Big enough to handle a 256 bit key + IV +#define SPTPS_KEYLEN 64 + typedef union sptps_key_t { struct { - uint8_t key0[CHACHA_POLY1305_KEYLEN]; - uint8_t key1[CHACHA_POLY1305_KEYLEN]; + uint8_t key0[SPTPS_KEYLEN]; + uint8_t key1[SPTPS_KEYLEN]; }; - uint8_t both[CHACHA_POLY1305_KEYLEN * 2]; + uint8_t both[SPTPS_KEYLEN * 2]; } sptps_key_t; STATIC_ASSERT(sizeof(sptps_key_t) == 128, "sptps_key_t has invalid size"); diff --git a/src/sptps_test.c b/src/sptps_test.c index 37b5a5af..e113a847 100644 --- a/src/sptps_test.c +++ b/src/sptps_test.c @@ -132,8 +132,8 @@ typedef enum option_t { OPT_SPECIAL_CHAR = 's', OPT_TUN = 't', OPT_VERBOSE = 'v', - OPT_CIPHER_SUITES = 'M', - OPT_PREFERRED_SUITE = 'P', + OPT_CIPHER_SUITES = 'M', + OPT_PREFERRED_SUITE = 'P', OPT_IPV4 = '4', OPT_IPV6 = '6',