[Cryptech-Commits] [sw/libhal] branch rpc updated: Optional (compile time conditional) software hash cores. At the moment this is all-or-nothing, but could easily be tweaked to allow compile-time selection of particular hashes.

git at cryptech.is git at cryptech.is
Wed Mar 9 05:56:49 UTC 2016


This is an automated email from the git hooks/post-receive script.

sra at hactrn.net pushed a commit to branch rpc
in repository sw/libhal.

The following commit(s) were added to refs/heads/rpc by this push:
       new  8db1d75   Optional (compile time conditional) software hash cores.  At the moment this is all-or-nothing, but could easily be tweaked to allow compile-time selection of particular hashes.
8db1d75 is described below

commit 8db1d753745bb7b253cf969ff2fb32464b601bf5
Author: Rob Austein <sra at hactrn.net>
AuthorDate: Wed Mar 9 00:49:13 2016 -0500

    Optional (compile time conditional) software hash cores.  At the
    moment this is all-or-nothing, but could easily be tweaked to allow
    compile-time selection of particular hashes.
---
 hash.c              | 375 +++++++++++++++++++++++++++++++++++++++++++++++++---
 verilog_constants.h |   3 +-
 2 files changed, 358 insertions(+), 20 deletions(-)

diff --git a/hash.c b/hash.c
index 5af53a8..3c5f5d7 100644
--- a/hash.c
+++ b/hash.c
@@ -4,7 +4,7 @@
  * HAL interface to Cryptech hash cores.
  *
  * Authors: Joachim Strömbergson, Paul Selkirk, Rob Austein
- * Copyright (c) 2014-2015, NORDUnet A/S
+ * Copyright (c) 2014-2016, NORDUnet A/S
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,31 @@
 #include "hal_internal.h"
 
 /*
+ * Whether to include software implementations of the hash cores,
+ * for use when the Verilog cores aren't available.
+ */
+
+#ifndef HAL_ENABLE_SOFTWARE_HASH_CORES
+#define HAL_ENABLE_SOFTWARE_HASH_CORES 0
+#endif
+
+typedef hal_error_t (*sw_hash_core_t)(hal_hash_state_t *);
+
+#if HAL_ENABLE_SOFTWARE_HASH_CORES
+
+static hal_error_t sw_hash_core_sha1(  hal_hash_state_t *);
+static hal_error_t sw_hash_core_sha256(hal_hash_state_t *);
+static hal_error_t sw_hash_core_sha512(hal_hash_state_t *);
+
+#else /* HAL_ENABLE_SOFTWARE_HASH_CORES */
+
+#define sw_hash_core_sha1       ((sw_hash_core_t) 0)
+#define sw_hash_core_sha256     ((sw_hash_core_t) 0)
+#define sw_hash_core_sha512     ((sw_hash_core_t) 0)
+
+#endif /* HAL_ENABLE_SOFTWARE_HASH_CORES */
+
+/*
  * HMAC magic numbers.
  */
 
@@ -58,9 +83,10 @@
 
 struct hal_hash_driver {
   size_t length_length;                 /* Length of the length field */
-  hal_addr_t block_addr;  		/* Where to write hash blocks */
+  hal_addr_t block_addr;                /* Where to write hash blocks */
   hal_addr_t digest_addr;               /* Where to read digest */
   uint8_t ctrl_mode;                    /* Digest mode, for cores that have modes */
+  sw_hash_core_t sw_core;               /* Software implementation, when enabled */
 };
 
 /*
@@ -81,7 +107,8 @@ struct hal_hash_state {
   unsigned flags;
 };
 
-#define STATE_FLAG_STATE_ALLOCATED 0x1          /* State buffer dynamically allocated */
+#define STATE_FLAG_STATE_ALLOCATED      0x1     /* State buffer dynamically allocated */
+#define STATE_FLAG_SOFTWARE_CORE        0x2     /* Use software rather than hardware core */
 
 /*
  * HMAC state.  Right now this just holds the key block and a hash
@@ -101,27 +128,27 @@ struct hal_hmac_state {
  */
 
 static const hal_hash_driver_t sha1_driver = {
-  SHA1_LENGTH_LEN, SHA1_ADDR_BLOCK, SHA1_ADDR_DIGEST, 0
+  SHA1_LENGTH_LEN, SHA1_ADDR_BLOCK, SHA1_ADDR_DIGEST, 0, sw_hash_core_sha1
 };
 
 static const hal_hash_driver_t sha256_driver = {
-  SHA256_LENGTH_LEN, SHA256_ADDR_BLOCK, SHA256_ADDR_DIGEST, 0
+  SHA256_LENGTH_LEN, SHA256_ADDR_BLOCK, SHA256_ADDR_DIGEST, 0, sw_hash_core_sha256
 };
 
 static const hal_hash_driver_t sha512_224_driver = {
-  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512_224
+  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512_224, sw_hash_core_sha512
 };
 
 static const hal_hash_driver_t sha512_256_driver = {
-  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512_256
+  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512_256, sw_hash_core_sha512
 };
 
 static const hal_hash_driver_t sha384_driver = {
-  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_384
+  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_384, sw_hash_core_sha512
 };
 
 static const hal_hash_driver_t sha512_driver = {
-  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512
+  SHA512_LENGTH_LEN, SHA512_ADDR_BLOCK, SHA512_ADDR_DIGEST, MODE_SHA_512, sw_hash_core_sha512
 };
 
 /*
@@ -212,12 +239,12 @@ const hal_hash_descriptor_t hal_hash_sha512[1] = {{
  * deeply embedded universe.
  */
 
-#ifndef	HAL_STATIC_HASH_STATE_BLOCKS
-#define	HAL_STATIC_HASH_STATE_BLOCKS 0
+#ifndef HAL_STATIC_HASH_STATE_BLOCKS
+#define HAL_STATIC_HASH_STATE_BLOCKS 0
 #endif
 
-#ifndef	HAL_STATIC_HMAC_STATE_BLOCKS
-#define	HAL_STATIC_HMAC_STATE_BLOCKS 0
+#ifndef HAL_STATIC_HMAC_STATE_BLOCKS
+#define HAL_STATIC_HMAC_STATE_BLOCKS 0
 #endif
 
 #if HAL_STATIC_HASH_STATE_BLOCKS > 0
@@ -290,10 +317,26 @@ static inline const hal_hash_driver_t *check_driver(const hal_hash_descriptor_t
  */
 
 static inline hal_error_t check_core(const hal_core_t **core,
-                                     const hal_hash_descriptor_t * const descriptor)
+                                     const hal_hash_descriptor_t * const descriptor,
+                                     unsigned *flags)
 {
   assert(descriptor != NULL && descriptor->driver != NULL);
-  return hal_core_check_name(core, descriptor->core_name);
+
+  hal_error_t err = hal_core_check_name(core, descriptor->core_name);
+
+#if HAL_ENABLE_SOFTWARE_HASH_CORES
+
+  if (err == HAL_ERROR_CORE_NOT_FOUND && descriptor->driver->sw_core) {
+
+    if (flags != NULL)
+      *flags |= STATE_FLAG_SOFTWARE_CORE;
+
+    err = HAL_OK;
+  }
+
+#endif /* HAL_ENABLE_SOFTWARE_HASH_CORES */
+
+  return err;
 }
 
 /*
@@ -307,6 +350,7 @@ hal_error_t hal_hash_initialize(const hal_core_t *core,
 {
   const hal_hash_driver_t * const driver = check_driver(descriptor);
   hal_hash_state_t *state = state_buffer;
+  unsigned flags = 0;
   hal_error_t err;
 
   if (driver == NULL || state_ == NULL)
@@ -315,7 +359,7 @@ hal_error_t hal_hash_initialize(const hal_core_t *core,
   if (state_buffer != NULL && state_length < descriptor->hash_state_length)
     return HAL_ERROR_BAD_ARGUMENTS;
 
-  if ((err = check_core(&core, descriptor)) != HAL_OK)
+  if ((err = check_core(&core, descriptor, &flags)) != HAL_OK)
     return err;
 
   if (state_buffer == NULL && (state = alloc_static_hash_state()) == NULL)
@@ -325,6 +369,7 @@ hal_error_t hal_hash_initialize(const hal_core_t *core,
   state->descriptor = descriptor;
   state->driver = driver;
   state->core = core;
+  state->flags = flags;
     
   if (state_buffer == NULL)
     state->flags |= STATE_FLAG_STATE_ALLOCATED;
@@ -409,6 +454,9 @@ static hal_error_t hash_write_block(hal_hash_state_t * const state)
   if (debug)
     fprintf(stderr, "[ %s ]\n", state->block_count == 0 ? "init" : "next");
 
+  if (HAL_ENABLE_SOFTWARE_HASH_CORES && (state->flags & STATE_FLAG_SOFTWARE_CORE) != 0)
+    return state->driver->sw_core(state);
+
   if ((err = hal_io_wait_ready(state->core)) != HAL_OK)
     return err;
 
@@ -497,7 +545,7 @@ hal_error_t hal_hash_update(hal_hash_state_t *state,            /* Opaque state
  * Finish hash and return digest.
  */
 
-hal_error_t hal_hash_finalize(hal_hash_state_t *state,            	/* Opaque state block */
+hal_error_t hal_hash_finalize(hal_hash_state_t *state,                  /* Opaque state block */
                               uint8_t *digest_buffer,                   /* Returned digest */
                               const size_t digest_buffer_length)        /* Length of digest_buffer */
 {
@@ -565,7 +613,9 @@ hal_error_t hal_hash_finalize(hal_hash_state_t *state,            	/* Opaque sta
   state->block_count++;
 
   /* All data pushed to core, now we just need to read back the result */
-  if ((err = hash_read_digest(state->core, state->driver, digest_buffer, state->descriptor->digest_length)) != HAL_OK)
+  if (HAL_ENABLE_SOFTWARE_HASH_CORES && (state->flags & STATE_FLAG_SOFTWARE_CORE) != 0)
+    memcpy(digest_buffer, state->core_state, state->descriptor->digest_length);
+  else if ((err = hash_read_digest(state->core, state->driver, digest_buffer, state->descriptor->digest_length)) != HAL_OK)
     return err;
 
   return HAL_OK;
@@ -592,7 +642,7 @@ hal_error_t hal_hmac_initialize(const hal_core_t *core,
   if (state_buffer != NULL && state_length < descriptor->hmac_state_length)
     return HAL_ERROR_BAD_ARGUMENTS;
 
-  if ((err = check_core(&core, descriptor)) != HAL_OK)
+  if ((err = check_core(&core, descriptor, NULL)) != HAL_OK)
     return err;
 
   if (state_buffer == NULL && (state = alloc_static_hmac_state()) == NULL)
@@ -755,6 +805,293 @@ const hal_hash_descriptor_t *hal_hmac_get_descriptor(const hal_hmac_state_t * co
   return state == NULL ? NULL : state->hash_state.descriptor;
 }
 
+#if HAL_ENABLE_SOFTWARE_HASH_CORES
+
+/*
+ * Software implementations of hash cores.
+ *
+ * This is based in part on a mix of Tom St Denis's libtomcrypt C
+ * implementation and Joachim Strömbergson's Python models for the
+ * Cryptech hash cores.
+ *
+ * This is not a particularly high performance implementation, as
+ * we've given priority to portability and simplicity over speed.
+ * We assume that any reasonable modern compiler can handle inline
+ * functions, loop unrolling, and optimization of expressions which
+ * become constant upon inlining and unrolling.
+ */
+
+/*
+ * K constants for SHA-2.  (SHA-1 only uses four K constants, which are handled inline
+ * due to other peculiarities of the SHA-1 algorithm.)
+ */
+
+static const uint32_t sha256_K[64] = {
+    0x428A2F98UL, 0x71374491UL, 0xB5C0FBCFUL, 0xE9B5DBA5UL, 0x3956C25BUL, 0x59F111F1UL, 0x923F82A4UL, 0xAB1C5ED5UL,
+    0xD807AA98UL, 0x12835B01UL, 0x243185BEUL, 0x550C7DC3UL, 0x72BE5D74UL, 0x80DEB1FEUL, 0x9BDC06A7UL, 0xC19BF174UL,
+    0xE49B69C1UL, 0xEFBE4786UL, 0x0FC19DC6UL, 0x240CA1CCUL, 0x2DE92C6FUL, 0x4A7484AAUL, 0x5CB0A9DCUL, 0x76F988DAUL,
+    0x983E5152UL, 0xA831C66DUL, 0xB00327C8UL, 0xBF597FC7UL, 0xC6E00BF3UL, 0xD5A79147UL, 0x06CA6351UL, 0x14292967UL,
+    0x27B70A85UL, 0x2E1B2138UL, 0x4D2C6DFCUL, 0x53380D13UL, 0x650A7354UL, 0x766A0ABBUL, 0x81C2C92EUL, 0x92722C85UL,
+    0xA2BFE8A1UL, 0xA81A664BUL, 0xC24B8B70UL, 0xC76C51A3UL, 0xD192E819UL, 0xD6990624UL, 0xF40E3585UL, 0x106AA070UL,
+    0x19A4C116UL, 0x1E376C08UL, 0x2748774CUL, 0x34B0BCB5UL, 0x391C0CB3UL, 0x4ED8AA4AUL, 0x5B9CCA4FUL, 0x682E6FF3UL,
+    0x748F82EEUL, 0x78A5636FUL, 0x84C87814UL, 0x8CC70208UL, 0x90BEFFFAUL, 0xA4506CEBUL, 0xBEF9A3F7UL, 0xC67178F2UL
+};
+
+static const uint64_t sha512_K[80] = {
+  0x428A2F98D728AE22ULL, 0x7137449123EF65CDULL, 0xB5C0FBCFEC4D3B2FULL, 0xE9B5DBA58189DBBCULL,
+  0x3956C25BF348B538ULL, 0x59F111F1B605D019ULL, 0x923F82A4AF194F9BULL, 0xAB1C5ED5DA6D8118ULL,
+  0xD807AA98A3030242ULL, 0x12835B0145706FBEULL, 0x243185BE4EE4B28CULL, 0x550C7DC3D5FFB4E2ULL,
+  0x72BE5D74F27B896FULL, 0x80DEB1FE3B1696B1ULL, 0x9BDC06A725C71235ULL, 0xC19BF174CF692694ULL,
+  0xE49B69C19EF14AD2ULL, 0xEFBE4786384F25E3ULL, 0x0FC19DC68B8CD5B5ULL, 0x240CA1CC77AC9C65ULL,
+  0x2DE92C6F592B0275ULL, 0x4A7484AA6EA6E483ULL, 0x5CB0A9DCBD41FBD4ULL, 0x76F988DA831153B5ULL,
+  0x983E5152EE66DFABULL, 0xA831C66D2DB43210ULL, 0xB00327C898FB213FULL, 0xBF597FC7BEEF0EE4ULL,
+  0xC6E00BF33DA88FC2ULL, 0xD5A79147930AA725ULL, 0x06CA6351E003826FULL, 0x142929670A0E6E70ULL,
+  0x27B70A8546D22FFCULL, 0x2E1B21385C26C926ULL, 0x4D2C6DFC5AC42AEDULL, 0x53380D139D95B3DFULL,
+  0x650A73548BAF63DEULL, 0x766A0ABB3C77B2A8ULL, 0x81C2C92E47EDAEE6ULL, 0x92722C851482353BULL,
+  0xA2BFE8A14CF10364ULL, 0xA81A664BBC423001ULL, 0xC24B8B70D0F89791ULL, 0xC76C51A30654BE30ULL,
+  0xD192E819D6EF5218ULL, 0xD69906245565A910ULL, 0xF40E35855771202AULL, 0x106AA07032BBD1B8ULL,
+  0x19A4C116B8D2D0C8ULL, 0x1E376C085141AB53ULL, 0x2748774CDF8EEB99ULL, 0x34B0BCB5E19B48A8ULL,
+  0x391C0CB3C5C95A63ULL, 0x4ED8AA4AE3418ACBULL, 0x5B9CCA4F7763E373ULL, 0x682E6FF3D6B2B8A3ULL,
+  0x748F82EE5DEFB2FCULL, 0x78A5636F43172F60ULL, 0x84C87814A1F0AB72ULL, 0x8CC702081A6439ECULL,
+  0x90BEFFFA23631E28ULL, 0xA4506CEBDE82BDE9ULL, 0xBEF9A3F7B2C67915ULL, 0xC67178F2E372532BULL,
+  0xCA273ECEEA26619CULL, 0xD186B8C721C0C207ULL, 0xEADA7DD6CDE0EB1EULL, 0xF57D4F7FEE6ED178ULL,
+  0x06F067AA72176FBAULL, 0x0A637DC5A2C898A6ULL, 0x113F9804BEF90DAEULL, 0x1B710B35131C471BULL,
+  0x28DB77F523047D84ULL, 0x32CAAB7B40C72493ULL, 0x3C9EBE0A15C9BEBCULL, 0x431D67C49C100D4CULL,
+  0x4CC5D4BECB3E42B6ULL, 0x597F299CFC657E2AULL, 0x5FCB6FAB3AD6FAECULL, 0x6C44198C4A475817ULL
+};
+
+/*
+ * Various bit twiddling operations.  We use inline functions rather than macros to get better
+ * data type checking, sane argument semantics, and simpler expressions (this stuff is
+ * confusing enough without adding a lot of unnecessary C macro baggage).
+ */
+
+static inline uint32_t rot_l_32(uint32_t x, unsigned n) { assert(n < 32); return ((x << n) | (x >> (32 - n))); }
+static inline uint32_t rot_r_32(uint32_t x, unsigned n) { assert(n < 32); return ((x >> n) | (x << (32 - n))); }
+static inline uint32_t lsh_r_32(uint32_t x, unsigned n) { assert(n < 32); return (x >> n); }
+
+static inline uint64_t rot_r_64(uint64_t x, unsigned n) { assert(n < 64); return ((x >> n) | (x << (64 - n))); }
+static inline uint64_t lsh_r_64(uint64_t x, unsigned n) { assert(n < 64); return (x >> n); }
+
+static inline uint32_t Choose_32(  uint32_t x, uint32_t y, uint32_t z) { return (z ^ (x & (y ^ z)));       }
+static inline uint32_t Majority_32(uint32_t x, uint32_t y, uint32_t z) { return ((x & y) | (z & (x | y))); }
+static inline uint32_t Parity_32(  uint32_t x, uint32_t y, uint32_t z) { return (x ^ y ^ z);               }
+
+static inline uint64_t Choose_64(  uint64_t x, uint64_t y, uint64_t z) { return (z ^ (x & (y ^ z)));       }
+static inline uint64_t Majority_64(uint64_t x, uint64_t y, uint64_t z) { return ((x & y) | (z & (x | y))); }
+
+static inline uint32_t Sigma0_32(uint32_t x) { return rot_r_32(x,  2) ^ rot_r_32(x, 13) ^ rot_r_32(x, 22); }
+static inline uint32_t Sigma1_32(uint32_t x) { return rot_r_32(x,  6) ^ rot_r_32(x, 11) ^ rot_r_32(x, 25); }
+static inline uint32_t Gamma0_32(uint32_t x) { return rot_r_32(x,  7) ^ rot_r_32(x, 18) ^ lsh_r_32(x,  3); }
+static inline uint32_t Gamma1_32(uint32_t x) { return rot_r_32(x, 17) ^ rot_r_32(x, 19) ^ lsh_r_32(x, 10); }
+
+static inline uint64_t Sigma0_64(uint64_t x) { return rot_r_64(x, 28) ^ rot_r_64(x, 34) ^ rot_r_64(x, 39); }
+static inline uint64_t Sigma1_64(uint64_t x) { return rot_r_64(x, 14) ^ rot_r_64(x, 18) ^ rot_r_64(x, 41); }
+static inline uint64_t Gamma0_64(uint64_t x) { return rot_r_64(x,  1) ^ rot_r_64(x,  8) ^ lsh_r_64(x,  7); }
+static inline uint64_t Gamma1_64(uint64_t x) { return rot_r_64(x, 19) ^ rot_r_64(x, 61) ^ lsh_r_64(x,  6); }
+
+/*
+ * Offset into hash state.  In theory, this should work out to compile-time constants after optimization.
+ */
+
+static inline int sha1_pos(int i, int j) { assert(i >= 0 && j >= 0 && j < 5); return (5 + j - (i % 5)) % 5; }
+static inline int sha2_pos(int i, int j) { assert(i >= 0 && j >= 0 && j < 8); return (8 + j - (i % 8)) % 8; }
+
+/*
+ * Byte-swapping version of memcpy() (sigh).
+ */
+
+static inline void swytebop(void *out_, const void * const in_, const size_t n, const size_t w)
+{
+  const uint8_t  order[] = { 0x01, 0x02, 0x03, 0x04 };
+
+  const uint8_t * const in = in_;
+  uint8_t *out = out_;
+
+  assert(in != out && in != NULL && out != NULL && w % 4 == 0 && n % w == 0);
+  
+  switch (* (uint32_t *) order) {
+
+  case 0x01020304:
+    memcpy(out, in, n);
+    return;
+
+  case 0x04030201:
+    for (int i = 0; i < n; i += w)
+      for (int j = 0; j < w; j++)
+        out[i + j] = in[i + w - j - 1];
+    return;
+
+  default:
+    assert((* (uint32_t *) order) == 0x01020304 || (* (uint32_t *) order) == 0x04030201);
+  }
+}
+
+/*
+ * Software implementation of SHA-1 block algorithm.
+ */
+
+static hal_error_t sw_hash_core_sha1(hal_hash_state_t *state)
+{
+  static const uint32_t iv[5] = {0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL};
+
+  if (state == NULL)
+    return HAL_ERROR_BAD_ARGUMENTS;
+
+  uint32_t H[5], S[5], W[80];
+
+  if (state->block_count == 0)
+    memcpy(H, iv, sizeof(iv));
+  else
+    swytebop(H, state->core_state, sizeof(H), sizeof(*H));
+
+  memcpy(S, H, sizeof(H));
+
+  swytebop(W, state->block, 16 * sizeof(*W), sizeof(*W));
+
+  for (int i = 16; i < 80; i++)
+    W[i] = rot_l_32(W[i - 3] ^ W[i - 8] ^ W[i - 14] ^ W[i - 16], 1); 
+
+  for (int i = 0; i < 80; i++) {
+    const int a = sha1_pos(i, 0), b = sha1_pos(i, 1), c = sha1_pos(i, 2), d = sha1_pos(i, 3), e = sha1_pos(i, 4);
+
+    uint32_t f, k;
+    if (i < 20)         f = Choose_32(   S[b], S[c], S[d]), k = 0x5A827999UL;
+    else if (i < 40)    f = Parity_32(   S[b], S[c], S[d]), k = 0x6ED9EBA1UL;
+    else if (i < 60)    f = Majority_32( S[b], S[c], S[d]), k = 0x8F1BBCDCUL;
+    else                f = Parity_32(   S[b], S[c], S[d]), k = 0xCA62C1D6UL;
+
+    if (debug)
+      fprintf(stderr,
+              "[Round %02d < a = 0x%08x, b = 0x%08x, c = 0x%08x, d = 0x%08x, e = 0x%08x, f = 0x%08x, k = 0x%08x, w = 0x%08x]\n",
+              i, S[a], S[b], S[c], S[d], S[e], f, k, W[i]);
+
+    S[e] = rot_l_32(S[a], 5) + f + S[e] + k + W[i];
+    S[b] = rot_l_32(S[b], 30);
+
+    if (debug)
+      fprintf(stderr, "[Round %02d > a = 0x%08x, b = 0x%08x, c = 0x%08x, d = 0x%08x, e = 0x%08x]\n",
+              i, S[a], S[b], S[c], S[d], S[e]);
+  }
+
+  for (int i = 0; i < 5; i++)
+    H[i] += S[i];
+
+  swytebop(state->core_state, H, sizeof(H), sizeof(*H));
+
+  return HAL_OK;
+}
+
+/*
+ * Software implementation of SHA-256 block algorithm; doesn't support truncated variants because
+ * the Cryptech Verilog implementation doesn't.
+ */
+
+static hal_error_t sw_hash_core_sha256(hal_hash_state_t *state)
+{
+  static const uint32_t iv[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+                                 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL};
+
+  if (state == NULL)
+    return HAL_ERROR_BAD_ARGUMENTS;
+
+  uint32_t H[8], S[8], W[64];
+
+  if (state->block_count == 0)
+    memcpy(H, iv, sizeof(iv));
+  else
+    swytebop(H, state->core_state, sizeof(H), sizeof(*H));
+
+  memcpy(S, H, sizeof(H));
+
+  swytebop(W, state->block, 16 * sizeof(*W), sizeof(*W));
+
+  for (int i = 16; i < 64; i++)
+    W[i] = Gamma1_32(W[i - 2]) + W[i - 7] + Gamma0_32(W[i - 15]) + W[i - 16];
+
+  for (int i = 0; i < 64; i++) {
+    const int a = sha2_pos(i, 0), b = sha2_pos(i, 1), c = sha2_pos(i, 2), d = sha2_pos(i, 3);
+    const int e = sha2_pos(i, 4), f = sha2_pos(i, 5), g = sha2_pos(i, 6), h = sha2_pos(i, 7);
+
+    const uint32_t t0 = S[h] + Sigma1_32(S[e]) + Choose_32(S[e], S[f], S[g]) + sha256_K[i] + W[i];
+    const uint32_t t1 = Sigma0_32(S[a]) + Majority_32(S[a], S[b], S[c]);
+
+    S[d] += t0;
+    S[h] = t0 + t1;
+  }
+
+  for (int i = 0; i < 8; i++)
+    H[i] += S[i];
+
+  swytebop(state->core_state, H, sizeof(H), sizeof(*H));
+
+  return HAL_OK;
+}
+
+/*
+ * Software implementation of SHA-512 block algorithm, including support for the same truncated
+ * variants that the Cryptech Verilog SHA-512 core supports.
+ */
+
+static hal_error_t sw_hash_core_sha512(hal_hash_state_t *state)
+{
+  static const uint64_t
+    sha512_iv[8]     = {0x6A09E667F3BCC908ULL, 0xBB67AE8584CAA73BULL, 0x3C6EF372FE94F82BULL, 0xA54FF53A5F1D36F1ULL,
+                        0x510E527FADE682D1ULL, 0x9B05688C2B3E6C1FULL, 0x1F83D9ABFB41BD6BULL, 0x5BE0CD19137E2179ULL};
+  static const uint64_t
+    sha384_iv[8]     = {0xCBBB9D5DC1059ED8ULL, 0x629A292A367CD507ULL, 0x9159015A3070DD17ULL, 0x152FECD8F70E5939ULL,
+                        0x67332667FFC00B31ULL, 0x8EB44A8768581511ULL, 0xDB0C2E0D64F98FA7ULL, 0x47B5481DBEFA4FA4ULL};
+  static const uint64_t
+    sha512_224_iv[8] = {0x8C3D37C819544DA2ULL, 0x73E1996689DCD4D6ULL, 0x1DFAB7AE32FF9C82ULL, 0x679DD514582F9FCFULL,
+                        0x0F6D2B697BD44DA8ULL, 0x77E36F7304C48942ULL, 0x3F9D85A86A1D36C8ULL, 0x1112E6AD91D692A1ULL};
+  static const uint64_t
+    sha512_256_iv[8] = {0x22312194FC2BF72CULL, 0x9F555FA3C84C64C2ULL, 0x2393B86B6F53B151ULL, 0x963877195940EABDULL,
+                        0x96283EE2A88EFFE3ULL, 0xBE5E1E2553863992ULL, 0x2B0199FC2C85B8AAULL, 0x0EB72DDC81C52CA2ULL};
+
+  if (state == NULL)
+    return HAL_ERROR_BAD_ARGUMENTS;
+
+  uint64_t H[8], S[8], W[80];
+
+  if (state->block_count == 0)
+    switch (state->driver->ctrl_mode & MODE_SHA_MASK) {
+    case MODE_SHA_512_224:      memcpy(H, sha512_224_iv, sizeof(sha512_224_iv)); break;
+    case MODE_SHA_512_256:      memcpy(H, sha512_256_iv, sizeof(sha512_256_iv)); break;
+    case MODE_SHA_384:          memcpy(H, sha384_iv,     sizeof(sha384_iv));     break;
+    case MODE_SHA_512:          memcpy(H, sha512_iv,     sizeof(sha512_iv));     break;
+    default:                    return HAL_ERROR_IMPOSSIBLE;
+    }
+  else
+    swytebop(H, state->core_state, sizeof(H), sizeof(*H));
+
+  memcpy(S, H, sizeof(H));
+
+  swytebop(W, state->block, 16 * sizeof(*W), sizeof(*W));
+
+  for (int i = 16; i < 80; i++)
+    W[i] = Gamma1_64(W[i - 2]) + W[i - 7] + Gamma0_64(W[i - 15]) + W[i - 16];
+
+  for (int i = 0; i < 80; i++) {
+    const int a = sha2_pos(i, 0), b = sha2_pos(i, 1), c = sha2_pos(i, 2), d = sha2_pos(i, 3);
+    const int e = sha2_pos(i, 4), f = sha2_pos(i, 5), g = sha2_pos(i, 6), h = sha2_pos(i, 7);
+
+    const uint64_t t0 = S[h] + Sigma1_64(S[e]) + Choose_64(S[e], S[f], S[g]) + sha512_K[i] + W[i];
+    const uint64_t t1 = Sigma0_64(S[a]) + Majority_64(S[a], S[b], S[c]);
+
+    S[d] += t0;
+    S[h] = t0 + t1;
+  }
+
+  for (int i = 0; i < 8; i++)
+    H[i] += S[i];
+
+  swytebop(state->core_state, H, sizeof(H), sizeof(*H));
+
+  return HAL_OK;
+}
+
+#endif /* HAL_ENABLE_SOFTWARE_HASH_CORES */
+
 /*
  * "Any programmer who fails to comply with the standard naming, formatting,
  *  or commenting conventions should be shot.  If it so happens that it is
diff --git a/verilog_constants.h b/verilog_constants.h
index 879d2af..dfd102a 100644
--- a/verilog_constants.h
+++ b/verilog_constants.h
@@ -8,7 +8,7 @@
  * hand-edited.
  *
  * Authors: Joachim Strombergson, Paul Selkirk, Rob Austein
- * Copyright (c) 2015, NORDUnet A/S All rights reserved.
+ * Copyright (c) 2015-2016, NORDUnet A/S All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -81,6 +81,7 @@
 #define MODE_SHA_512_256        (1 << 2)
 #define MODE_SHA_384            (2 << 2)
 #define MODE_SHA_512            (3 << 2)
+#define	MODE_SHA_MASK   	(3 << 2)
 
 /*
  * RNG cores.

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list