Commit c80ae7ca authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu

crypto: arm/sha512 - accelerated SHA-512 using ARM generic ASM and NEON

This replaces the SHA-512 NEON module with the faster and more
versatile implementation from the OpenSSL project. It consists
of both a NEON and a generic ASM version of the core SHA-512
transform, where the NEON version reverts to the ASM version
when invoked in non-process context.

This patch is based on the OpenSSL upstream version b1a5d1c65208
of sha512-armv4.pl, which can be found here:

  https://git.openssl.org/gitweb/?p=openssl.git;h=b1a5d1c65208

Performance relative to the generic implementation (measured
using tcrypt.ko mode=306 sec=1 running on a Cortex-A57 under
KVM):

  input size	block size	asm	neon	old neon

  16		16		1.39	2.54	2.21
  64		16		1.32	2.33	2.09
  64		64		1.38	2.53	2.19
  256		16		1.31	2.28	2.06
  256		64		1.38	2.54	2.25
  256		256		1.40	2.77	2.39
  1024		16		1.29	2.22	2.01
  1024		256		1.40	2.82	2.45
  1024		1024		1.41	2.93	2.53
  2048		16		1.33	2.21	2.00
  2048		256		1.40	2.84	2.46
  2048		1024		1.41	2.96	2.55
  2048		2048		1.41	2.98	2.56
  4096		16		1.34	2.20	1.99
  4096		256		1.40	2.84	2.46
  4096		1024		1.41	2.97	2.56
  4096		4096		1.41	3.01	2.58
  8192		16		1.34	2.19	1.99
  8192		256		1.40	2.85	2.47
  8192		1024		1.41	2.98	2.56
  8192		4096		1.41	2.71	2.59
  8192		8192		1.51	3.51	2.69
Acked-by: default avatarJussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 551d7ed2
......@@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM
SHA-256 secure hash standard (DFIPS 180-2) implemented
using optimized ARM assembler and NEON, when available.
config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on KERNEL_MODE_NEON
select CRYPTO_SHA512
config CRYPTO_SHA512_ARM
tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
select CRYPTO_HASH
depends on !CPU_V7M
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
using ARM NEON instructions, when available.
This version of SHA implements a 512 bit hash with 256 bits of
security against collision attacks.
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
using optimized ARM assembler and NEON, when available.
config CRYPTO_AES_ARM
tristate "AES cipher algorithms (ARM-asm)"
......
......@@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
......@@ -30,7 +30,8 @@ sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
......@@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* sha512-glue.c - accelerated SHA-384/512 for ARM
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include "sha512.h"
MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha384-arm");
MODULE_ALIAS_CRYPTO("sha512-arm");
asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
int sha512_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order);
}
int sha512_arm_final(struct shash_desc *desc, u8 *out)
{
sha512_base_do_finalize(desc,
(sha512_block_fn *)sha512_block_data_order);
return sha512_base_finish(desc, out);
}
int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order);
return sha512_arm_final(desc, out);
}
static struct shash_alg sha512_arm_algs[] = { {
.init = sha384_base_init,
.update = sha512_arm_update,
.final = sha512_arm_final,
.finup = sha512_arm_finup,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-arm",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha512_base_init,
.update = sha512_arm_update,
.final = sha512_arm_final,
.finup = sha512_arm_finup,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-arm",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha512_arm_mod_init(void)
{
int err;
err = crypto_register_shashes(sha512_arm_algs,
ARRAY_SIZE(sha512_arm_algs));
if (err)
return err;
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
err = crypto_register_shashes(sha512_neon_algs,
ARRAY_SIZE(sha512_neon_algs));
if (err)
goto err_unregister;
}
return 0;
err_unregister:
crypto_unregister_shashes(sha512_arm_algs,
ARRAY_SIZE(sha512_arm_algs));
return err;
}
static void __exit sha512_arm_mod_fini(void)
{
crypto_unregister_shashes(sha512_arm_algs,
ARRAY_SIZE(sha512_arm_algs));
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
crypto_unregister_shashes(sha512_neon_algs,
ARRAY_SIZE(sha512_neon_algs));
}
module_init(sha512_arm_mod_init);
module_exit(sha512_arm_mod_fini);
/*
* sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON
*
* Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/simd.h>
#include <asm/neon.h>
#include "sha512.h"
MODULE_ALIAS_CRYPTO("sha384-neon");
MODULE_ALIAS_CRYPTO("sha512-neon");
asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
int blocks);
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
if (!may_use_simd() ||
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
return sha512_arm_update(desc, data, len);
kernel_neon_begin();
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order_neon);
kernel_neon_end();
return 0;
}
static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
if (!may_use_simd())
return sha512_arm_finup(desc, data, len, out);
kernel_neon_begin();
if (len)
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order_neon);
sha512_base_do_finalize(desc,
(sha512_block_fn *)sha512_block_data_order_neon);
kernel_neon_end();
return sha512_base_finish(desc, out);
}
static int sha512_neon_final(struct shash_desc *desc, u8 *out)
{
return sha512_neon_finup(desc, NULL, 0, out);
}
struct shash_alg sha512_neon_algs[] = { {
.init = sha384_base_init,
.update = sha512_neon_update,
.final = sha512_neon_final,
.finup = sha512_neon_finup,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA384_DIGEST_SIZE,
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-neon",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.init = sha512_base_init,
.update = sha512_neon_update,
.final = sha512_neon_final,
.finup = sha512_neon_finup,
.descsize = sizeof(struct sha512_state),
.digestsize = SHA512_DIGEST_SIZE,
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-neon",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
int sha512_arm_update(struct shash_desc *desc, const u8 *data,
unsigned int len);
int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out);
extern struct shash_alg sha512_neon_algs[2];
/*
* Glue code for the SHA512 Secure Hash Algorithm assembly implementation
* using NEON instructions.
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is based on sha512_ssse3_glue.c:
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
static const u64 sha512_k[] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
const u64 k[], unsigned int num_blks);
static int sha512_neon_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
if (partial) {
done = SHA512_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
}
if (len - done >= SHA512_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
sha512_transform_neon(sctx->state, data + done, sha512_k,
rounds);
done += rounds * SHA512_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA512_BLOCK_SIZE) {
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
memcpy(sctx->buf + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = crypto_sha512_update(desc, data, len);
} else {
kernel_neon_begin();
res = __sha512_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha512_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be64 *dst = (__be64 *)out;
__be64 bits[2];
static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128 and append length */
index = sctx->count[0] & 0x7f;
padlen = (index < 112) ? (112 - index) : ((128+112) - index);
if (!may_use_simd()) {
crypto_sha512_update(desc, padding, padlen);
crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha512_neon_update() */
if (padlen <= 112) {
sctx->count[0] += padlen;
if (sctx->count[0] < padlen)
sctx->count[1]++;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha512_neon_update(desc, padding, padlen, index);
}
__sha512_neon_update(desc, (const u8 *)&bits,
sizeof(bits), 112);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha512_neon_export(struct shash_desc *desc, void *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha512_neon_import(struct shash_desc *desc, const void *in)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static int sha384_neon_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA512_DIGEST_SIZE];
sha512_neon_final(desc, D);
memcpy(hash, D, SHA384_DIGEST_SIZE);
memzero_explicit(D, SHA512_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_neon_init,
.update = sha512_neon_update,
.final = sha512_neon_final,
.export = sha512_neon_export,
.import = sha512_neon_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_neon_init,
.update = sha512_neon_update,
.final = sha384_neon_final,
.export = sha512_neon_export,
.import = sha512_neon_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha512_neon_mod_init(void)
{
if (!cpu_has_neon())
return -ENODEV;
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha512_neon_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha512_neon_mod_init);
module_exit(sha512_neon_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha384");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment