Commit 27d189c0 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits)
  hwrng: via_rng - Fix memory scribbling on some CPUs
  crypto: padlock - Move padlock.h into include/crypto
  hwrng: via_rng - Fix asm constraints
  crypto: n2 - use __devexit not __exit in n2_unregister_algs
  crypto: mark crypto workqueues CPU_INTENSIVE
  crypto: mv_cesa - dont return PTR_ERR() of wrong pointer
  crypto: ripemd - Set module author and update email address
  crypto: omap-sham - backlog handling fix
  crypto: gf128mul - Remove experimental tag
  crypto: af_alg - fix af_alg memory_allocated data type
  crypto: aesni-intel - Fixed build with binutils 2.16
  crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets
  net: Add missing lockdep class names for af_alg
  include: Install linux/if_alg.h for user-space crypto API
  crypto: omap-aes - checkpatch --file warning fixes
  crypto: omap-aes - initialize aes module once per request
  crypto: omap-aes - unnecessary code removed
  crypto: omap-aes - error handling implementation improved
  crypto: omap-aes - redundant locking is removed
  crypto: omap-aes - DMA initialization fixes for OMAP off mode
  ...
parents a1703154 55db8387
......@@ -9,6 +9,20 @@
* Vinodh Gopal <vinodh.gopal@intel.com>
* Kahraman Akdemir
*
* Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
* interface for 64-bit kernels.
* Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Adrian Hoban <adrian.hoban@intel.com>
* James Guilford (james.guilford@intel.com)
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Wajdi Feghali (wajdi.k.feghali@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* Ported x86_64 version to x86:
* Author: Mathias Krause <minipli@googlemail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
......@@ -18,8 +32,62 @@
#include <linux/linkage.h>
#include <asm/inst.h>
#ifdef __x86_64__
.data
POLY: .octa 0xC2000000000000000000000000000001
TWOONE: .octa 0x00000001000000000000000000000001
# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK,
# and ZERO should follow ALL_F
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
MASK1: .octa 0x0000000000000000ffffffffffffffff
MASK2: .octa 0xffffffffffffffff0000000000000000
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
ZERO: .octa 0x00000000000000000000000000000000
ONE: .octa 0x00000000000000000000000000000001
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
dec: .octa 0x1
enc: .octa 0x2
.text
#define STACK_OFFSET 8*3
#define HashKey 16*0 // store HashKey <<1 mod poly here
#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here
#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here
#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here
#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64
// bits of HashKey <<1 mod poly here
//(for Karatsuba purposes)
#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64
// bits of HashKey^2 <<1 mod poly here
// (for Karatsuba purposes)
#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64
// bits of HashKey^3 <<1 mod poly here
// (for Karatsuba purposes)
#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64
// bits of HashKey^4 <<1 mod poly here
// (for Karatsuba purposes)
#define VARIABLE_OFFSET 16*8
#define arg1 rdi
#define arg2 rsi
#define arg3 rdx
#define arg4 rcx
#define arg5 r8
#define arg6 r9
#define arg7 STACK_OFFSET+8(%r14)
#define arg8 STACK_OFFSET+16(%r14)
#define arg9 STACK_OFFSET+24(%r14)
#define arg10 STACK_OFFSET+32(%r14)
#endif
#define STATE1 %xmm0
#define STATE2 %xmm4
#define STATE3 %xmm5
......@@ -32,12 +100,16 @@
#define IN IN1
#define KEY %xmm2
#define IV %xmm3
#define BSWAP_MASK %xmm10
#define CTR %xmm11
#define INC %xmm12
#ifdef __x86_64__
#define AREG %rax
#define KEYP %rdi
#define OUTP %rsi
#define UKEYP OUTP
#define INP %rdx
#define LEN %rcx
#define IVP %r8
......@@ -46,6 +118,1588 @@
#define TKEYP T1
#define T2 %r11
#define TCTR_LOW T2
#else
#define AREG %eax
#define KEYP %edi
#define OUTP AREG
#define UKEYP OUTP
#define INP %edx
#define LEN %esi
#define IVP %ebp
#define KLEN %ebx
#define T1 %ecx
#define TKEYP T1
#endif
#ifdef __x86_64__
/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
*
*
* Input: A and B (128-bits each, bit-reflected)
* Output: C = A*B*x mod poly, (i.e. >>1 )
* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
*
*/
.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
movdqa \GH, \TMP1
pshufd $78, \GH, \TMP2
pshufd $78, \HK, \TMP3
pxor \GH, \TMP2 # TMP2 = a1+a0
pxor \HK, \TMP3 # TMP3 = b1+b0
PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
pxor \GH, \TMP2
pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
movdqa \TMP2, \TMP3
pslldq $8, \TMP3 # left shift TMP3 2 DWs
psrldq $8, \TMP2 # right shift TMP2 2 DWs
pxor \TMP3, \GH
pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK
# first phase of the reduction
movdqa \GH, \TMP2
movdqa \GH, \TMP3
movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4
# in in order to perform
# independent shifts
pslld $31, \TMP2 # packed right shift <<31
pslld $30, \TMP3 # packed right shift <<30
pslld $25, \TMP4 # packed right shift <<25
pxor \TMP3, \TMP2 # xor the shifted versions
pxor \TMP4, \TMP2
movdqa \TMP2, \TMP5
psrldq $4, \TMP5 # right shift TMP5 1 DW
pslldq $12, \TMP2 # left shift TMP2 3 DWs
pxor \TMP2, \GH
# second phase of the reduction
movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4
# in in order to perform
# independent shifts
movdqa \GH,\TMP3
movdqa \GH,\TMP4
psrld $1,\TMP2 # packed left shift >>1
psrld $2,\TMP3 # packed left shift >>2
psrld $7,\TMP4 # packed left shift >>7
pxor \TMP3,\TMP2 # xor the shifted versions
pxor \TMP4,\TMP2
pxor \TMP5, \TMP2
pxor \TMP2, \GH
pxor \TMP1, \GH # result is in TMP1
.endm
/*
* if a = number of total plaintext bytes
* b = floor(a/16)
* num_initial_blocks = b mod 4
* encrypt the initial num_initial_blocks blocks and apply ghash on
* the ciphertext
* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
* are clobbered
* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
*/
.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
mov arg7, %r10 # %r10 = AAD
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
pxor %xmm\i, %xmm\i
_get_AAD_loop\num_initial_blocks\operation:
movd (%r10), \TMP1
pslldq $12, \TMP1
psrldq $4, %xmm\i
pxor \TMP1, %xmm\i
add $4, %r10
sub $4, %r12
jne _get_AAD_loop\num_initial_blocks\operation
cmp $16, %r11
je _get_AAD_loop2_done\num_initial_blocks\operation
mov $16, %r12
_get_AAD_loop2\num_initial_blocks\operation:
psrldq $4, %xmm\i
sub $4, %r12
cmp %r11, %r12
jne _get_AAD_loop2\num_initial_blocks\operation
_get_AAD_loop2_done\num_initial_blocks\operation:
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
xor %r11, %r11 # initialise the data pointer offset as zero
# start AES for num_initial_blocks blocks
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM0
.if (\i == 5) || (\i == 6) || (\i == 7)
.irpc index, \i_seq
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, %xmm\index
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
.endr
.irpc index, \i_seq
pxor 16*0(%arg1), %xmm\index
.endr
.irpc index, \i_seq
movaps 0x10(%rdi), \TMP1
AESENC \TMP1, %xmm\index # Round 1
.endr
.irpc index, \i_seq
movaps 0x20(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x30(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x40(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x50(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x60(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x70(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x80(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x90(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0xa0(%arg1), \TMP1
AESENCLAST \TMP1, %xmm\index # Round 10
.endr
.irpc index, \i_seq
movdqu (%arg3 , %r11, 1), \TMP1
pxor \TMP1, %xmm\index
movdqu %xmm\index, (%arg2 , %r11, 1)
# write back plaintext/ciphertext for num_initial_blocks
add $16, %r11
movdqa \TMP1, %xmm\index
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation
.endr
.endif
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
# apply GHASH on num_initial_blocks blocks
.if \i == 5
pxor %xmm5, %xmm6
GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm6, %xmm7
GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 6
pxor %xmm6, %xmm7
GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 7
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
cmp $64, %r13
jl _initial_blocks_done\num_initial_blocks\operation
# no need for precomputed values
/*
*
* Precomputations for HashKey parallel with encryption of first 4 blocks.
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
*/
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM1
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM2
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM3
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM4
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
pxor 16*0(%arg1), \XMM1
pxor 16*0(%arg1), \XMM2
pxor 16*0(%arg1), \XMM3
pxor 16*0(%arg1), \XMM4
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
movdqa \TMP1, HashKey_k(%rsp)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
movdqa \TMP5, HashKey_2(%rsp)
# HashKey_2 = HashKey^2<<1 (mod poly)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_2_k(%rsp)
.irpc index, 1234 # do 4 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
movdqa \TMP5, HashKey_3(%rsp)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_3_k(%rsp)
.irpc index, 56789 # do next 5 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
movdqa \TMP5, HashKey_4(%rsp)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_4_k(%rsp)
movaps 0xa0(%arg1), \TMP2
AESENCLAST \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3
AESENCLAST \TMP2, \XMM4
movdqu 16*0(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM1
movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
movdqa \TMP1, \XMM1
movdqu 16*1(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM2
movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
movdqa \TMP1, \XMM2
movdqu 16*2(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM3
movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
movdqa \TMP1, \XMM3
movdqu 16*3(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM4
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
movdqa \TMP1, \XMM4
add $64, %r11
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\num_initial_blocks\operation:
.endm
/*
* if a = number of total plaintext bytes
* b = floor(a/16)
* num_initial_blocks = b mod 4
* encrypt the initial num_initial_blocks blocks and apply ghash on
* the ciphertext
* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
* are clobbered
* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
*/
.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
mov arg7, %r10 # %r10 = AAD
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
pxor %xmm\i, %xmm\i
_get_AAD_loop\num_initial_blocks\operation:
movd (%r10), \TMP1
pslldq $12, \TMP1
psrldq $4, %xmm\i
pxor \TMP1, %xmm\i
add $4, %r10
sub $4, %r12
jne _get_AAD_loop\num_initial_blocks\operation
cmp $16, %r11
je _get_AAD_loop2_done\num_initial_blocks\operation
mov $16, %r12
_get_AAD_loop2\num_initial_blocks\operation:
psrldq $4, %xmm\i
sub $4, %r12
cmp %r11, %r12
jne _get_AAD_loop2\num_initial_blocks\operation
_get_AAD_loop2_done\num_initial_blocks\operation:
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
xor %r11, %r11 # initialise the data pointer offset as zero
# start AES for num_initial_blocks blocks
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM0
.if (\i == 5) || (\i == 6) || (\i == 7)
.irpc index, \i_seq
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, %xmm\index
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
.endr
.irpc index, \i_seq
pxor 16*0(%arg1), %xmm\index
.endr
.irpc index, \i_seq
movaps 0x10(%rdi), \TMP1
AESENC \TMP1, %xmm\index # Round 1
.endr
.irpc index, \i_seq
movaps 0x20(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x30(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x40(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x50(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x60(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x70(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x80(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0x90(%arg1), \TMP1
AESENC \TMP1, %xmm\index # Round 2
.endr
.irpc index, \i_seq
movaps 0xa0(%arg1), \TMP1
AESENCLAST \TMP1, %xmm\index # Round 10
.endr
.irpc index, \i_seq
movdqu (%arg3 , %r11, 1), \TMP1
pxor \TMP1, %xmm\index
movdqu %xmm\index, (%arg2 , %r11, 1)
# write back plaintext/ciphertext for num_initial_blocks
add $16, %r11
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation
.endr
.endif
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
# apply GHASH on num_initial_blocks blocks
.if \i == 5
pxor %xmm5, %xmm6
GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm6, %xmm7
GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 6
pxor %xmm6, %xmm7
GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.elseif \i == 7
pxor %xmm7, %xmm8
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
cmp $64, %r13
jl _initial_blocks_done\num_initial_blocks\operation
# no need for precomputed values
/*
*
* Precomputations for HashKey parallel with encryption of first 4 blocks.
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
*/
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM1
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM2
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM3
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
paddd ONE(%rip), \XMM0 # INCR Y0
movdqa \XMM0, \XMM4
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
pxor 16*0(%arg1), \XMM1
pxor 16*0(%arg1), \XMM2
pxor 16*0(%arg1), \XMM3
pxor 16*0(%arg1), \XMM4
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
movdqa \TMP1, HashKey_k(%rsp)
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^2<<1 (mod poly)
movdqa \TMP5, HashKey_2(%rsp)
# HashKey_2 = HashKey^2<<1 (mod poly)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_2_k(%rsp)
.irpc index, 1234 # do 4 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
movdqa \TMP5, HashKey_3(%rsp)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_3_k(%rsp)
.irpc index, 56789 # do next 5 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
# TMP5 = HashKey^3<<1 (mod poly)
movdqa \TMP5, HashKey_4(%rsp)
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_4_k(%rsp)
movaps 0xa0(%arg1), \TMP2
AESENCLAST \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3
AESENCLAST \TMP2, \XMM4
movdqu 16*0(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM1
movdqu 16*1(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM2
movdqu 16*2(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM3
movdqu 16*3(%arg3 , %r11 , 1), \TMP1
pxor \TMP1, \XMM4
movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
add $64, %r11
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\num_initial_blocks\operation:
.endm
/*
* encrypt 4 blocks at a time
* ghash the 4 previously encrypted ciphertext blocks
* arg1, %arg2, %arg3 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
movdqa \XMM2, \XMM6
movdqa \XMM3, \XMM7
movdqa \XMM4, \XMM8
movdqa SHUF_MASK(%rip), %xmm15
# multiply TMP5 * HashKey using karatsuba
movdqa \XMM5, \TMP4
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa HashKey_4(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
movdqa HashKey_4_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 2
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
movdqa HashKey_3(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 4
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
movdqa HashKey_3_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5
pxor \TMP2, \TMP6
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
movdqa HashKey_2(%rsp ), \TMP5
# Multiply TMP5 * HashKey using karatsuba
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 6
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 7
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
movdqa HashKey_2_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5
pxor \TMP2, \TMP6
# Multiply XMM8 * HashKey
# XMM8 and TMP5 hold the values for the two operands
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
movdqa HashKey(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
movaps 0xa0(%arg1), \TMP3
AESENCLAST \TMP3, \XMM1 # Round 10
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
movdqa HashKey_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu 16(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
movdqu 32(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
movdqu 48(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer
movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer
movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer
movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1
pxor \XMM8, \XMM5
pxor \TMP6, \TMP2
pxor \TMP1, \TMP2
pxor \XMM5, \TMP2
movdqa \TMP2, \TMP3
pslldq $8, \TMP3 # left shift TMP3 2 DWs
psrldq $8, \TMP2 # right shift TMP2 2 DWs
pxor \TMP3, \XMM5
pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
# first phase of reduction
movdqa \XMM5, \TMP2
movdqa \XMM5, \TMP3
movdqa \XMM5, \TMP4
# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
pslld $31, \TMP2 # packed right shift << 31
pslld $30, \TMP3 # packed right shift << 30
pslld $25, \TMP4 # packed right shift << 25
pxor \TMP3, \TMP2 # xor the shifted versions
pxor \TMP4, \TMP2
movdqa \TMP2, \TMP5
psrldq $4, \TMP5 # right shift T5 1 DW
pslldq $12, \TMP2 # left shift T2 3 DWs
pxor \TMP2, \XMM5
# second phase of reduction
movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
movdqa \XMM5,\TMP3
movdqa \XMM5,\TMP4
psrld $1, \TMP2 # packed left shift >>1
psrld $2, \TMP3 # packed left shift >>2
psrld $7, \TMP4 # packed left shift >>7
pxor \TMP3,\TMP2 # xor the shifted versions
pxor \TMP4,\TMP2
pxor \TMP5, \TMP2
pxor \TMP2, \XMM5
pxor \TMP1, \XMM5 # result is in TMP1
pxor \XMM5, \XMM1
.endm
/*
* decrypt 4 blocks at a time
* ghash the 4 previously decrypted ciphertext blocks
* arg1, %arg2, %arg3 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM1, \XMM5
movdqa \XMM2, \XMM6
movdqa \XMM3, \XMM7
movdqa \XMM4, \XMM8
movdqa SHUF_MASK(%rip), %xmm15
# multiply TMP5 * HashKey using karatsuba
movdqa \XMM5, \TMP4
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa HashKey_4(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM2
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM3
paddd ONE(%rip), \XMM0 # INCR CNT
movdqa \XMM0, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
pxor (%arg1), \XMM1
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
movdqa HashKey_4_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
movaps 0x20(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 2
AESENC \TMP1, \XMM2
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
movdqa HashKey_3(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
movaps 0x40(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 4
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
movdqa HashKey_3_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM6, \XMM5
pxor \TMP2, \TMP6
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
movdqa HashKey_2(%rsp ), \TMP5
# Multiply TMP5 * HashKey using karatsuba
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x60(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 6
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
movaps 0x70(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 7
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
movdqa HashKey_2_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
pxor \TMP1, \TMP4
# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
pxor \XMM7, \XMM5
pxor \TMP2, \TMP6
# Multiply XMM8 * HashKey
# XMM8 and TMP5 hold the values for the two operands
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
movdqa HashKey(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
movaps 0xa0(%arg1), \TMP3
AESENCLAST \TMP3, \XMM1 # Round 10
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
movdqa HashKey_k(%rsp), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqu (%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM1
movdqu 16(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM2
movdqu 32(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM3
movdqu 48(%arg3,%r11,1), \TMP3
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
pxor \TMP4, \TMP1
pxor \XMM8, \XMM5
pxor \TMP6, \TMP2
pxor \TMP1, \TMP2
pxor \XMM5, \TMP2
movdqa \TMP2, \TMP3
pslldq $8, \TMP3 # left shift TMP3 2 DWs
psrldq $8, \TMP2 # right shift TMP2 2 DWs
pxor \TMP3, \XMM5
pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
# first phase of reduction
movdqa \XMM5, \TMP2
movdqa \XMM5, \TMP3
movdqa \XMM5, \TMP4
# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
pslld $31, \TMP2 # packed right shift << 31
pslld $30, \TMP3 # packed right shift << 30
pslld $25, \TMP4 # packed right shift << 25
pxor \TMP3, \TMP2 # xor the shifted versions
pxor \TMP4, \TMP2
movdqa \TMP2, \TMP5
psrldq $4, \TMP5 # right shift T5 1 DW
pslldq $12, \TMP2 # left shift T2 3 DWs
pxor \TMP2, \XMM5
# second phase of reduction
movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
movdqa \XMM5,\TMP3
movdqa \XMM5,\TMP4
psrld $1, \TMP2 # packed left shift >>1
psrld $2, \TMP3 # packed left shift >>2
psrld $7, \TMP4 # packed left shift >>7
pxor \TMP3,\TMP2 # xor the shifted versions
pxor \TMP4,\TMP2
pxor \TMP5, \TMP2
pxor \TMP2, \XMM5
pxor \TMP1, \XMM5 # result is in TMP1
pxor \XMM5, \XMM1
.endm
/* GHASH the last 4 ciphertext blocks. */
.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
# Multiply TMP6 * HashKey (using Karatsuba)
movdqa \XMM1, \TMP6
pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2
movdqa HashKey_4(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
movdqa HashKey_4_k(%rsp), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
# Multiply TMP1 * HashKey (using Karatsuba)
movdqa \XMM2, \TMP1
pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2
movdqa HashKey_3(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
movdqa HashKey_3_k(%rsp), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst
pxor \TMP2, \XMM1
# results accumulated in TMP6, XMMDst, XMM1
# Multiply TMP1 * HashKey (using Karatsuba)
movdqa \XMM3, \TMP1
pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2
movdqa HashKey_2(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
movdqa HashKey_2_k(%rsp), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst
pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
# Multiply TMP1 * HashKey (using Karatsuba)
movdqa \XMM4, \TMP1
pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2
movdqa HashKey(%rsp), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
movdqa HashKey_k(%rsp), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst
pxor \XMM1, \TMP2
pxor \TMP6, \TMP2
pxor \XMMDst, \TMP2
# middle section of the temp results combined as in karatsuba algorithm
movdqa \TMP2, \TMP4
pslldq $8, \TMP4 # left shift TMP4 2 DWs
psrldq $8, \TMP2 # right shift TMP2 2 DWs
pxor \TMP4, \XMMDst
pxor \TMP2, \TMP6
# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
# first phase of the reduction
movdqa \XMMDst, \TMP2
movdqa \XMMDst, \TMP3
movdqa \XMMDst, \TMP4
# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
pslld $31, \TMP2 # packed right shifting << 31
pslld $30, \TMP3 # packed right shifting << 30
pslld $25, \TMP4 # packed right shifting << 25
pxor \TMP3, \TMP2 # xor the shifted versions
pxor \TMP4, \TMP2
movdqa \TMP2, \TMP7
psrldq $4, \TMP7 # right shift TMP7 1 DW
pslldq $12, \TMP2 # left shift TMP2 3 DWs
pxor \TMP2, \XMMDst
# second phase of the reduction
movdqa \XMMDst, \TMP2
# make 3 copies of XMMDst for doing 3 shift operations
movdqa \XMMDst, \TMP3
movdqa \XMMDst, \TMP4
psrld $1, \TMP2 # packed left shift >> 1
psrld $2, \TMP3 # packed left shift >> 2
psrld $7, \TMP4 # packed left shift >> 7
pxor \TMP3, \TMP2 # xor the shifted versions
pxor \TMP4, \TMP2
pxor \TMP7, \TMP2
pxor \TMP2, \XMMDst
pxor \TMP6, \XMMDst # reduced result is in XMMDst
.endm
/* Encryption of a single block done*/
.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
pxor (%arg1), \XMM0
movaps 16(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 32(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 48(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 64(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 80(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 96(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 112(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 128(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 144(%arg1), \TMP1
AESENC \TMP1, \XMM0
movaps 160(%arg1), \TMP1
AESENCLAST \TMP1, \XMM0
.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, // Plaintext output. Encrypt in-place is allowed.
* const u8 *in, // Ciphertext input
* u64 plaintext_len, // Length of data in bytes for decryption.
* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
* // concatenated with 0x00000001. 16-byte aligned pointer.
* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, // Additional Authentication Data (AAD)
* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the
* // given authentication tag and only return the plaintext if they match.
* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
* // (most likely), 12 or 8.
*
* Assumptions:
*
* keys:
* keys are pre-expanded and aligned to 16 bytes. we are using the first
* set of 11 keys in the data structure void *aes_ctx
*
* iv:
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Salt (From the SA) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Initialization Vector |
* | (This is the sequence number from IPSec header) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x1 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*
*
* AAD:
* AAD padded to 128 bits with 0
* for example, assume AAD is a u32 vector
*
* if AAD is 8 bytes:
* AAD[3] = {A0, A1};
* padded AAD in xmm register = {A1 A0 0 0}
*
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | SPI (A1) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 32-bit Sequence Number (A0) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x0 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* AAD Format with 32-bit Sequence Number
*
* if AAD is 12 bytes:
* AAD[3] = {A0, A1, A2};
* padded AAD in xmm register = {A2 A1 A0 0}
*
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | SPI (A2) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 64-bit Extended Sequence Number {A1,A0} |
* | |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x0 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* AAD Format with 64-bit Extended Sequence Number
*
* aadLen:
* from the definition of the spec, aadLen can only be 8 or 12 bytes.
* The code supports 16 too but for other sizes, the code will fail.
*
* TLen:
* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
* For other sizes, the code will fail.
*
* poly = x^128 + x^127 + x^126 + x^121 + 1
*
*****************************************************************************/
ENTRY(aesni_gcm_dec)
push %r12
push %r13
push %r14
mov %rsp, %r14
/*
* states of %xmm registers %xmm6:%xmm15 not saved
* all %xmm registers are clobbered
*/
sub $VARIABLE_OFFSET, %rsp
and $~63, %rsp # align rsp to 64 bytes
mov %arg6, %r12
movdqu (%r12), %xmm13 # %xmm13 = HashKey
movdqa SHUF_MASK(%rip), %xmm2
PSHUFB_XMM %xmm2, %xmm13
# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
movdqa %xmm13, %xmm2
psllq $1, %xmm13
psrlq $63, %xmm2
movdqa %xmm2, %xmm1
pslldq $8, %xmm2
psrldq $8, %xmm1
por %xmm2, %xmm13
# Reduction
pshufd $0x24, %xmm1, %xmm2
pcmpeqd TWOONE(%rip), %xmm2
pand POLY(%rip), %xmm2
pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly)
# Decrypt first few blocks
movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly)
mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext
and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
mov %r13, %r12
and $(3<<4), %r12
jz _initial_num_blocks_is_0_decrypt
cmp $(2<<4), %r12
jb _initial_num_blocks_is_1_decrypt
je _initial_num_blocks_is_2_decrypt
_initial_num_blocks_is_3_decrypt:
INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
sub $48, %r13
jmp _initial_blocks_decrypted
_initial_num_blocks_is_2_decrypt:
INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
sub $32, %r13
jmp _initial_blocks_decrypted
_initial_num_blocks_is_1_decrypt:
INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
sub $16, %r13
jmp _initial_blocks_decrypted
_initial_num_blocks_is_0_decrypt:
INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
_initial_blocks_decrypted:
cmp $0, %r13
je _zero_cipher_left_decrypt
sub $64, %r13
je _four_cipher_left_decrypt
_decrypt_by_4:
GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
add $64, %r11
sub $64, %r13
jne _decrypt_by_4
_four_cipher_left_decrypt:
GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
_zero_cipher_left_decrypt:
mov %arg4, %r13
and $15, %r13 # %r13 = arg4 (mod 16)
je _multiple_of_16_bytes_decrypt
# Handle the last <16 byte block seperately
paddd ONE(%rip), %xmm0 # increment CNT to get Yn
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
sub $16, %r11
add %r13, %r11
movdqu (%arg3,%r11,1), %xmm1 # recieve the last <16 byte block
lea SHIFT_MASK+16(%rip), %r12
sub %r13, %r12
# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
# (%r13 is the number of bytes in plaintext mod 16)
movdqu (%r12), %xmm2 # get the appropriate shuffle mask
PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes
movdqa %xmm1, %xmm2
pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm2
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10 ,%xmm2
pxor %xmm2, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# GHASH computation for the last <16 byte block
sub %r13, %r11
add $16, %r11
# output %r13 bytes
MOVQ_R64_XMM %xmm0, %rax
cmp $8, %r13
jle _less_than_8_bytes_left_decrypt
mov %rax, (%arg2 , %r11, 1)
add $8, %r11
psrldq $8, %xmm0
MOVQ_R64_XMM %xmm0, %rax
sub $8, %r13
_less_than_8_bytes_left_decrypt:
mov %al, (%arg2, %r11, 1)
add $1, %r11
shr $8, %rax
sub $1, %r13
jne _less_than_8_bytes_left_decrypt
_multiple_of_16_bytes_decrypt:
mov arg8, %r12 # %r13 = aadLen (number of bytes)
shl $3, %r12 # convert into number of bits
movd %r12d, %xmm15 # len(A) in %xmm15
shl $3, %arg4 # len(C) in bits (*128)
MOVQ_R64_XMM %arg4, %xmm1
pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
pxor %xmm15, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# final GHASH computation
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm8
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), %xmm0 # %xmm0 = Y0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
pxor %xmm8, %xmm0
_return_T_decrypt:
mov arg9, %r10 # %r10 = authTag
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_decrypt
cmp $12, %r11
je _T_12_decrypt
_T_8_decrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
jmp _return_T_done_decrypt
_T_12_decrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
psrldq $8, %xmm0
movd %xmm0, %eax
mov %eax, 8(%r10)
jmp _return_T_done_decrypt
_T_16_decrypt:
movdqu %xmm0, (%r10)
_return_T_done_decrypt:
mov %r14, %rsp
pop %r14
pop %r13
pop %r12
ret
/*****************************************************************************
* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, // Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, // Plaintext input
* u64 plaintext_len, // Length of data in bytes for encryption.
* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
* // concatenated with 0x00000001. 16-byte aligned pointer.
* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, // Additional Authentication Data (AAD)
* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
* u8 *auth_tag, // Authenticated Tag output.
* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
* // 12 or 8.
*
* Assumptions:
*
* keys:
* keys are pre-expanded and aligned to 16 bytes. we are using the
* first set of 11 keys in the data structure void *aes_ctx
*
*
* iv:
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Salt (From the SA) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | Initialization Vector |
* | (This is the sequence number from IPSec header) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x1 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
*
*
* AAD:
* AAD padded to 128 bits with 0
* for example, assume AAD is a u32 vector
*
* if AAD is 8 bytes:
* AAD[3] = {A0, A1};
* padded AAD in xmm register = {A1 A0 0 0}
*
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | SPI (A1) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 32-bit Sequence Number (A0) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x0 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* AAD Format with 32-bit Sequence Number
*
* if AAD is 12 bytes:
* AAD[3] = {A0, A1, A2};
* padded AAD in xmm register = {A2 A1 A0 0}
*
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | SPI (A2) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 64-bit Extended Sequence Number {A1,A0} |
* | |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | 0x0 |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* AAD Format with 64-bit Extended Sequence Number
*
* aadLen:
* from the definition of the spec, aadLen can only be 8 or 12 bytes.
* The code supports 16 too but for other sizes, the code will fail.
*
* TLen:
* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
* For other sizes, the code will fail.
*
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
ENTRY(aesni_gcm_enc)
push %r12
push %r13
push %r14
mov %rsp, %r14
#
# states of %xmm registers %xmm6:%xmm15 not saved
# all %xmm registers are clobbered
#
sub $VARIABLE_OFFSET, %rsp
and $~63, %rsp
mov %arg6, %r12
movdqu (%r12), %xmm13
movdqa SHUF_MASK(%rip), %xmm2
PSHUFB_XMM %xmm2, %xmm13
# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
movdqa %xmm13, %xmm2
psllq $1, %xmm13
psrlq $63, %xmm2
movdqa %xmm2, %xmm1
pslldq $8, %xmm2
psrldq $8, %xmm1
por %xmm2, %xmm13
# reduce HashKey<<1
pshufd $0x24, %xmm1, %xmm2
pcmpeqd TWOONE(%rip), %xmm2
pand POLY(%rip), %xmm2
pxor %xmm2, %xmm13
movdqa %xmm13, HashKey(%rsp)
mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly)
and $-16, %r13
mov %r13, %r12
# Encrypt first few blocks
and $(3<<4), %r12
jz _initial_num_blocks_is_0_encrypt
cmp $(2<<4), %r12
jb _initial_num_blocks_is_1_encrypt
je _initial_num_blocks_is_2_encrypt
_initial_num_blocks_is_3_encrypt:
INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
sub $48, %r13
jmp _initial_blocks_encrypted
_initial_num_blocks_is_2_encrypt:
INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
sub $32, %r13
jmp _initial_blocks_encrypted
_initial_num_blocks_is_1_encrypt:
INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
sub $16, %r13
jmp _initial_blocks_encrypted
_initial_num_blocks_is_0_encrypt:
INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
_initial_blocks_encrypted:
# Main loop - Encrypt remaining blocks
cmp $0, %r13
je _zero_cipher_left_encrypt
sub $64, %r13
je _four_cipher_left_encrypt
_encrypt_by_4_encrypt:
GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
add $64, %r11
sub $64, %r13
jne _encrypt_by_4_encrypt
_four_cipher_left_encrypt:
GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
_zero_cipher_left_encrypt:
mov %arg4, %r13
and $15, %r13 # %r13 = arg4 (mod 16)
je _multiple_of_16_bytes_encrypt
# Handle the last <16 Byte block seperately
paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
sub $16, %r11
add %r13, %r11
movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks
lea SHIFT_MASK+16(%rip), %r12
sub %r13, %r12
# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
# (%r13 is the number of bytes in plaintext mod 16)
movdqu (%r12), %xmm2 # get the appropriate shuffle mask
PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte
pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
# get the appropriate mask to mask out top 16-r13 bytes of xmm0
pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10,%xmm0
pxor %xmm0, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# GHASH computation for the last <16 byte block
sub %r13, %r11
add $16, %r11
PSHUFB_XMM %xmm10, %xmm1
# shuffle xmm0 back to output as ciphertext
# Output %r13 bytes
MOVQ_R64_XMM %xmm0, %rax
cmp $8, %r13
jle _less_than_8_bytes_left_encrypt
mov %rax, (%arg2 , %r11, 1)
add $8, %r11
psrldq $8, %xmm0
MOVQ_R64_XMM %xmm0, %rax
sub $8, %r13
_less_than_8_bytes_left_encrypt:
mov %al, (%arg2, %r11, 1)
add $1, %r11
shr $8, %rax
sub $1, %r13
jne _less_than_8_bytes_left_encrypt
_multiple_of_16_bytes_encrypt:
mov arg8, %r12 # %r12 = addLen (number of bytes)
shl $3, %r12
movd %r12d, %xmm15 # len(A) in %xmm15
shl $3, %arg4 # len(C) in bits (*128)
MOVQ_R64_XMM %arg4, %xmm1
pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
pxor %xmm15, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# final GHASH computation
movdqa SHUF_MASK(%rip), %xmm10
PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), %xmm0 # %xmm0 = Y0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0)
pxor %xmm8, %xmm0
_return_T_encrypt:
mov arg9, %r10 # %r10 = authTag
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_encrypt
cmp $12, %r11
je _T_12_encrypt
_T_8_encrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
jmp _return_T_done_encrypt
_T_12_encrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
psrldq $8, %xmm0
movd %xmm0, %eax
mov %eax, 8(%r10)
jmp _return_T_done_encrypt
_T_16_encrypt:
movdqu %xmm0, (%r10)
_return_T_done_encrypt:
mov %r14, %rsp
pop %r14
pop %r13
pop %r12
ret
#endif
_key_expansion_128:
_key_expansion_256a:
......@@ -55,10 +1709,11 @@ _key_expansion_256a:
shufps $0b10001100, %xmm0, %xmm4
pxor %xmm4, %xmm0
pxor %xmm1, %xmm0
movaps %xmm0, (%rcx)
add $0x10, %rcx
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
.align 4
_key_expansion_192a:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
......@@ -76,12 +1731,13 @@ _key_expansion_192a:
movaps %xmm0, %xmm1
shufps $0b01000100, %xmm0, %xmm6
movaps %xmm6, (%rcx)
movaps %xmm6, (TKEYP)
shufps $0b01001110, %xmm2, %xmm1
movaps %xmm1, 16(%rcx)
add $0x20, %rcx
movaps %xmm1, 0x10(TKEYP)
add $0x20, TKEYP
ret
.align 4
_key_expansion_192b:
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
......@@ -96,10 +1752,11 @@ _key_expansion_192b:
pxor %xmm3, %xmm2
pxor %xmm5, %xmm2
movaps %xmm0, (%rcx)
add $0x10, %rcx
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
.align 4
_key_expansion_256b:
pshufd $0b10101010, %xmm1, %xmm1
shufps $0b00010000, %xmm2, %xmm4
......@@ -107,8 +1764,8 @@ _key_expansion_256b:
shufps $0b10001100, %xmm2, %xmm4
pxor %xmm4, %xmm2
pxor %xmm1, %xmm2
movaps %xmm2, (%rcx)
add $0x10, %rcx
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
ret
/*
......@@ -116,17 +1773,23 @@ _key_expansion_256b:
* unsigned int key_len)
*/
ENTRY(aesni_set_key)
movups (%rsi), %xmm0 # user key (first 16 bytes)
movaps %xmm0, (%rdi)
lea 0x10(%rdi), %rcx # key addr
movl %edx, 480(%rdi)
#ifndef __x86_64__
pushl KEYP
movl 8(%esp), KEYP # ctx
movl 12(%esp), UKEYP # in_key
movl 16(%esp), %edx # key_len
#endif
movups (UKEYP), %xmm0 # user key (first 16 bytes)
movaps %xmm0, (KEYP)
lea 0x10(KEYP), TKEYP # key addr
movl %edx, 480(KEYP)
pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
cmp $24, %dl
jb .Lenc_key128
je .Lenc_key192
movups 0x10(%rsi), %xmm2 # other user key
movaps %xmm2, (%rcx)
add $0x10, %rcx
movups 0x10(UKEYP), %xmm2 # other user key
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_256a
AESKEYGENASSIST 0x1 %xmm0 %xmm1
......@@ -155,7 +1818,7 @@ ENTRY(aesni_set_key)
call _key_expansion_256a
jmp .Ldec_key
.Lenc_key192:
movq 0x10(%rsi), %xmm2 # other user key
movq 0x10(UKEYP), %xmm2 # other user key
AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_192a
AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
......@@ -195,33 +1858,47 @@ ENTRY(aesni_set_key)
AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
call _key_expansion_128
.Ldec_key:
sub $0x10, %rcx
movaps (%rdi), %xmm0
movaps (%rcx), %xmm1
movaps %xmm0, 240(%rcx)
movaps %xmm1, 240(%rdi)
add $0x10, %rdi
lea 240-16(%rcx), %rsi
sub $0x10, TKEYP
movaps (KEYP), %xmm0
movaps (TKEYP), %xmm1
movaps %xmm0, 240(TKEYP)
movaps %xmm1, 240(KEYP)
add $0x10, KEYP
lea 240-16(TKEYP), UKEYP
.align 4
.Ldec_key_loop:
movaps (%rdi), %xmm0
movaps (KEYP), %xmm0
AESIMC %xmm0 %xmm1
movaps %xmm1, (%rsi)
add $0x10, %rdi
sub $0x10, %rsi
cmp %rcx, %rdi
movaps %xmm1, (UKEYP)
add $0x10, KEYP
sub $0x10, UKEYP
cmp TKEYP, KEYP
jb .Ldec_key_loop
xor %rax, %rax
xor AREG, AREG
#ifndef __x86_64__
popl KEYP
#endif
ret
/*
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
ENTRY(aesni_enc)
#ifndef __x86_64__
pushl KEYP
pushl KLEN
movl 12(%esp), KEYP
movl 16(%esp), OUTP
movl 20(%esp), INP
#endif
movl 480(KEYP), KLEN # key length
movups (INP), STATE # input
call _aesni_enc1
movups STATE, (OUTP) # output
#ifndef __x86_64__
popl KLEN
popl KEYP
#endif
ret
/*
......@@ -236,6 +1913,7 @@ ENTRY(aesni_enc)
* KEY
* TKEYP (T1)
*/
.align 4
_aesni_enc1:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
......@@ -298,6 +1976,7 @@ _aesni_enc1:
* KEY
* TKEYP (T1)
*/
.align 4
_aesni_enc4:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
......@@ -391,11 +2070,22 @@ _aesni_enc4:
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
ENTRY(aesni_dec)
#ifndef __x86_64__
pushl KEYP
pushl KLEN
movl 12(%esp), KEYP
movl 16(%esp), OUTP
movl 20(%esp), INP
#endif
mov 480(KEYP), KLEN # key length
add $240, KEYP
movups (INP), STATE # input
call _aesni_dec1
movups STATE, (OUTP) #output
#ifndef __x86_64__
popl KLEN
popl KEYP
#endif
ret
/*
......@@ -410,6 +2100,7 @@ ENTRY(aesni_dec)
* KEY
* TKEYP (T1)
*/
.align 4
_aesni_dec1:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
......@@ -472,6 +2163,7 @@ _aesni_dec1:
* KEY
* TKEYP (T1)
*/
.align 4
_aesni_dec4:
movaps (KEYP), KEY # key
mov KEYP, TKEYP
......@@ -566,6 +2258,15 @@ _aesni_dec4:
* size_t len)
*/
ENTRY(aesni_ecb_enc)
#ifndef __x86_64__
pushl LEN
pushl KEYP
pushl KLEN
movl 16(%esp), KEYP
movl 20(%esp), OUTP
movl 24(%esp), INP
movl 28(%esp), LEN
#endif
test LEN, LEN # check length
jz .Lecb_enc_ret
mov 480(KEYP), KLEN
......@@ -602,6 +2303,11 @@ ENTRY(aesni_ecb_enc)
cmp $16, LEN
jge .Lecb_enc_loop1
.Lecb_enc_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
#endif
ret
/*
......@@ -609,6 +2315,15 @@ ENTRY(aesni_ecb_enc)
* size_t len);
*/
ENTRY(aesni_ecb_dec)
#ifndef __x86_64__
pushl LEN
pushl KEYP
pushl KLEN
movl 16(%esp), KEYP
movl 20(%esp), OUTP
movl 24(%esp), INP
movl 28(%esp), LEN
#endif
test LEN, LEN
jz .Lecb_dec_ret
mov 480(KEYP), KLEN
......@@ -646,6 +2361,11 @@ ENTRY(aesni_ecb_dec)
cmp $16, LEN
jge .Lecb_dec_loop1
.Lecb_dec_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
#endif
ret
/*
......@@ -653,6 +2373,17 @@ ENTRY(aesni_ecb_dec)
* size_t len, u8 *iv)
*/
ENTRY(aesni_cbc_enc)
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl 20(%esp), KEYP
movl 24(%esp), OUTP
movl 28(%esp), INP
movl 32(%esp), LEN
movl 36(%esp), IVP
#endif
cmp $16, LEN
jb .Lcbc_enc_ret
mov 480(KEYP), KLEN
......@@ -670,6 +2401,12 @@ ENTRY(aesni_cbc_enc)
jge .Lcbc_enc_loop
movups STATE, (IVP)
.Lcbc_enc_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
ret
/*
......@@ -677,6 +2414,17 @@ ENTRY(aesni_cbc_enc)
* size_t len, u8 *iv)
*/
ENTRY(aesni_cbc_dec)
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
pushl KLEN
movl 20(%esp), KEYP
movl 24(%esp), OUTP
movl 28(%esp), INP
movl 32(%esp), LEN
movl 36(%esp), IVP
#endif
cmp $16, LEN
jb .Lcbc_dec_just_ret
mov 480(KEYP), KLEN
......@@ -690,16 +2438,30 @@ ENTRY(aesni_cbc_dec)
movaps IN1, STATE1
movups 0x10(INP), IN2
movaps IN2, STATE2
#ifdef __x86_64__
movups 0x20(INP), IN3
movaps IN3, STATE3
movups 0x30(INP), IN4
movaps IN4, STATE4
#else
movups 0x20(INP), IN1
movaps IN1, STATE3
movups 0x30(INP), IN2
movaps IN2, STATE4
#endif
call _aesni_dec4
pxor IV, STATE1
#ifdef __x86_64__
pxor IN1, STATE2
pxor IN2, STATE3
pxor IN3, STATE4
movaps IN4, IV
#else
pxor (INP), STATE2
pxor 0x10(INP), STATE3
pxor IN1, STATE4
movaps IN2, IV
#endif
movups STATE1, (OUTP)
movups STATE2, 0x10(OUTP)
movups STATE3, 0x20(OUTP)
......@@ -727,8 +2489,15 @@ ENTRY(aesni_cbc_dec)
.Lcbc_dec_ret:
movups IV, (IVP)
.Lcbc_dec_just_ret:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
ret
#ifdef __x86_64__
.align 16
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
......@@ -744,6 +2513,7 @@ ENTRY(aesni_cbc_dec)
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
.align 4
_aesni_inc_init:
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
......@@ -768,6 +2538,7 @@ _aesni_inc_init:
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
.align 4
_aesni_inc:
paddq INC, CTR
add $1, TCTR_LOW
......@@ -839,3 +2610,4 @@ ENTRY(aesni_ctr_enc)
movups IV, (IVP)
.Lctr_enc_just_ret:
ret
#endif
......@@ -5,6 +5,14 @@
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
* interface for 64-bit kernels.
* Authors: Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
......@@ -21,6 +29,10 @@
#include <crypto/ctr.h>
#include <asm/i387.h>
#include <asm/aes.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
#define HAS_CTR
......@@ -42,8 +54,31 @@ struct async_aes_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
#define AESNI_ALIGN 16
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
*/
struct aesni_rfc4106_gcm_ctx {
u8 hash_subkey[16];
struct crypto_aes_ctx aes_key_expanded;
u8 nonce[4];
struct cryptd_aead *cryptd_tfm;
};
struct aesni_gcm_set_hash_subkey_result {
int err;
struct completion completion;
};
struct aesni_hash_subkey_req_data {
u8 iv[16];
struct aesni_gcm_set_hash_subkey_result result;
struct scatterlist sg;
};
#define AESNI_ALIGN (16)
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
#define RFC4106_HASH_SUBKEY_SIZE 16
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
......@@ -59,9 +94,62 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
#ifdef CONFIG_X86_64
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
/* asmlinkage void aesni_gcm_enc()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, Plaintext input
* unsigned long plaintext_len, Length of data in bytes for encryption.
* u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
* concatenated with 8 byte Initialisation Vector (from IPSec ESP
* Payload) concatenated with 0x00000001. 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
* unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
* is going to be 8 or 12 bytes
* u8 *auth_tag, Authenticated Tag output.
* unsigned long auth_tag_len), Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
/* asmlinkage void aesni_gcm_dec()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, Plaintext output. Decrypt in-place is allowed.
* const u8 *in, Ciphertext input
* unsigned long ciphertext_len, Length of data in bytes for decryption.
* u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
* concatenated with 8 byte Initialisation Vector (from IPSec ESP
* Payload) concatenated with 0x00000001. 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
* unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
* to be 8 or 12 bytes
* u8 *auth_tag, Authenticated Tag output.
* unsigned long auth_tag_len) Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
static inline struct
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
{
return
(struct aesni_rfc4106_gcm_ctx *)
PTR_ALIGN((u8 *)
crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN);
}
#endif
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
unsigned long addr = (unsigned long)raw_ctx;
......@@ -324,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = {
},
};
#ifdef CONFIG_X86_64
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
struct blkcipher_walk *walk)
{
......@@ -389,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = {
},
},
};
#endif
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
......@@ -536,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = {
},
};
#ifdef CONFIG_X86_64
static int ablk_ctr_init(struct crypto_tfm *tfm)
{
struct cryptd_ablkcipher *cryptd_tfm;
......@@ -612,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = {
},
};
#endif
#endif
#ifdef HAS_LRW
static int ablk_lrw_init(struct crypto_tfm *tfm)
......@@ -730,6 +822,424 @@ static struct crypto_alg ablk_xts_alg = {
};
#endif
#ifdef CONFIG_X86_64
static int rfc4106_init(struct crypto_tfm *tfm)
{
struct cryptd_aead *cryptd_tfm;
struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
tfm->crt_aead.reqsize = sizeof(struct aead_request)
+ crypto_aead_reqsize(&cryptd_tfm->base);
return 0;
}
static void rfc4106_exit(struct crypto_tfm *tfm)
{
struct aesni_rfc4106_gcm_ctx *ctx =
(struct aesni_rfc4106_gcm_ctx *)
PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
if (!IS_ERR(ctx->cryptd_tfm))
cryptd_free_aead(ctx->cryptd_tfm);
return;
}
static void
rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err)
{
struct aesni_gcm_set_hash_subkey_result *result = req->data;
if (err == -EINPROGRESS)
return;
result->err = err;
complete(&result->completion);
}
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
struct crypto_ablkcipher *ctr_tfm;
struct ablkcipher_request *req;
int ret = -EINVAL;
struct aesni_hash_subkey_req_data *req_data;
ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
if (IS_ERR(ctr_tfm))
return PTR_ERR(ctr_tfm);
crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
if (ret) {
crypto_free_ablkcipher(ctr_tfm);
return ret;
}
req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
if (!req) {
crypto_free_ablkcipher(ctr_tfm);
return -EINVAL;
}
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
if (!req_data) {
crypto_free_ablkcipher(ctr_tfm);
return -ENOMEM;
}
memset(req_data->iv, 0, sizeof(req_data->iv));
/* Clear the data in the hash sub key container to zero.*/
/* We want to cipher all zeros to create the hash sub key. */
memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
init_completion(&req_data->result.completion);
sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE);
ablkcipher_request_set_tfm(req, ctr_tfm);
ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
CRYPTO_TFM_REQ_MAY_BACKLOG,
rfc4106_set_hash_subkey_done,
&req_data->result);
ablkcipher_request_set_crypt(req, &req_data->sg,
&req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv);
ret = crypto_ablkcipher_encrypt(req);
if (ret == -EINPROGRESS || ret == -EBUSY) {
ret = wait_for_completion_interruptible
(&req_data->result.completion);
if (!ret)
ret = req_data->result.err;
}
ablkcipher_request_free(req);
kfree(req_data);
crypto_free_ablkcipher(ctr_tfm);
return ret;
}
static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
unsigned int key_len)
{
int ret = 0;
struct crypto_tfm *tfm = crypto_aead_tfm(parent);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
u8 *new_key_mem = NULL;
if (key_len < 4) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
/*Account for 4 byte nonce at the end.*/
key_len -= 4;
if (key_len != AES_KEYSIZE_128) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
/*This must be on a 16 byte boundary!*/
if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN)
return -EINVAL;
if ((unsigned long)key % AESNI_ALIGN) {
/*key is not aligned: use an auxuliar aligned pointer*/
new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL);
if (!new_key_mem)
return -ENOMEM;
new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
memcpy(new_key_mem, key, key_len);
key = new_key_mem;
}
if (!irq_fpu_usable())
ret = crypto_aes_expand_key(&(ctx->aes_key_expanded),
key, key_len);
else {
kernel_fpu_begin();
ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len);
kernel_fpu_end();
}
/*This must be on a 16 byte boundary!*/
if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) {
ret = -EINVAL;
goto exit;
}
ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
exit:
kfree(new_key_mem);
return ret;
}
/* This is the Integrity Check Value (aka the authentication tag length and can
* be 8, 12 or 16 bytes long. */
static int rfc4106_set_authsize(struct crypto_aead *parent,
unsigned int authsize)
{
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
switch (authsize) {
case 8:
case 12:
case 16:
break;
default:
return -EINVAL;
}
crypto_aead_crt(parent)->authsize = authsize;
crypto_aead_crt(cryptd_child)->authsize = authsize;
return 0;
}
static int rfc4106_encrypt(struct aead_request *req)
{
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_aead_encrypt(cryptd_req);
} else {
kernel_fpu_begin();
ret = cryptd_child->base.crt_aead.encrypt(req);
kernel_fpu_end();
return ret;
}
}
static int rfc4106_decrypt(struct aead_request *req)
{
int ret;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
if (!irq_fpu_usable()) {
struct aead_request *cryptd_req =
(struct aead_request *) aead_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
return crypto_aead_decrypt(cryptd_req);
} else {
kernel_fpu_begin();
ret = cryptd_child->base.crt_aead.decrypt(req);
kernel_fpu_end();
return ret;
}
}
static struct crypto_alg rfc4106_alg = {
.cra_name = "rfc4106(gcm(aes))",
.cra_driver_name = "rfc4106-gcm-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
.cra_alignmask = 0,
.cra_type = &crypto_nivaead_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list),
.cra_init = rfc4106_init,
.cra_exit = rfc4106_exit,
.cra_u = {
.aead = {
.setkey = rfc4106_set_key,
.setauthsize = rfc4106_set_authsize,
.encrypt = rfc4106_encrypt,
.decrypt = rfc4106_decrypt,
.geniv = "seqiv",
.ivsize = 8,
.maxauthsize = 16,
},
},
};
static int __driver_rfc4106_encrypt(struct aead_request *req)
{
u8 one_entry_in_sg = 0;
u8 *src, *dst, *assoc;
__be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
u8 iv_tab[16+AESNI_ALIGN];
u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN);
struct scatter_walk src_sg_walk;
struct scatter_walk assoc_sg_walk;
struct scatter_walk dst_sg_walk;
unsigned int i;
/* Assuming we are supporting rfc4106 64-bit extended */
/* sequence numbers We need to have the AAD length equal */
/* to 8 or 12 bytes */
if (unlikely(req->assoclen != 8 && req->assoclen != 12))
return -EINVAL;
/* IV below built */
for (i = 0; i < 4; i++)
*(iv+i) = ctx->nonce[i];
for (i = 0; i < 8; i++)
*(iv+4+i) = req->iv[i];
*((__be32 *)(iv+12)) = counter;
if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
src = scatterwalk_map(&src_sg_walk, 0);
assoc = scatterwalk_map(&assoc_sg_walk, 0);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
dst = scatterwalk_map(&dst_sg_walk, 0);
}
} else {
/* Allocate memory for src, dst, assoc */
src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
GFP_ATOMIC);
if (unlikely(!src))
return -ENOMEM;
assoc = (src + req->cryptlen + auth_tag_len);
scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
scatterwalk_map_and_copy(assoc, req->assoc, 0,
req->assoclen, 0);
dst = src;
}
aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
+ ((unsigned long)req->cryptlen), auth_tag_len);
/* The authTag (aka the Integrity Check Value) needs to be written
* back to the packet. */
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
scatterwalk_unmap(dst, 0);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
scatterwalk_unmap(src, 0);
scatterwalk_unmap(assoc, 0);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
scatterwalk_map_and_copy(dst, req->dst, 0,
req->cryptlen + auth_tag_len, 1);
kfree(src);
}
return 0;
}
static int __driver_rfc4106_decrypt(struct aead_request *req)
{
u8 one_entry_in_sg = 0;
u8 *src, *dst, *assoc;
unsigned long tempCipherLen = 0;
__be32 counter = cpu_to_be32(1);
int retval = 0;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
u8 iv_and_authTag[32+AESNI_ALIGN];
u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN);
u8 *authTag = iv + 16;
struct scatter_walk src_sg_walk;
struct scatter_walk assoc_sg_walk;
struct scatter_walk dst_sg_walk;
unsigned int i;
if (unlikely((req->cryptlen < auth_tag_len) ||
(req->assoclen != 8 && req->assoclen != 12)))
return -EINVAL;
/* Assuming we are supporting rfc4106 64-bit extended */
/* sequence numbers We need to have the AAD length */
/* equal to 8 or 12 bytes */
tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
/* IV below built */
for (i = 0; i < 4; i++)
*(iv+i) = ctx->nonce[i];
for (i = 0; i < 8; i++)
*(iv+4+i) = req->iv[i];
*((__be32 *)(iv+12)) = counter;
if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
src = scatterwalk_map(&src_sg_walk, 0);
assoc = scatterwalk_map(&assoc_sg_walk, 0);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
dst = scatterwalk_map(&dst_sg_walk, 0);
}
} else {
/* Allocate memory for src, dst, assoc */
src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
if (!src)
return -ENOMEM;
assoc = (src + req->cryptlen + auth_tag_len);
scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
scatterwalk_map_and_copy(assoc, req->assoc, 0,
req->assoclen, 0);
dst = src;
}
aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv,
ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
authTag, auth_tag_len);
/* Compare generated tag with passed in tag. */
retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ?
-EBADMSG : 0;
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
scatterwalk_unmap(dst, 0);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
scatterwalk_unmap(src, 0);
scatterwalk_unmap(assoc, 0);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
kfree(src);
}
return retval;
}
static struct crypto_alg __rfc4106_alg = {
.cra_name = "__gcm-aes-aesni",
.cra_driver_name = "__driver-gcm-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
.cra_alignmask = 0,
.cra_type = &crypto_aead_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list),
.cra_u = {
.aead = {
.encrypt = __driver_rfc4106_encrypt,
.decrypt = __driver_rfc4106_decrypt,
},
},
};
#endif
static int __init aesni_init(void)
{
int err;
......@@ -738,6 +1248,7 @@ static int __init aesni_init(void)
printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
return -ENODEV;
}
if ((err = crypto_register_alg(&aesni_alg)))
goto aes_err;
if ((err = crypto_register_alg(&__aesni_alg)))
......@@ -746,18 +1257,24 @@ static int __init aesni_init(void)
goto blk_ecb_err;
if ((err = crypto_register_alg(&blk_cbc_alg)))
goto blk_cbc_err;
if ((err = crypto_register_alg(&blk_ctr_alg)))
goto blk_ctr_err;
if ((err = crypto_register_alg(&ablk_ecb_alg)))
goto ablk_ecb_err;
if ((err = crypto_register_alg(&ablk_cbc_alg)))
goto ablk_cbc_err;
#ifdef CONFIG_X86_64
if ((err = crypto_register_alg(&blk_ctr_alg)))
goto blk_ctr_err;
if ((err = crypto_register_alg(&ablk_ctr_alg)))
goto ablk_ctr_err;
if ((err = crypto_register_alg(&__rfc4106_alg)))
goto __aead_gcm_err;
if ((err = crypto_register_alg(&rfc4106_alg)))
goto aead_gcm_err;
#ifdef HAS_CTR
if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
goto ablk_rfc3686_ctr_err;
#endif
#endif
#ifdef HAS_LRW
if ((err = crypto_register_alg(&ablk_lrw_alg)))
goto ablk_lrw_err;
......@@ -770,7 +1287,6 @@ static int __init aesni_init(void)
if ((err = crypto_register_alg(&ablk_xts_alg)))
goto ablk_xts_err;
#endif
return err;
#ifdef HAS_XTS
......@@ -784,18 +1300,24 @@ static int __init aesni_init(void)
crypto_unregister_alg(&ablk_lrw_alg);
ablk_lrw_err:
#endif
#ifdef CONFIG_X86_64
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
ablk_rfc3686_ctr_err:
#endif
crypto_unregister_alg(&rfc4106_alg);
aead_gcm_err:
crypto_unregister_alg(&__rfc4106_alg);
__aead_gcm_err:
crypto_unregister_alg(&ablk_ctr_alg);
ablk_ctr_err:
crypto_unregister_alg(&blk_ctr_alg);
blk_ctr_err:
#endif
crypto_unregister_alg(&ablk_cbc_alg);
ablk_cbc_err:
crypto_unregister_alg(&ablk_ecb_alg);
ablk_ecb_err:
crypto_unregister_alg(&blk_ctr_alg);
blk_ctr_err:
crypto_unregister_alg(&blk_cbc_alg);
blk_cbc_err:
crypto_unregister_alg(&blk_ecb_alg);
......@@ -818,13 +1340,17 @@ static void __exit aesni_exit(void)
#ifdef HAS_LRW
crypto_unregister_alg(&ablk_lrw_alg);
#endif
#ifdef CONFIG_X86_64
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
#endif
crypto_unregister_alg(&rfc4106_alg);
crypto_unregister_alg(&__rfc4106_alg);
crypto_unregister_alg(&ablk_ctr_alg);
crypto_unregister_alg(&blk_ctr_alg);
#endif
crypto_unregister_alg(&ablk_cbc_alg);
crypto_unregister_alg(&ablk_ecb_alg);
crypto_unregister_alg(&blk_ctr_alg);
crypto_unregister_alg(&blk_cbc_alg);
crypto_unregister_alg(&blk_ecb_alg);
crypto_unregister_alg(&__aesni_alg);
......
......@@ -110,7 +110,6 @@ config CRYPTO_MANAGER_DISABLE_TESTS
config CRYPTO_GF128MUL
tristate "GF(2^128) multiplication functions (EXPERIMENTAL)"
depends on EXPERIMENTAL
help
Efficient table driven implementation of multiplications in the
field GF(2^128). This is needed by some cypher modes. This
......@@ -539,8 +538,9 @@ config CRYPTO_AES_X86_64
config CRYPTO_AES_NI_INTEL
tristate "AES cipher algorithms (AES-NI)"
depends on (X86 || UML_X86) && 64BIT
select CRYPTO_AES_X86_64
depends on (X86 || UML_X86)
select CRYPTO_AES_X86_64 if 64BIT
select CRYPTO_AES_586 if !64BIT
select CRYPTO_CRYPTD
select CRYPTO_ALGAPI
select CRYPTO_FPU
......@@ -563,9 +563,10 @@ config CRYPTO_AES_NI_INTEL
See <http://csrc.nist.gov/encryption/aes/> for more information.
In addition to AES cipher algorithm support, the
acceleration for some popular block cipher mode is supported
too, including ECB, CBC, CTR, LRW, PCBC, XTS.
In addition to AES cipher algorithm support, the acceleration
for some popular block cipher mode is supported too, including
ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
acceleration for CTR.
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
......@@ -841,6 +842,27 @@ config CRYPTO_ANSI_CPRNG
ANSI X9.31 A.2.4. Note that this option must be enabled if
CRYPTO_FIPS is selected
config CRYPTO_USER_API
tristate
config CRYPTO_USER_API_HASH
tristate "User-space interface for hash algorithms"
depends on NET
select CRYPTO_HASH
select CRYPTO_USER_API
help
This option enables the user-spaces interface for hash
algorithms.
config CRYPTO_USER_API_SKCIPHER
tristate "User-space interface for symmetric key cipher algorithms"
depends on NET
select CRYPTO_BLKCIPHER
select CRYPTO_USER_API
help
This option enables the user-spaces interface for symmetric
key cipher algorithms.
source "drivers/crypto/Kconfig"
endif # if CRYPTO
......@@ -3,32 +3,32 @@
#
obj-$(CONFIG_CRYPTO) += crypto.o
crypto-objs := api.o cipher.o compress.o
crypto-y := api.o cipher.o compress.o
obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
crypto_algapi-$(CONFIG_PROC_FS) += proc.o
crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y)
crypto_algapi-y := algapi.o scatterwalk.o $(crypto_algapi-y)
obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o
obj-$(CONFIG_CRYPTO_AEAD2) += aead.o
crypto_blkcipher-objs := ablkcipher.o
crypto_blkcipher-objs += blkcipher.o
crypto_blkcipher-y := ablkcipher.o
crypto_blkcipher-y += blkcipher.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o
obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o
obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
crypto_hash-objs += ahash.o
crypto_hash-objs += shash.o
crypto_hash-y += ahash.o
crypto_hash-y += shash.o
obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
cryptomgr-objs := algboss.o testmgr.o
cryptomgr-y := algboss.o testmgr.o
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
......@@ -85,6 +85,9 @@ obj-$(CONFIG_CRYPTO_RNG2) += krng.o
obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
#
# generic algorithms and the async_tx api
......
/*
* af_alg: User-space algorithm interface
*
* This file provides the user-space API for algorithms.
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <asm/atomic.h>
#include <crypto/if_alg.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/rwsem.h>
struct alg_type_list {
const struct af_alg_type *type;
struct list_head list;
};
static atomic_long_t alg_memory_allocated;
static struct proto alg_proto = {
.name = "ALG",
.owner = THIS_MODULE,
.memory_allocated = &alg_memory_allocated,
.obj_size = sizeof(struct alg_sock),
};
static LIST_HEAD(alg_types);
static DECLARE_RWSEM(alg_types_sem);
static const struct af_alg_type *alg_get_type(const char *name)
{
const struct af_alg_type *type = ERR_PTR(-ENOENT);
struct alg_type_list *node;
down_read(&alg_types_sem);
list_for_each_entry(node, &alg_types, list) {
if (strcmp(node->type->name, name))
continue;
if (try_module_get(node->type->owner))
type = node->type;
break;
}
up_read(&alg_types_sem);
return type;
}
int af_alg_register_type(const struct af_alg_type *type)
{
struct alg_type_list *node;
int err = -EEXIST;
down_write(&alg_types_sem);
list_for_each_entry(node, &alg_types, list) {
if (!strcmp(node->type->name, type->name))
goto unlock;
}
node = kmalloc(sizeof(*node), GFP_KERNEL);
err = -ENOMEM;
if (!node)
goto unlock;
type->ops->owner = THIS_MODULE;
node->type = type;
list_add(&node->list, &alg_types);
err = 0;
unlock:
up_write(&alg_types_sem);
return err;
}
EXPORT_SYMBOL_GPL(af_alg_register_type);
int af_alg_unregister_type(const struct af_alg_type *type)
{
struct alg_type_list *node;
int err = -ENOENT;
down_write(&alg_types_sem);
list_for_each_entry(node, &alg_types, list) {
if (strcmp(node->type->name, type->name))
continue;
list_del(&node->list);
kfree(node);
err = 0;
break;
}
up_write(&alg_types_sem);
return err;
}
EXPORT_SYMBOL_GPL(af_alg_unregister_type);
static void alg_do_release(const struct af_alg_type *type, void *private)
{
if (!type)
return;
type->release(private);
module_put(type->owner);
}
int af_alg_release(struct socket *sock)
{
if (sock->sk)
sock_put(sock->sk);
return 0;
}
EXPORT_SYMBOL_GPL(af_alg_release);
static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct sockaddr_alg *sa = (void *)uaddr;
const struct af_alg_type *type;
void *private;
if (sock->state == SS_CONNECTED)
return -EINVAL;
if (addr_len != sizeof(*sa))
return -EINVAL;
sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
sa->salg_name[sizeof(sa->salg_name) - 1] = 0;
type = alg_get_type(sa->salg_type);
if (IS_ERR(type) && PTR_ERR(type) == -ENOENT) {
request_module("algif-%s", sa->salg_type);
type = alg_get_type(sa->salg_type);
}
if (IS_ERR(type))
return PTR_ERR(type);
private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
if (IS_ERR(private)) {
module_put(type->owner);
return PTR_ERR(private);
}
lock_sock(sk);
swap(ask->type, type);
swap(ask->private, private);
release_sock(sk);
alg_do_release(type, private);
return 0;
}
static int alg_setkey(struct sock *sk, char __user *ukey,
unsigned int keylen)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type = ask->type;
u8 *key;
int err;
key = sock_kmalloc(sk, keylen, GFP_KERNEL);
if (!key)
return -ENOMEM;
err = -EFAULT;
if (copy_from_user(key, ukey, keylen))
goto out;
err = type->setkey(ask->private, key, keylen);
out:
sock_kfree_s(sk, key, keylen);
return err;
}
static int alg_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
int err = -ENOPROTOOPT;
lock_sock(sk);
type = ask->type;
if (level != SOL_ALG || !type)
goto unlock;
switch (optname) {
case ALG_SET_KEY:
if (sock->state == SS_CONNECTED)
goto unlock;
if (!type->setkey)
goto unlock;
err = alg_setkey(sk, optval, optlen);
}
unlock:
release_sock(sk);
return err;
}
int af_alg_accept(struct sock *sk, struct socket *newsock)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
struct sock *sk2;
int err;
lock_sock(sk);
type = ask->type;
err = -EINVAL;
if (!type)
goto unlock;
sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto);
err = -ENOMEM;
if (!sk2)
goto unlock;
sock_init_data(newsock, sk2);
sock_graft(sk2, newsock);
err = type->accept(ask->private, sk2);
if (err) {
sk_free(sk2);
goto unlock;
}
sk2->sk_family = PF_ALG;
sock_hold(sk);
alg_sk(sk2)->parent = sk;
alg_sk(sk2)->type = type;
newsock->ops = type->ops;
newsock->state = SS_CONNECTED;
err = 0;
unlock:
release_sock(sk);
return err;
}
EXPORT_SYMBOL_GPL(af_alg_accept);
static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
{
return af_alg_accept(sock->sk, newsock);
}
static const struct proto_ops alg_proto_ops = {
.family = PF_ALG,
.owner = THIS_MODULE,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.getname = sock_no_getname,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.getsockopt = sock_no_getsockopt,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
.poll = sock_no_poll,
.bind = alg_bind,
.release = af_alg_release,
.setsockopt = alg_setsockopt,
.accept = alg_accept,
};
static void alg_sock_destruct(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
alg_do_release(ask->type, ask->private);
}
static int alg_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
int err;
if (sock->type != SOCK_SEQPACKET)
return -ESOCKTNOSUPPORT;
if (protocol != 0)
return -EPROTONOSUPPORT;
err = -ENOMEM;
sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto);
if (!sk)
goto out;
sock->ops = &alg_proto_ops;
sock_init_data(sock, sk);
sk->sk_family = PF_ALG;
sk->sk_destruct = alg_sock_destruct;
return 0;
out:
return err;
}
static const struct net_proto_family alg_family = {
.family = PF_ALG,
.create = alg_create,
.owner = THIS_MODULE,
};
int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len,
int write)
{
unsigned long from = (unsigned long)addr;
unsigned long npages;
unsigned off;
int err;
int i;
err = -EFAULT;
if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len))
goto out;
off = from & ~PAGE_MASK;
npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (npages > ALG_MAX_PAGES)
npages = ALG_MAX_PAGES;
err = get_user_pages_fast(from, npages, write, sgl->pages);
if (err < 0)
goto out;
npages = err;
err = -EINVAL;
if (WARN_ON(npages == 0))
goto out;
err = 0;
sg_init_table(sgl->sg, npages);
for (i = 0; i < npages; i++) {
int plen = min_t(int, len, PAGE_SIZE - off);
sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
off = 0;
len -= plen;
err += plen;
}
out:
return err;
}
EXPORT_SYMBOL_GPL(af_alg_make_sg);
void af_alg_free_sg(struct af_alg_sgl *sgl)
{
int i;
i = 0;
do {
put_page(sgl->pages[i]);
} while (!sg_is_last(sgl->sg + (i++)));
}
EXPORT_SYMBOL_GPL(af_alg_free_sg);
int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
{
struct cmsghdr *cmsg;
for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
if (!CMSG_OK(msg, cmsg))
return -EINVAL;
if (cmsg->cmsg_level != SOL_ALG)
continue;
switch(cmsg->cmsg_type) {
case ALG_SET_IV:
if (cmsg->cmsg_len < CMSG_LEN(sizeof(*con->iv)))
return -EINVAL;
con->iv = (void *)CMSG_DATA(cmsg);
if (cmsg->cmsg_len < CMSG_LEN(con->iv->ivlen +
sizeof(*con->iv)))
return -EINVAL;
break;
case ALG_SET_OP:
if (cmsg->cmsg_len < CMSG_LEN(sizeof(u32)))
return -EINVAL;
con->op = *(u32 *)CMSG_DATA(cmsg);
break;
default:
return -EINVAL;
}
}
return 0;
}
EXPORT_SYMBOL_GPL(af_alg_cmsg_send);
int af_alg_wait_for_completion(int err, struct af_alg_completion *completion)
{
switch (err) {
case -EINPROGRESS:
case -EBUSY:
wait_for_completion(&completion->completion);
INIT_COMPLETION(completion->completion);
err = completion->err;
break;
};
return err;
}
EXPORT_SYMBOL_GPL(af_alg_wait_for_completion);
void af_alg_complete(struct crypto_async_request *req, int err)
{
struct af_alg_completion *completion = req->data;
completion->err = err;
complete(&completion->completion);
}
EXPORT_SYMBOL_GPL(af_alg_complete);
static int __init af_alg_init(void)
{
int err = proto_register(&alg_proto, 0);
if (err)
goto out;
err = sock_register(&alg_family);
if (err != 0)
goto out_unregister_proto;
out:
return err;
out_unregister_proto:
proto_unregister(&alg_proto);
goto out;
}
static void __exit af_alg_exit(void)
{
sock_unregister(PF_ALG);
proto_unregister(&alg_proto);
}
module_init(af_alg_init);
module_exit(af_alg_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(AF_ALG);
/*
* algif_hash: User-space interface for hash algorithms
*
* This file provides the user-space API for hash algorithms.
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/hash.h>
#include <crypto/if_alg.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <net/sock.h>
struct hash_ctx {
struct af_alg_sgl sgl;
u8 *result;
struct af_alg_completion completion;
unsigned int len;
bool more;
struct ahash_request req;
};
static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t ignored)
{
int limit = ALG_MAX_PAGES * PAGE_SIZE;
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
unsigned long iovlen;
struct iovec *iov;
long copied = 0;
int err;
if (limit > sk->sk_sndbuf)
limit = sk->sk_sndbuf;
lock_sock(sk);
if (!ctx->more) {
err = crypto_ahash_init(&ctx->req);
if (err)
goto unlock;
}
ctx->more = 0;
for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
iovlen--, iov++) {
unsigned long seglen = iov->iov_len;
char __user *from = iov->iov_base;
while (seglen) {
int len = min_t(unsigned long, seglen, limit);
int newlen;
newlen = af_alg_make_sg(&ctx->sgl, from, len, 0);
if (newlen < 0)
goto unlock;
ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL,
newlen);
err = af_alg_wait_for_completion(
crypto_ahash_update(&ctx->req),
&ctx->completion);
af_alg_free_sg(&ctx->sgl);
if (err)
goto unlock;
seglen -= newlen;
from += newlen;
copied += newlen;
}
}
err = 0;
ctx->more = msg->msg_flags & MSG_MORE;
if (!ctx->more) {
ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
&ctx->completion);
}
unlock:
release_sock(sk);
return err ?: copied;
}
static ssize_t hash_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
int err;
lock_sock(sk);
sg_init_table(ctx->sgl.sg, 1);
sg_set_page(ctx->sgl.sg, page, size, offset);
ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
if (!(flags & MSG_MORE)) {
if (ctx->more)
err = crypto_ahash_finup(&ctx->req);
else
err = crypto_ahash_digest(&ctx->req);
} else {
if (!ctx->more) {
err = crypto_ahash_init(&ctx->req);
if (err)
goto unlock;
}
err = crypto_ahash_update(&ctx->req);
}
err = af_alg_wait_for_completion(err, &ctx->completion);
if (err)
goto unlock;
ctx->more = flags & MSG_MORE;
unlock:
release_sock(sk);
return err ?: size;
}
static int hash_recvmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t len, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
int err;
if (len > ds)
len = ds;
else if (len < ds)
msg->msg_flags |= MSG_TRUNC;
lock_sock(sk);
if (ctx->more) {
ctx->more = 0;
ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
&ctx->completion);
if (err)
goto unlock;
}
err = memcpy_toiovec(msg->msg_iov, ctx->result, len);
unlock:
release_sock(sk);
return err ?: len;
}
static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
struct ahash_request *req = &ctx->req;
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))];
struct sock *sk2;
struct alg_sock *ask2;
struct hash_ctx *ctx2;
int err;
err = crypto_ahash_export(req, state);
if (err)
return err;
err = af_alg_accept(ask->parent, newsock);
if (err)
return err;
sk2 = newsock->sk;
ask2 = alg_sk(sk2);
ctx2 = ask2->private;
ctx2->more = 1;
err = crypto_ahash_import(&ctx2->req, state);
if (err) {
sock_orphan(sk2);
sock_put(sk2);
}
return err;
}
static struct proto_ops algif_hash_ops = {
.family = PF_ALG,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.getname = sock_no_getname,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.getsockopt = sock_no_getsockopt,
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.setsockopt = sock_no_setsockopt,
.poll = sock_no_poll,
.release = af_alg_release,
.sendmsg = hash_sendmsg,
.sendpage = hash_sendpage,
.recvmsg = hash_recvmsg,
.accept = hash_accept,
};
static void *hash_bind(const char *name, u32 type, u32 mask)
{
return crypto_alloc_ahash(name, type, mask);
}
static void hash_release(void *private)
{
crypto_free_ahash(private);
}
static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
{
return crypto_ahash_setkey(private, key, keylen);
}
static void hash_sock_destruct(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
sock_kfree_s(sk, ctx->result,
crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)));
sock_kfree_s(sk, ctx, ctx->len);
af_alg_release_parent(sk);
}
static int hash_accept_parent(void *private, struct sock *sk)
{
struct hash_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
unsigned ds = crypto_ahash_digestsize(private);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
if (!ctx->result) {
sock_kfree_s(sk, ctx, len);
return -ENOMEM;
}
memset(ctx->result, 0, ds);
ctx->len = len;
ctx->more = 0;
af_alg_init_completion(&ctx->completion);
ask->private = ctx;
ahash_request_set_tfm(&ctx->req, private);
ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
af_alg_complete, &ctx->completion);
sk->sk_destruct = hash_sock_destruct;
return 0;
}
static const struct af_alg_type algif_type_hash = {
.bind = hash_bind,
.release = hash_release,
.setkey = hash_setkey,
.accept = hash_accept_parent,
.ops = &algif_hash_ops,
.name = "hash",
.owner = THIS_MODULE
};
static int __init algif_hash_init(void)
{
return af_alg_register_type(&algif_type_hash);
}
static void __exit algif_hash_exit(void)
{
int err = af_alg_unregister_type(&algif_type_hash);
BUG_ON(err);
}
module_init(algif_hash_init);
module_exit(algif_hash_exit);
MODULE_LICENSE("GPL");
/*
* algif_skcipher: User-space interface for skcipher algorithms
*
* This file provides the user-space API for symmetric key ciphers.
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/scatterwalk.h>
#include <crypto/skcipher.h>
#include <crypto/if_alg.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/net.h>
#include <net/sock.h>
struct skcipher_sg_list {
struct list_head list;
int cur;
struct scatterlist sg[0];
};
struct skcipher_ctx {
struct list_head tsgl;
struct af_alg_sgl rsgl;
void *iv;
struct af_alg_completion completion;
unsigned used;
unsigned int len;
bool more;
bool merge;
bool enc;
struct ablkcipher_request req;
};
#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \
sizeof(struct scatterlist) - 1)
static inline int skcipher_sndbuf(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
ctx->used, 0);
}
static inline bool skcipher_writable(struct sock *sk)
{
return PAGE_SIZE <= skcipher_sndbuf(sk);
}
static int skcipher_alloc_sgl(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct skcipher_sg_list *sgl;
struct scatterlist *sg = NULL;
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
if (!list_empty(&ctx->tsgl))
sg = sgl->sg;
if (!sg || sgl->cur >= MAX_SGL_ENTS) {
sgl = sock_kmalloc(sk, sizeof(*sgl) +
sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
GFP_KERNEL);
if (!sgl)
return -ENOMEM;
sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
sgl->cur = 0;
if (sg)
scatterwalk_sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
list_add_tail(&sgl->list, &ctx->tsgl);
}
return 0;
}
static void skcipher_pull_sgl(struct sock *sk, int used)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct skcipher_sg_list *sgl;
struct scatterlist *sg;
int i;
while (!list_empty(&ctx->tsgl)) {
sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
list);
sg = sgl->sg;
for (i = 0; i < sgl->cur; i++) {
int plen = min_t(int, used, sg[i].length);
if (!sg_page(sg + i))
continue;
sg[i].length -= plen;
sg[i].offset += plen;
used -= plen;
ctx->used -= plen;
if (sg[i].length)
return;
put_page(sg_page(sg + i));
sg_assign_page(sg + i, NULL);
}
list_del(&sgl->list);
sock_kfree_s(sk, sgl,
sizeof(*sgl) + sizeof(sgl->sg[0]) *
(MAX_SGL_ENTS + 1));
}
if (!ctx->used)
ctx->merge = 0;
}
static void skcipher_free_sgl(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
skcipher_pull_sgl(sk, ctx->used);
}
static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
{
long timeout;
DEFINE_WAIT(wait);
int err = -ERESTARTSYS;
if (flags & MSG_DONTWAIT)
return -EAGAIN;
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
for (;;) {
if (signal_pending(current))
break;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = MAX_SCHEDULE_TIMEOUT;
if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) {
err = 0;
break;
}
}
finish_wait(sk_sleep(sk), &wait);
return err;
}
static void skcipher_wmem_wakeup(struct sock *sk)
{
struct socket_wq *wq;
if (!skcipher_writable(sk))
return;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
POLLRDNORM |
POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}
static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
long timeout;
DEFINE_WAIT(wait);
int err = -ERESTARTSYS;
if (flags & MSG_DONTWAIT) {
return -EAGAIN;
}
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
for (;;) {
if (signal_pending(current))
break;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
timeout = MAX_SCHEDULE_TIMEOUT;
if (sk_wait_event(sk, &timeout, ctx->used)) {
err = 0;
break;
}
}
finish_wait(sk_sleep(sk), &wait);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
return err;
}
static void skcipher_data_wakeup(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct socket_wq *wq;
if (!ctx->used)
return;
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
POLLRDNORM |
POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
struct skcipher_sg_list *sgl;
struct af_alg_control con = {};
long copied = 0;
bool enc = 0;
int err;
int i;
if (msg->msg_controllen) {
err = af_alg_cmsg_send(msg, &con);
if (err)
return err;
switch (con.op) {
case ALG_OP_ENCRYPT:
enc = 1;
break;
case ALG_OP_DECRYPT:
enc = 0;
break;
default:
return -EINVAL;
}
if (con.iv && con.iv->ivlen != ivsize)
return -EINVAL;
}
err = -EINVAL;
lock_sock(sk);
if (!ctx->more && ctx->used)
goto unlock;
if (!ctx->used) {
ctx->enc = enc;
if (con.iv)
memcpy(ctx->iv, con.iv->iv, ivsize);
}
while (size) {
struct scatterlist *sg;
unsigned long len = size;
int plen;
if (ctx->merge) {
sgl = list_entry(ctx->tsgl.prev,
struct skcipher_sg_list, list);
sg = sgl->sg + sgl->cur - 1;
len = min_t(unsigned long, len,
PAGE_SIZE - sg->offset - sg->length);
err = memcpy_fromiovec(page_address(sg_page(sg)) +
sg->offset + sg->length,
msg->msg_iov, len);
if (err)
goto unlock;
sg->length += len;
ctx->merge = (sg->offset + sg->length) &
(PAGE_SIZE - 1);
ctx->used += len;
copied += len;
size -= len;
continue;
}
if (!skcipher_writable(sk)) {
err = skcipher_wait_for_wmem(sk, msg->msg_flags);
if (err)
goto unlock;
}
len = min_t(unsigned long, len, skcipher_sndbuf(sk));
err = skcipher_alloc_sgl(sk);
if (err)
goto unlock;
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
sg = sgl->sg;
do {
i = sgl->cur;
plen = min_t(int, len, PAGE_SIZE);
sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
err = -ENOMEM;
if (!sg_page(sg + i))
goto unlock;
err = memcpy_fromiovec(page_address(sg_page(sg + i)),
msg->msg_iov, plen);
if (err) {
__free_page(sg_page(sg + i));
sg_assign_page(sg + i, NULL);
goto unlock;
}
sg[i].length = plen;
len -= plen;
ctx->used += plen;
copied += plen;
size -= plen;
sgl->cur++;
} while (len && sgl->cur < MAX_SGL_ENTS);
ctx->merge = plen & (PAGE_SIZE - 1);
}
err = 0;
ctx->more = msg->msg_flags & MSG_MORE;
if (!ctx->more && !list_empty(&ctx->tsgl))
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
unlock:
skcipher_data_wakeup(sk);
release_sock(sk);
return copied ?: err;
}
static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct skcipher_sg_list *sgl;
int err = -EINVAL;
lock_sock(sk);
if (!ctx->more && ctx->used)
goto unlock;
if (!size)
goto done;
if (!skcipher_writable(sk)) {
err = skcipher_wait_for_wmem(sk, flags);
if (err)
goto unlock;
}
err = skcipher_alloc_sgl(sk);
if (err)
goto unlock;
ctx->merge = 0;
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
get_page(page);
sg_set_page(sgl->sg + sgl->cur, page, size, offset);
sgl->cur++;
ctx->used += size;
done:
ctx->more = flags & MSG_MORE;
if (!ctx->more && !list_empty(&ctx->tsgl))
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
unlock:
skcipher_data_wakeup(sk);
release_sock(sk);
return err ?: size;
}
static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
struct msghdr *msg, size_t ignored, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm(
&ctx->req));
struct skcipher_sg_list *sgl;
struct scatterlist *sg;
unsigned long iovlen;
struct iovec *iov;
int err = -EAGAIN;
int used;
long copied = 0;
lock_sock(sk);
for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
iovlen--, iov++) {
unsigned long seglen = iov->iov_len;
char __user *from = iov->iov_base;
while (seglen) {
sgl = list_first_entry(&ctx->tsgl,
struct skcipher_sg_list, list);
sg = sgl->sg;
while (!sg->length)
sg++;
used = ctx->used;
if (!used) {
err = skcipher_wait_for_data(sk, flags);
if (err)
goto unlock;
}
used = min_t(unsigned long, used, seglen);
used = af_alg_make_sg(&ctx->rsgl, from, used, 1);
err = used;
if (err < 0)
goto unlock;
if (ctx->more || used < ctx->used)
used -= used % bs;
err = -EINVAL;
if (!used)
goto free;
ablkcipher_request_set_crypt(&ctx->req, sg,
ctx->rsgl.sg, used,
ctx->iv);
err = af_alg_wait_for_completion(
ctx->enc ?
crypto_ablkcipher_encrypt(&ctx->req) :
crypto_ablkcipher_decrypt(&ctx->req),
&ctx->completion);
free:
af_alg_free_sg(&ctx->rsgl);
if (err)
goto unlock;
copied += used;
from += used;
seglen -= used;
skcipher_pull_sgl(sk, used);
}
}
err = 0;
unlock:
skcipher_wmem_wakeup(sk);
release_sock(sk);
return copied ?: err;
}
static unsigned int skcipher_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
unsigned int mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
if (ctx->used)
mask |= POLLIN | POLLRDNORM;
if (skcipher_writable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
return mask;
}
static struct proto_ops algif_skcipher_ops = {
.family = PF_ALG,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.getname = sock_no_getname,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.getsockopt = sock_no_getsockopt,
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.accept = sock_no_accept,
.setsockopt = sock_no_setsockopt,
.release = af_alg_release,
.sendmsg = skcipher_sendmsg,
.sendpage = skcipher_sendpage,
.recvmsg = skcipher_recvmsg,
.poll = skcipher_poll,
};
static void *skcipher_bind(const char *name, u32 type, u32 mask)
{
return crypto_alloc_ablkcipher(name, type, mask);
}
static void skcipher_release(void *private)
{
crypto_free_ablkcipher(private);
}
static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
{
return crypto_ablkcipher_setkey(private, key, keylen);
}
static void skcipher_sock_destruct(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
skcipher_free_sgl(sk);
sock_kfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
sock_kfree_s(sk, ctx, ctx->len);
af_alg_release_parent(sk);
}
static int skcipher_accept_parent(void *private, struct sock *sk)
{
struct skcipher_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private),
GFP_KERNEL);
if (!ctx->iv) {
sock_kfree_s(sk, ctx, len);
return -ENOMEM;
}
memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private));
INIT_LIST_HEAD(&ctx->tsgl);
ctx->len = len;
ctx->used = 0;
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
af_alg_init_completion(&ctx->completion);
ask->private = ctx;
ablkcipher_request_set_tfm(&ctx->req, private);
ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
af_alg_complete, &ctx->completion);
sk->sk_destruct = skcipher_sock_destruct;
return 0;
}
static const struct af_alg_type algif_type_skcipher = {
.bind = skcipher_bind,
.release = skcipher_release,
.setkey = skcipher_setkey,
.accept = skcipher_accept_parent,
.ops = &algif_skcipher_ops,
.name = "skcipher",
.owner = THIS_MODULE
};
static int __init algif_skcipher_init(void)
{
return af_alg_register_type(&algif_type_skcipher);
}
static void __exit algif_skcipher_exit(void)
{
int err = af_alg_unregister_type(&algif_type_skcipher);
BUG_ON(err);
}
module_init(algif_skcipher_init);
module_exit(algif_skcipher_exit);
MODULE_LICENSE("GPL");
......@@ -107,20 +107,6 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
goto out;
}
static void authenc_chain(struct scatterlist *head, struct scatterlist *sg,
int chain)
{
if (chain) {
head->length += sg->length;
sg = scatterwalk_sg_next(sg);
}
if (sg)
scatterwalk_sg_chain(head, 2, sg);
else
sg_mark_end(head);
}
static void authenc_geniv_ahash_update_done(struct crypto_async_request *areq,
int err)
{
......@@ -345,7 +331,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv,
if (ivsize) {
sg_init_table(cipher, 2);
sg_set_buf(cipher, iv, ivsize);
authenc_chain(cipher, dst, vdst == iv + ivsize);
scatterwalk_crypto_chain(cipher, dst, vdst == iv + ivsize, 2);
dst = cipher;
cryptlen += ivsize;
}
......@@ -354,7 +340,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv,
authenc_ahash_fn = crypto_authenc_ahash;
sg_init_table(asg, 2);
sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset);
authenc_chain(asg, dst, 0);
scatterwalk_crypto_chain(asg, dst, 0, 2);
dst = asg;
cryptlen += req->assoclen;
}
......@@ -499,7 +485,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv,
if (ivsize) {
sg_init_table(cipher, 2);
sg_set_buf(cipher, iv, ivsize);
authenc_chain(cipher, src, vsrc == iv + ivsize);
scatterwalk_crypto_chain(cipher, src, vsrc == iv + ivsize, 2);
src = cipher;
cryptlen += ivsize;
}
......@@ -508,7 +494,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv,
authenc_ahash_fn = crypto_authenc_ahash;
sg_init_table(asg, 2);
sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset);
authenc_chain(asg, src, 0);
scatterwalk_crypto_chain(asg, src, 0, 2);
src = asg;
cryptlen += req->assoclen;
}
......
......@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
* Rounds 3, 6, 9, 12, and 15 use f function Type 3.
*/
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
if (!(c->rr)) {
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
} else {
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
}
/* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
......@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
} else {
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
}
t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
dst[0] = cpu_to_be32(r);
dst[1] = cpu_to_be32(l);
......
......@@ -20,7 +20,8 @@ EXPORT_SYMBOL_GPL(kcrypto_wq);
static int __init crypto_wq_init(void)
{
kcrypto_wq = create_workqueue("crypto");
kcrypto_wq = alloc_workqueue("crypto",
WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
if (unlikely(!kcrypto_wq))
return -ENOMEM;
return 0;
......
......@@ -48,12 +48,11 @@ static int deflate_comp_init(struct deflate_ctx *ctx)
int ret = 0;
struct z_stream_s *stream = &ctx->comp_stream;
stream->workspace = vmalloc(zlib_deflate_workspacesize());
stream->workspace = vzalloc(zlib_deflate_workspacesize());
if (!stream->workspace) {
ret = -ENOMEM;
goto out;
}
memset(stream->workspace, 0, zlib_deflate_workspacesize());
ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
-DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
......
......@@ -62,20 +62,6 @@ static void eseqiv_complete(struct crypto_async_request *base, int err)
skcipher_givcrypt_complete(req, err);
}
static void eseqiv_chain(struct scatterlist *head, struct scatterlist *sg,
int chain)
{
if (chain) {
head->length += sg->length;
sg = scatterwalk_sg_next(sg);
}
if (sg)
scatterwalk_sg_chain(head, 2, sg);
else
sg_mark_end(head);
}
static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req)
{
struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
......@@ -124,13 +110,13 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req)
sg_init_table(reqctx->src, 2);
sg_set_buf(reqctx->src, giv, ivsize);
eseqiv_chain(reqctx->src, osrc, vsrc == giv + ivsize);
scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2);
dst = reqctx->src;
if (osrc != odst) {
sg_init_table(reqctx->dst, 2);
sg_set_buf(reqctx->dst, giv, ivsize);
eseqiv_chain(reqctx->dst, odst, vdst == giv + ivsize);
scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2);
dst = reqctx->dst;
}
......
......@@ -1102,21 +1102,6 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
return crypto_aead_setauthsize(ctx->child, authsize);
}
/* this is the same as crypto_authenc_chain */
static void crypto_rfc4543_chain(struct scatterlist *head,
struct scatterlist *sg, int chain)
{
if (chain) {
head->length += sg->length;
sg = scatterwalk_sg_next(sg);
}
if (sg)
scatterwalk_sg_chain(head, 2, sg);
else
sg_mark_end(head);
}
static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
int enc)
{
......@@ -1154,13 +1139,13 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
sg_init_table(payload, 2);
sg_set_buf(payload, req->iv, 8);
crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8);
scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2);
assoclen += 8 + req->cryptlen - (enc ? 0 : authsize);
sg_init_table(assoc, 2);
sg_set_page(assoc, sg_page(req->assoc), req->assoc->length,
req->assoc->offset);
crypto_rfc4543_chain(assoc, payload, 0);
scatterwalk_crypto_chain(assoc, payload, 0, 2);
aead_request_set_tfm(subreq, ctx->child);
aead_request_set_callback(subreq, req->base.flags, req->base.complete,
......
......@@ -455,7 +455,8 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt,
get_online_cpus();
pcrypt->wq = create_workqueue(name);
pcrypt->wq = alloc_workqueue(name,
WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
if (!pcrypt->wq)
goto err;
......
......@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
* Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
......@@ -325,4 +325,5 @@ module_init(rmd128_mod_init);
module_exit(rmd128_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-128 Message Digest");
......@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
* Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
......@@ -369,4 +369,5 @@ module_init(rmd160_mod_init);
module_exit(rmd160_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-160 Message Digest");
......@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
* Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
......@@ -344,4 +344,5 @@ module_init(rmd256_mod_init);
module_exit(rmd256_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-256 Message Digest");
......@@ -5,7 +5,7 @@
*
* Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC
*
* Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch>
* Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
......@@ -393,4 +393,5 @@ module_init(rmd320_mod_init);
module_exit(rmd320_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>");
MODULE_DESCRIPTION("RIPEMD-320 Message Digest");
......@@ -310,7 +310,13 @@ static int shash_async_export(struct ahash_request *req, void *out)
static int shash_async_import(struct ahash_request *req, const void *in)
{
return crypto_shash_import(ahash_request_ctx(req), in);
struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
struct shash_desc *desc = ahash_request_ctx(req);
desc->tfm = *ctx;
desc->flags = req->base.flags;
return crypto_shash_import(desc, in);
}
static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm)
......
......@@ -8,6 +8,13 @@
* Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) 2007 Nokia Siemens Networks
*
* Updated RFC4106 AES-GCM testing.
* Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
* Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
......@@ -980,6 +987,10 @@ static int do_test(int m)
ret += tcrypt_test("ansi_cprng");
break;
case 151:
ret += tcrypt_test("rfc4106(gcm(aes))");
break;
case 200:
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
speed_template_16_24_32);
......
......@@ -6,6 +6,13 @@
* Copyright (c) 2007 Nokia Siemens Networks
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
* Updated RFC4106 AES-GCM testing.
* Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
* Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
......@@ -2242,6 +2249,23 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
.alg = "rfc4106(gcm(aes))",
.test = alg_test_aead,
.suite = {
.aead = {
.enc = {
.vecs = aes_gcm_rfc4106_enc_tv_template,
.count = AES_GCM_4106_ENC_TEST_VECTORS
},
.dec = {
.vecs = aes_gcm_rfc4106_dec_tv_template,
.count = AES_GCM_4106_DEC_TEST_VECTORS
}
}
}
}, {
.alg = "rfc4309(ccm(aes))",
.test = alg_test_aead,
.fips_allowed = 1,
......
......@@ -6,6 +6,15 @@
* Copyright (c) 2007 Nokia Siemens Networks
* Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
*
* Updated RFC4106 AES-GCM testing. Some test vectors were taken from
* http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/
* gcm/gcm-test-vectors.tar.gz
* Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
* Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
......@@ -2947,6 +2956,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
#define AES_CTR_3686_DEC_TEST_VECTORS 6
#define AES_GCM_ENC_TEST_VECTORS 9
#define AES_GCM_DEC_TEST_VECTORS 8
#define AES_GCM_4106_ENC_TEST_VECTORS 7
#define AES_GCM_4106_DEC_TEST_VECTORS 7
#define AES_CCM_ENC_TEST_VECTORS 7
#define AES_CCM_DEC_TEST_VECTORS 7
#define AES_CCM_4309_ENC_TEST_VECTORS 7
......@@ -5829,6 +5840,356 @@ static struct aead_testvec aes_gcm_dec_tv_template[] = {
}
};
static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
{ /* Generated using Crypto++ */
.key = zeroed_string,
.klen = 20,
.iv = zeroed_string,
.input = zeroed_string,
.ilen = 16,
.assoc = zeroed_string,
.alen = 8,
.result = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
"\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
"\x97\xFE\x4C\x23\x37\x42\x01\xE0"
"\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
.rlen = 32,
},{
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = zeroed_string,
.ilen = 16,
.assoc = zeroed_string,
.alen = 8,
.result = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
"\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
"\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
"\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
.rlen = 32,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = zeroed_string,
.input = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.ilen = 16,
.assoc = zeroed_string,
.alen = 8,
.result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
"\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
"\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
"\xB1\x68\xFD\x14\x52\x64\x61\xB2",
.rlen = 32,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = zeroed_string,
.input = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.ilen = 16,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
"\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
"\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
"\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
.rlen = 32,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.ilen = 16,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
"\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
"\x64\x50\xF9\x32\x13\xFB\x74\x61"
"\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
.rlen = 32,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.ilen = 64,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
"\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
"\x98\x14\xA1\x42\x37\x80\xFD\x90"
"\x68\x12\x01\xA8\x91\x89\xB9\x83"
"\x5B\x11\x77\x12\x9B\xFF\x24\x89"
"\x94\x5F\x18\x12\xBA\x27\x09\x39"
"\x99\x96\x76\x42\x15\x1C\xCD\xCB"
"\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
"\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
"\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
.rlen = 80,
}, {
.key = "\x00\x01\x02\x03\x04\x05\x06\x07"
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x45\x67\x89\xab\xcd\xef"
"\x00\x00\x00\x00",
.input = "\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff",
.ilen = 192,
.assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa",
.alen = 12,
.result = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
"\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
"\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
"\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82"
"\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44"
"\x41\xA9\x82\x6F\x22\xA1\x23\x1A"
"\xA8\xE3\x16\xFD\x31\x5C\x27\x31"
"\xF1\x7F\x01\x63\xA3\xAF\x70\xA1"
"\xCF\x07\x57\x41\x67\xD0\xC4\x42"
"\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F"
"\x05\x07\xFB\x13\x7D\x4A\xCA\x5B"
"\xF0\xBF\x64\x7E\x05\xB1\x72\xEE"
"\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C"
"\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF"
"\xC7\x75\x40\xFF\xAE\xAD\x1E\x59"
"\x2F\x30\x24\xFB\xAD\x6B\x10\xFA"
"\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25"
"\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B"
"\x7E\x13\x06\x82\x08\x17\xA4\x35"
"\xEC\xC5\x8D\x63\x96\x81\x0A\x8F"
"\xA3\x05\x38\x95\x20\x1A\x47\x04"
"\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35"
"\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E"
"\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B"
"\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
"\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
.rlen = 208,
}
};
static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
{ /* Generated using Crypto++ */
.key = zeroed_string,
.klen = 20,
.iv = zeroed_string,
.input = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
"\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
"\x97\xFE\x4C\x23\x37\x42\x01\xE0"
"\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
.ilen = 32,
.assoc = zeroed_string,
.alen = 8,
.result = zeroed_string,
.rlen = 16,
},{
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
"\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
"\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
"\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
.ilen = 32,
.assoc = zeroed_string,
.alen = 8,
.result = zeroed_string,
.rlen = 16,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = zeroed_string,
.input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
"\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
"\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
"\xB1\x68\xFD\x14\x52\x64\x61\xB2",
.ilen = 32,
.assoc = zeroed_string,
.alen = 8,
.result = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.rlen = 16,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = zeroed_string,
.input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
"\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
"\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
"\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
.ilen = 32,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.rlen = 16,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
"\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
"\x64\x50\xF9\x32\x13\xFB\x74\x61"
"\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
.ilen = 32,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.rlen = 16,
}, {
.key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
"\x6d\x6a\x8f\x94\x67\x30\x83\x08"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x00\x00\x00\x00\x00\x01"
"\x00\x00\x00\x00",
.input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
"\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
"\x98\x14\xA1\x42\x37\x80\xFD\x90"
"\x68\x12\x01\xA8\x91\x89\xB9\x83"
"\x5B\x11\x77\x12\x9B\xFF\x24\x89"
"\x94\x5F\x18\x12\xBA\x27\x09\x39"
"\x99\x96\x76\x42\x15\x1C\xCD\xCB"
"\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
"\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
"\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
.ilen = 80,
.assoc = "\x01\x01\x01\x01\x01\x01\x01\x01",
.alen = 8,
.result = "\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01"
"\x01\x01\x01\x01\x01\x01\x01\x01",
.rlen = 64,
}, {
.key = "\x00\x01\x02\x03\x04\x05\x06\x07"
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
"\x00\x00\x00\x00",
.klen = 20,
.iv = "\x00\x00\x45\x67\x89\xab\xcd\xef"
"\x00\x00\x00\x00",
.input = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
"\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
"\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
"\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82"
"\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44"
"\x41\xA9\x82\x6F\x22\xA1\x23\x1A"
"\xA8\xE3\x16\xFD\x31\x5C\x27\x31"
"\xF1\x7F\x01\x63\xA3\xAF\x70\xA1"
"\xCF\x07\x57\x41\x67\xD0\xC4\x42"
"\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F"
"\x05\x07\xFB\x13\x7D\x4A\xCA\x5B"
"\xF0\xBF\x64\x7E\x05\xB1\x72\xEE"
"\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C"
"\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF"
"\xC7\x75\x40\xFF\xAE\xAD\x1E\x59"
"\x2F\x30\x24\xFB\xAD\x6B\x10\xFA"
"\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25"
"\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B"
"\x7E\x13\x06\x82\x08\x17\xA4\x35"
"\xEC\xC5\x8D\x63\x96\x81\x0A\x8F"
"\xA3\x05\x38\x95\x20\x1A\x47\x04"
"\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35"
"\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E"
"\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B"
"\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
"\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
.ilen = 208,
.assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
"\xaa\xaa\xaa\xaa",
.alen = 12,
.result = "\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff",
.rlen = 192,
}
};
static struct aead_testvec aes_ccm_enc_tv_template[] = {
{ /* From RFC 3610 */
.key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
......
......@@ -95,11 +95,10 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
zlib_comp_exit(ctx);
workspacesize = zlib_deflate_workspacesize();
stream->workspace = vmalloc(workspacesize);
stream->workspace = vzalloc(workspacesize);
if (!stream->workspace)
return -ENOMEM;
memset(stream->workspace, 0, workspacesize);
ret = zlib_deflateInit2(stream,
tb[ZLIB_COMP_LEVEL]
? nla_get_u32(tb[ZLIB_COMP_LEVEL])
......
......@@ -24,6 +24,7 @@
* warranty of any kind, whether express or implied.
*/
#include <crypto/padlock.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/hw_random.h>
......@@ -34,7 +35,6 @@
#include <asm/i387.h>
#define PFX KBUILD_MODNAME ": "
enum {
......@@ -81,8 +81,7 @@ static inline u32 xstore(u32 *addr, u32 edx_in)
ts_state = irq_ts_save();
asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
:"=m"(*addr), "=a"(eax_out)
:"D"(addr), "d"(edx_in));
: "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr));
irq_ts_restore(ts_state);
return eax_out;
......@@ -90,8 +89,10 @@ static inline u32 xstore(u32 *addr, u32 edx_in)
static int via_rng_data_present(struct hwrng *rng, int wait)
{
char buf[16 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
((aligned(STACK_ALIGN)));
u32 *via_rng_datum = (u32 *)PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
u32 bytes_out;
u32 *via_rng_datum = (u32 *)(&rng->priv);
int i;
/* We choose the recommended 1-byte-per-instruction RNG rate,
......@@ -115,6 +116,7 @@ static int via_rng_data_present(struct hwrng *rng, int wait)
break;
udelay(10);
}
rng->priv = *via_rng_datum;
return bytes_out ? 1 : 0;
}
......
......@@ -857,7 +857,7 @@ static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name,
printk(KERN_WARNING MV_CESA
"Base driver '%s' could not be loaded!\n",
base_hash_name);
err = PTR_ERR(fallback_tfm);
err = PTR_ERR(base_hash);
goto err_bad_base;
}
}
......
......@@ -1542,7 +1542,7 @@ static int __devinit n2_register_algs(void)
return err;
}
static void __exit n2_unregister_algs(void)
static void __devexit n2_unregister_algs(void)
{
mutex_lock(&spu_lock);
if (!--algs_registered)
......
......@@ -74,11 +74,9 @@
#define FLAGS_CBC BIT(1)
#define FLAGS_GIV BIT(2)
#define FLAGS_NEW_KEY BIT(4)
#define FLAGS_NEW_IV BIT(5)
#define FLAGS_INIT BIT(6)
#define FLAGS_FAST BIT(7)
#define FLAGS_BUSY 8
#define FLAGS_INIT BIT(4)
#define FLAGS_FAST BIT(5)
#define FLAGS_BUSY BIT(6)
struct omap_aes_ctx {
struct omap_aes_dev *dd;
......@@ -98,19 +96,18 @@ struct omap_aes_reqctx {
struct omap_aes_dev {
struct list_head list;
unsigned long phys_base;
void __iomem *io_base;
void __iomem *io_base;
struct clk *iclk;
struct omap_aes_ctx *ctx;
struct device *dev;
unsigned long flags;
int err;
u32 *iv;
u32 ctrl;
spinlock_t lock;
struct crypto_queue queue;
spinlock_t lock;
struct crypto_queue queue;
struct tasklet_struct task;
struct tasklet_struct done_task;
struct tasklet_struct queue_task;
struct ablkcipher_request *req;
size_t total;
......@@ -179,9 +176,13 @@ static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit)
static int omap_aes_hw_init(struct omap_aes_dev *dd)
{
int err = 0;
/*
* clocks are enabled when request starts and disabled when finished.
* It may be long delays between requests.
* Device might go to off mode to save power.
*/
clk_enable(dd->iclk);
if (!(dd->flags & FLAGS_INIT)) {
/* is it necessary to reset before every operation? */
omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET,
......@@ -193,39 +194,26 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
__asm__ __volatile__("nop");
__asm__ __volatile__("nop");
err = omap_aes_wait(dd, AES_REG_SYSSTATUS,
AES_REG_SYSSTATUS_RESETDONE);
if (!err)
dd->flags |= FLAGS_INIT;
}
if (omap_aes_wait(dd, AES_REG_SYSSTATUS,
AES_REG_SYSSTATUS_RESETDONE))
return -ETIMEDOUT;
return err;
}
dd->flags |= FLAGS_INIT;
dd->err = 0;
}
static void omap_aes_hw_cleanup(struct omap_aes_dev *dd)
{
clk_disable(dd->iclk);
return 0;
}
static void omap_aes_write_ctrl(struct omap_aes_dev *dd)
static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
{
unsigned int key32;
int i;
int i, err;
u32 val, mask;
val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
if (dd->flags & FLAGS_CBC)
val |= AES_REG_CTRL_CBC;
if (dd->flags & FLAGS_ENCRYPT)
val |= AES_REG_CTRL_DIRECTION;
if (dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) &&
!(dd->ctx->flags & FLAGS_NEW_KEY))
goto out;
/* only need to write control registers for new settings */
dd->ctrl = val;
err = omap_aes_hw_init(dd);
if (err)
return err;
val = 0;
if (dd->dma_lch_out >= 0)
......@@ -237,30 +225,43 @@ static void omap_aes_write_ctrl(struct omap_aes_dev *dd)
omap_aes_write_mask(dd, AES_REG_MASK, val, mask);
pr_debug("Set key\n");
key32 = dd->ctx->keylen / sizeof(u32);
/* set a key */
/* it seems a key should always be set even if it has not changed */
for (i = 0; i < key32; i++) {
omap_aes_write(dd, AES_REG_KEY(i),
__le32_to_cpu(dd->ctx->key[i]));
}
dd->ctx->flags &= ~FLAGS_NEW_KEY;
if (dd->flags & FLAGS_NEW_IV) {
pr_debug("Set IV\n");
omap_aes_write_n(dd, AES_REG_IV(0), dd->iv, 4);
dd->flags &= ~FLAGS_NEW_IV;
}
if ((dd->flags & FLAGS_CBC) && dd->req->info)
omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4);
val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
if (dd->flags & FLAGS_CBC)
val |= AES_REG_CTRL_CBC;
if (dd->flags & FLAGS_ENCRYPT)
val |= AES_REG_CTRL_DIRECTION;
mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
AES_REG_CTRL_KEY_SIZE;
omap_aes_write_mask(dd, AES_REG_CTRL, dd->ctrl, mask);
omap_aes_write_mask(dd, AES_REG_CTRL, val, mask);
out:
/* start DMA or disable idle mode */
omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
AES_REG_MASK_START);
/* IN */
omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + AES_REG_DATA, 0, 4);
omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
/* OUT */
omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + AES_REG_DATA, 0, 4);
omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
return 0;
}
static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
......@@ -288,8 +289,16 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
{
struct omap_aes_dev *dd = data;
if (lch == dd->dma_lch_out)
tasklet_schedule(&dd->task);
if (ch_status != OMAP_DMA_BLOCK_IRQ) {
pr_err("omap-aes DMA error status: 0x%hx\n", ch_status);
dd->err = -EIO;
dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
} else if (lch == dd->dma_lch_in) {
return;
}
/* dma_lch_out - completed */
tasklet_schedule(&dd->done_task);
}
static int omap_aes_dma_init(struct omap_aes_dev *dd)
......@@ -339,18 +348,6 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd)
goto err_dma_out;
}
omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + AES_REG_DATA, 0, 4);
omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + AES_REG_DATA, 0, 4);
omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
return 0;
err_dma_out:
......@@ -406,6 +403,11 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf,
if (!count)
return off;
/*
* buflen and total are AES_BLOCK_SIZE size aligned,
* so count should be also aligned
*/
sg_copy_buf(buf + off, *sg, *offset, count, out);
off += count;
......@@ -461,7 +463,9 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
omap_start_dma(dd->dma_lch_in);
omap_start_dma(dd->dma_lch_out);
omap_aes_write_ctrl(dd);
/* start DMA or disable idle mode */
omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
AES_REG_MASK_START);
return 0;
}
......@@ -488,8 +492,10 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
count = min(dd->total, sg_dma_len(dd->in_sg));
count = min(count, sg_dma_len(dd->out_sg));
if (count != dd->total)
if (count != dd->total) {
pr_err("request length != buffer length\n");
return -EINVAL;
}
pr_debug("fast\n");
......@@ -525,23 +531,25 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
dd->total -= count;
err = omap_aes_hw_init(dd);
err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count);
if (err) {
dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
}
return err;
}
static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
{
struct omap_aes_ctx *ctx;
struct ablkcipher_request *req = dd->req;
pr_debug("err: %d\n", err);
ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req));
clk_disable(dd->iclk);
dd->flags &= ~FLAGS_BUSY;
if (!dd->total)
dd->req->base.complete(&dd->req->base, err);
req->base.complete(&req->base, err);
}
static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
......@@ -553,8 +561,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START);
omap_aes_hw_cleanup(dd);
omap_stop_dma(dd->dma_lch_in);
omap_stop_dma(dd->dma_lch_out);
......@@ -574,40 +580,39 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
}
}
if (err || !dd->total)
omap_aes_finish_req(dd, err);
return err;
}
static int omap_aes_handle_req(struct omap_aes_dev *dd)
static int omap_aes_handle_queue(struct omap_aes_dev *dd,
struct ablkcipher_request *req)
{
struct crypto_async_request *async_req, *backlog;
struct omap_aes_ctx *ctx;
struct omap_aes_reqctx *rctx;
struct ablkcipher_request *req;
unsigned long flags;
if (dd->total)
goto start;
int err, ret = 0;
spin_lock_irqsave(&dd->lock, flags);
if (req)
ret = ablkcipher_enqueue_request(&dd->queue, req);
if (dd->flags & FLAGS_BUSY) {
spin_unlock_irqrestore(&dd->lock, flags);
return ret;
}
backlog = crypto_get_backlog(&dd->queue);
async_req = crypto_dequeue_request(&dd->queue);
if (!async_req)
clear_bit(FLAGS_BUSY, &dd->flags);
if (async_req)
dd->flags |= FLAGS_BUSY;
spin_unlock_irqrestore(&dd->lock, flags);
if (!async_req)
return 0;
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
req = ablkcipher_request_cast(async_req);
pr_debug("get new req\n");
/* assign new request to device */
dd->req = req;
dd->total = req->nbytes;
......@@ -621,27 +626,22 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd)
rctx->mode &= FLAGS_MODE_MASK;
dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
dd->iv = req->info;
if ((dd->flags & FLAGS_CBC) && dd->iv)
dd->flags |= FLAGS_NEW_IV;
else
dd->flags &= ~FLAGS_NEW_IV;
dd->ctx = ctx;
ctx->dd = dd;
if (dd->ctx != ctx) {
/* assign new context to device */
dd->ctx = ctx;
ctx->flags |= FLAGS_NEW_KEY;
}
if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
pr_err("request size is not exact amount of AES blocks\n");
err = omap_aes_write_ctrl(dd);
if (!err)
err = omap_aes_crypt_dma_start(dd);
if (err) {
/* aes_task will not finish it, so do it here */
omap_aes_finish_req(dd, err);
tasklet_schedule(&dd->queue_task);
}
start:
return omap_aes_crypt_dma_start(dd);
return ret; /* return ret, which is enqueue return value */
}
static void omap_aes_task(unsigned long data)
static void omap_aes_done_task(unsigned long data)
{
struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
int err;
......@@ -650,40 +650,50 @@ static void omap_aes_task(unsigned long data)
err = omap_aes_crypt_dma_stop(dd);
err = omap_aes_handle_req(dd);
err = dd->err ? : err;
if (dd->total && !err) {
err = omap_aes_crypt_dma_start(dd);
if (!err)
return; /* DMA started. Not fininishing. */
}
omap_aes_finish_req(dd, err);
omap_aes_handle_queue(dd, NULL);
pr_debug("exit\n");
}
static void omap_aes_queue_task(unsigned long data)
{
struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
omap_aes_handle_queue(dd, NULL);
}
static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
{
struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
struct omap_aes_dev *dd;
unsigned long flags;
int err;
pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
!!(mode & FLAGS_ENCRYPT),
!!(mode & FLAGS_CBC));
if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
pr_err("request size is not exact amount of AES blocks\n");
return -EINVAL;
}
dd = omap_aes_find_dev(ctx);
if (!dd)
return -ENODEV;
rctx->mode = mode;
spin_lock_irqsave(&dd->lock, flags);
err = ablkcipher_enqueue_request(&dd->queue, req);
spin_unlock_irqrestore(&dd->lock, flags);
if (!test_and_set_bit(FLAGS_BUSY, &dd->flags))
omap_aes_handle_req(dd);
pr_debug("exit\n");
return err;
return omap_aes_handle_queue(dd, req);
}
/* ********************** ALG API ************************************ */
......@@ -701,7 +711,6 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
memcpy(ctx->key, key, keylen);
ctx->keylen = keylen;
ctx->flags |= FLAGS_NEW_KEY;
return 0;
}
......@@ -750,7 +759,7 @@ static struct crypto_alg algs[] = {
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_aes_ctx),
.cra_alignmask = 0,
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = omap_aes_cra_init,
......@@ -770,7 +779,7 @@ static struct crypto_alg algs[] = {
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_aes_ctx),
.cra_alignmask = 0,
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = omap_aes_cra_init,
......@@ -849,7 +858,8 @@ static int omap_aes_probe(struct platform_device *pdev)
(reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR);
clk_disable(dd->iclk);
tasklet_init(&dd->task, omap_aes_task, (unsigned long)dd);
tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd);
tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd);
err = omap_aes_dma_init(dd);
if (err)
......@@ -876,7 +886,8 @@ static int omap_aes_probe(struct platform_device *pdev)
crypto_unregister_alg(&algs[j]);
omap_aes_dma_cleanup(dd);
err_dma:
tasklet_kill(&dd->task);
tasklet_kill(&dd->done_task);
tasklet_kill(&dd->queue_task);
iounmap(dd->io_base);
err_io:
clk_put(dd->iclk);
......@@ -903,7 +914,8 @@ static int omap_aes_remove(struct platform_device *pdev)
for (i = 0; i < ARRAY_SIZE(algs); i++)
crypto_unregister_alg(&algs[i]);
tasklet_kill(&dd->task);
tasklet_kill(&dd->done_task);
tasklet_kill(&dd->queue_task);
omap_aes_dma_cleanup(dd);
iounmap(dd->io_base);
clk_put(dd->iclk);
......
......@@ -72,10 +72,9 @@
#define DEFAULT_TIMEOUT_INTERVAL HZ
#define FLAGS_FIRST 0x0001
#define FLAGS_FINUP 0x0002
#define FLAGS_FINAL 0x0004
#define FLAGS_FAST 0x0008
#define FLAGS_SG 0x0008
#define FLAGS_SHA1 0x0010
#define FLAGS_DMA_ACTIVE 0x0020
#define FLAGS_OUTPUT_READY 0x0040
......@@ -83,13 +82,17 @@
#define FLAGS_INIT 0x0100
#define FLAGS_CPU 0x0200
#define FLAGS_HMAC 0x0400
/* 3rd byte */
#define FLAGS_BUSY 16
#define FLAGS_ERROR 0x0800
#define FLAGS_BUSY 0x1000
#define OP_UPDATE 1
#define OP_FINAL 2
#define OMAP_ALIGN_MASK (sizeof(u32)-1)
#define OMAP_ALIGNED __attribute__((aligned(sizeof(u32))))
#define BUFLEN PAGE_SIZE
struct omap_sham_dev;
struct omap_sham_reqctx {
......@@ -97,8 +100,8 @@ struct omap_sham_reqctx {
unsigned long flags;
unsigned long op;
u8 digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED;
size_t digcnt;
u8 *buffer;
size_t bufcnt;
size_t buflen;
dma_addr_t dma_addr;
......@@ -107,6 +110,8 @@ struct omap_sham_reqctx {
struct scatterlist *sg;
unsigned int offset; /* offset in current sg */
unsigned int total; /* total request */
u8 buffer[0] OMAP_ALIGNED;
};
struct omap_sham_hmac_ctx {
......@@ -136,6 +141,7 @@ struct omap_sham_dev {
int irq;
struct clk *iclk;
spinlock_t lock;
int err;
int dma;
int dma_lch;
struct tasklet_struct done_task;
......@@ -194,53 +200,68 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit)
static void omap_sham_copy_hash(struct ahash_request *req, int out)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
u32 *hash = (u32 *)ctx->digest;
int i;
/* MD5 is almost unused. So copy sha1 size to reduce code */
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
if (out)
hash[i] = omap_sham_read(ctx->dd,
SHA_REG_DIGEST(i));
else
omap_sham_write(ctx->dd,
SHA_REG_DIGEST(i), hash[i]);
}
}
static void omap_sham_copy_ready_hash(struct ahash_request *req)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
u32 *in = (u32 *)ctx->digest;
u32 *hash = (u32 *)req->result;
int i;
if (!hash)
return;
if (likely(ctx->flags & FLAGS_SHA1)) {
/* SHA1 results are in big endian */
for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
if (out)
hash[i] = be32_to_cpu(omap_sham_read(ctx->dd,
SHA_REG_DIGEST(i)));
else
omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
cpu_to_be32(hash[i]));
hash[i] = be32_to_cpu(in[i]);
} else {
/* MD5 results are in little endian */
for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++)
if (out)
hash[i] = le32_to_cpu(omap_sham_read(ctx->dd,
SHA_REG_DIGEST(i)));
else
omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
cpu_to_le32(hash[i]));
hash[i] = le32_to_cpu(in[i]);
}
}
static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
int final, int dma)
static int omap_sham_hw_init(struct omap_sham_dev *dd)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
u32 val = length << 5, mask;
clk_enable(dd->iclk);
if (unlikely(!ctx->digcnt)) {
if (!(dd->flags & FLAGS_INIT)) {
omap_sham_write_mask(dd, SHA_REG_MASK,
SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
clk_enable(dd->iclk);
if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
SHA_REG_SYSSTATUS_RESETDONE))
return -ETIMEDOUT;
if (!(dd->flags & FLAGS_INIT)) {
omap_sham_write_mask(dd, SHA_REG_MASK,
SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
dd->flags |= FLAGS_INIT;
dd->err = 0;
}
if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
SHA_REG_SYSSTATUS_RESETDONE))
return -ETIMEDOUT;
return 0;
}
dd->flags |= FLAGS_INIT;
}
} else {
static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
int final, int dma)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
u32 val = length << 5, mask;
if (likely(ctx->digcnt))
omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt);
}
omap_sham_write_mask(dd, SHA_REG_MASK,
SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0),
......@@ -260,29 +281,26 @@ static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH;
omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask);
return 0;
}
static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
size_t length, int final)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
int err, count, len32;
int count, len32;
const u32 *buffer = (const u32 *)buf;
dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
ctx->digcnt, length, final);
err = omap_sham_write_ctrl(dd, length, final, 0);
if (err)
return err;
omap_sham_write_ctrl(dd, length, final, 0);
/* should be non-zero before next lines to disable clocks later */
ctx->digcnt += length;
if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY))
return -ETIMEDOUT;
ctx->digcnt += length;
if (final)
ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
......@@ -298,16 +316,11 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
size_t length, int final)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
int err, len32;
int len32;
dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
ctx->digcnt, length, final);
/* flush cache entries related to our page */
if (dma_addr == ctx->dma_addr)
dma_sync_single_for_device(dd->dev, dma_addr, length,
DMA_TO_DEVICE);
len32 = DIV_ROUND_UP(length, sizeof(u32));
omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
......@@ -317,9 +330,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
dma_addr, 0, 0);
err = omap_sham_write_ctrl(dd, length, final, 1);
if (err)
return err;
omap_sham_write_ctrl(dd, length, final, 1);
ctx->digcnt += length;
......@@ -371,15 +382,29 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
return 0;
}
static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
struct omap_sham_reqctx *ctx,
size_t length, int final)
{
ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
DMA_TO_DEVICE);
if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
return -EINVAL;
}
ctx->flags &= ~FLAGS_SG;
/* next call does not fail... so no unmap in the case of error */
return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final);
}
static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
unsigned int final;
size_t count;
if (!ctx->total)
return 0;
omap_sham_append_sg(ctx);
final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
......@@ -390,30 +415,68 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
count = ctx->bufcnt;
ctx->bufcnt = 0;
return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final);
return omap_sham_xmit_dma_map(dd, ctx, count, final);
}
return 0;
}
static int omap_sham_update_dma_fast(struct omap_sham_dev *dd)
/* Start address alignment */
#define SG_AA(sg) (IS_ALIGNED(sg->offset, sizeof(u32)))
/* SHA1 block size alignment */
#define SG_SA(sg) (IS_ALIGNED(sg->length, SHA1_MD5_BLOCK_SIZE))
static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
unsigned int length;
unsigned int length, final, tail;
struct scatterlist *sg;
ctx->flags |= FLAGS_FAST;
if (!ctx->total)
return 0;
if (ctx->bufcnt || ctx->offset)
return omap_sham_update_dma_slow(dd);
dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
ctx->digcnt, ctx->bufcnt, ctx->total);
sg = ctx->sg;
length = min(ctx->total, sg_dma_len(ctx->sg));
ctx->total = length;
if (!SG_AA(sg))
return omap_sham_update_dma_slow(dd);
if (!sg_is_last(sg) && !SG_SA(sg))
/* size is not SHA1_BLOCK_SIZE aligned */
return omap_sham_update_dma_slow(dd);
length = min(ctx->total, sg->length);
if (sg_is_last(sg)) {
if (!(ctx->flags & FLAGS_FINUP)) {
/* not last sg must be SHA1_MD5_BLOCK_SIZE aligned */
tail = length & (SHA1_MD5_BLOCK_SIZE - 1);
/* without finup() we need one block to close hash */
if (!tail)
tail = SHA1_MD5_BLOCK_SIZE;
length -= tail;
}
}
if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
dev_err(dd->dev, "dma_map_sg error\n");
return -EINVAL;
}
ctx->flags |= FLAGS_SG;
ctx->total -= length;
ctx->offset = length; /* offset where to start slow */
return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1);
final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
/* next call does not fail... so no unmap in the case of error */
return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final);
}
static int omap_sham_update_cpu(struct omap_sham_dev *dd)
......@@ -433,8 +496,17 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
omap_stop_dma(dd->dma_lch);
if (ctx->flags & FLAGS_FAST)
if (ctx->flags & FLAGS_SG) {
dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
if (ctx->sg->length == ctx->offset) {
ctx->sg = sg_next(ctx->sg);
if (ctx->sg)
ctx->offset = 0;
}
} else {
dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
DMA_TO_DEVICE);
}
return 0;
}
......@@ -454,14 +526,7 @@ static void omap_sham_cleanup(struct ahash_request *req)
spin_unlock_irqrestore(&dd->lock, flags);
if (ctx->digcnt)
clk_disable(dd->iclk);
if (ctx->dma_addr)
dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
DMA_TO_DEVICE);
if (ctx->buffer)
free_page((unsigned long)ctx->buffer);
omap_sham_copy_ready_hash(req);
dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt);
}
......@@ -489,8 +554,6 @@ static int omap_sham_init(struct ahash_request *req)
ctx->flags = 0;
ctx->flags |= FLAGS_FIRST;
dev_dbg(dd->dev, "init: digest size: %d\n",
crypto_ahash_digestsize(tfm));
......@@ -499,21 +562,7 @@ static int omap_sham_init(struct ahash_request *req)
ctx->bufcnt = 0;
ctx->digcnt = 0;
ctx->buflen = PAGE_SIZE;
ctx->buffer = (void *)__get_free_page(
(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC);
if (!ctx->buffer)
return -ENOMEM;
ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
DMA_TO_DEVICE);
if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
free_page((unsigned long)ctx->buffer);
return -EINVAL;
}
ctx->buflen = BUFLEN;
if (tctx->flags & FLAGS_HMAC) {
struct omap_sham_hmac_ctx *bctx = tctx->base;
......@@ -538,10 +587,8 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
if (ctx->flags & FLAGS_CPU)
err = omap_sham_update_cpu(dd);
else if (ctx->flags & FLAGS_FAST)
err = omap_sham_update_dma_fast(dd);
else
err = omap_sham_update_dma_slow(dd);
err = omap_sham_update_dma_start(dd);
/* wait for dma completion before can take more data */
dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
......@@ -560,15 +607,12 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
use_dma = 0;
if (use_dma)
err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1);
err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1);
else
err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
ctx->bufcnt = 0;
if (err != -EINPROGRESS)
omap_sham_cleanup(req);
dev_dbg(dd->dev, "final_req: err: %d\n", err);
return err;
......@@ -576,6 +620,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
static int omap_sham_finish_req_hmac(struct ahash_request *req)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
struct omap_sham_hmac_ctx *bctx = tctx->base;
int bs = crypto_shash_blocksize(bctx->shash);
......@@ -590,48 +635,56 @@ static int omap_sham_finish_req_hmac(struct ahash_request *req)
return crypto_shash_init(&desc.shash) ?:
crypto_shash_update(&desc.shash, bctx->opad, bs) ?:
crypto_shash_finup(&desc.shash, req->result, ds, req->result);
crypto_shash_finup(&desc.shash, ctx->digest, ds, ctx->digest);
}
static void omap_sham_finish_req(struct ahash_request *req, int err)
{
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
struct omap_sham_dev *dd = ctx->dd;
if (!err) {
omap_sham_copy_hash(ctx->dd->req, 1);
if (ctx->flags & FLAGS_HMAC)
err = omap_sham_finish_req_hmac(req);
} else {
ctx->flags |= FLAGS_ERROR;
}
if (ctx->flags & FLAGS_FINAL)
if ((ctx->flags & FLAGS_FINAL) || err)
omap_sham_cleanup(req);
clear_bit(FLAGS_BUSY, &ctx->dd->flags);
clk_disable(dd->iclk);
dd->flags &= ~FLAGS_BUSY;
if (req->base.complete)
req->base.complete(&req->base, err);
}
static int omap_sham_handle_queue(struct omap_sham_dev *dd)
static int omap_sham_handle_queue(struct omap_sham_dev *dd,
struct ahash_request *req)
{
struct crypto_async_request *async_req, *backlog;
struct omap_sham_reqctx *ctx;
struct ahash_request *req, *prev_req;
struct ahash_request *prev_req;
unsigned long flags;
int err = 0;
if (test_and_set_bit(FLAGS_BUSY, &dd->flags))
return 0;
int err = 0, ret = 0;
spin_lock_irqsave(&dd->lock, flags);
if (req)
ret = ahash_enqueue_request(&dd->queue, req);
if (dd->flags & FLAGS_BUSY) {
spin_unlock_irqrestore(&dd->lock, flags);
return ret;
}
backlog = crypto_get_backlog(&dd->queue);
async_req = crypto_dequeue_request(&dd->queue);
if (!async_req)
clear_bit(FLAGS_BUSY, &dd->flags);
if (async_req)
dd->flags |= FLAGS_BUSY;
spin_unlock_irqrestore(&dd->lock, flags);
if (!async_req)
return 0;
return ret;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
......@@ -646,7 +699,22 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
ctx->op, req->nbytes);
if (req != prev_req && ctx->digcnt)
err = omap_sham_hw_init(dd);
if (err)
goto err1;
omap_set_dma_dest_params(dd->dma_lch, 0,
OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + SHA_REG_DIN(0), 0, 16);
omap_set_dma_dest_burst_mode(dd->dma_lch,
OMAP_DMA_DATA_BURST_16);
omap_set_dma_src_burst_mode(dd->dma_lch,
OMAP_DMA_DATA_BURST_4);
if (ctx->digcnt)
/* request has changed - restore hash */
omap_sham_copy_hash(req, 0);
......@@ -658,7 +726,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
} else if (ctx->op == OP_FINAL) {
err = omap_sham_final_req(dd);
}
err1:
if (err != -EINPROGRESS) {
/* done_task will not finish it, so do it here */
omap_sham_finish_req(req, err);
......@@ -667,7 +735,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd)
dev_dbg(dd->dev, "exit, err: %d\n", err);
return err;
return ret;
}
static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
......@@ -675,18 +743,10 @@ static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
struct omap_sham_dev *dd = tctx->dd;
unsigned long flags;
int err;
ctx->op = op;
spin_lock_irqsave(&dd->lock, flags);
err = ahash_enqueue_request(&dd->queue, req);
spin_unlock_irqrestore(&dd->lock, flags);
omap_sham_handle_queue(dd);
return err;
return omap_sham_handle_queue(dd, req);
}
static int omap_sham_update(struct ahash_request *req)
......@@ -709,21 +769,13 @@ static int omap_sham_update(struct ahash_request *req)
*/
omap_sham_append_sg(ctx);
return 0;
} else if (ctx->bufcnt + ctx->total <= 64) {
} else if (ctx->bufcnt + ctx->total <= SHA1_MD5_BLOCK_SIZE) {
/*
* faster to use CPU for short transfers
*/
ctx->flags |= FLAGS_CPU;
} else if (!ctx->bufcnt && sg_is_last(ctx->sg)) {
/* may be can use faster functions */
int aligned = IS_ALIGNED((u32)ctx->sg->offset,
sizeof(u32));
if (aligned && (ctx->flags & FLAGS_FIRST))
/* digest: first and final */
ctx->flags |= FLAGS_FAST;
ctx->flags &= ~FLAGS_FIRST;
}
} else if (ctx->bufcnt + ctx->total <= ctx->buflen) {
/* if not finaup -> not fast */
} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
omap_sham_append_sg(ctx);
return 0;
}
......@@ -761,12 +813,14 @@ static int omap_sham_final(struct ahash_request *req)
ctx->flags |= FLAGS_FINUP;
/* OMAP HW accel works only with buffers >= 9 */
/* HMAC is always >= 9 because of ipad */
if ((ctx->digcnt + ctx->bufcnt) < 9)
err = omap_sham_final_shash(req);
else if (ctx->bufcnt)
return omap_sham_enqueue(req, OP_FINAL);
if (!(ctx->flags & FLAGS_ERROR)) {
/* OMAP HW accel works only with buffers >= 9 */
/* HMAC is always >= 9 because of ipad */
if ((ctx->digcnt + ctx->bufcnt) < 9)
err = omap_sham_final_shash(req);
else if (ctx->bufcnt)
return omap_sham_enqueue(req, OP_FINAL);
}
omap_sham_cleanup(req);
......@@ -836,6 +890,8 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
const char *alg_name = crypto_tfm_alg_name(tfm);
pr_info("enter\n");
/* Allocate a fallback and abort if it failed. */
tctx->fallback = crypto_alloc_shash(alg_name, 0,
CRYPTO_ALG_NEED_FALLBACK);
......@@ -846,7 +902,7 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
}
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct omap_sham_reqctx));
sizeof(struct omap_sham_reqctx) + BUFLEN);
if (alg_base) {
struct omap_sham_hmac_ctx *bctx = tctx->base;
......@@ -932,7 +988,7 @@ static struct ahash_alg algs[] = {
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx),
.cra_alignmask = 0,
.cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_init,
.cra_exit = omap_sham_cra_exit,
......@@ -956,7 +1012,7 @@ static struct ahash_alg algs[] = {
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx) +
sizeof(struct omap_sham_hmac_ctx),
.cra_alignmask = 0,
.cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_sha1_init,
.cra_exit = omap_sham_cra_exit,
......@@ -980,7 +1036,7 @@ static struct ahash_alg algs[] = {
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct omap_sham_ctx) +
sizeof(struct omap_sham_hmac_ctx),
.cra_alignmask = 0,
.cra_alignmask = OMAP_ALIGN_MASK,
.cra_module = THIS_MODULE,
.cra_init = omap_sham_cra_md5_init,
.cra_exit = omap_sham_cra_exit,
......@@ -993,7 +1049,7 @@ static void omap_sham_done_task(unsigned long data)
struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
struct ahash_request *req = dd->req;
struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
int ready = 1;
int ready = 0, err = 0;
if (ctx->flags & FLAGS_OUTPUT_READY) {
ctx->flags &= ~FLAGS_OUTPUT_READY;
......@@ -1003,15 +1059,18 @@ static void omap_sham_done_task(unsigned long data)
if (dd->flags & FLAGS_DMA_ACTIVE) {
dd->flags &= ~FLAGS_DMA_ACTIVE;
omap_sham_update_dma_stop(dd);
omap_sham_update_dma_slow(dd);
if (!dd->err)
err = omap_sham_update_dma_start(dd);
}
if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) {
dev_dbg(dd->dev, "update done\n");
err = dd->err ? : err;
if (err != -EINPROGRESS && (ready || err)) {
dev_dbg(dd->dev, "update done: err: %d\n", err);
/* finish curent request */
omap_sham_finish_req(req, 0);
omap_sham_finish_req(req, err);
/* start new request */
omap_sham_handle_queue(dd);
omap_sham_handle_queue(dd, NULL);
}
}
......@@ -1019,7 +1078,7 @@ static void omap_sham_queue_task(unsigned long data)
{
struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
omap_sham_handle_queue(dd);
omap_sham_handle_queue(dd, NULL);
}
static irqreturn_t omap_sham_irq(int irq, void *dev_id)
......@@ -1041,6 +1100,7 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id)
omap_sham_read(dd, SHA_REG_CTRL);
ctx->flags |= FLAGS_OUTPUT_READY;
dd->err = 0;
tasklet_schedule(&dd->done_task);
return IRQ_HANDLED;
......@@ -1050,8 +1110,13 @@ static void omap_sham_dma_callback(int lch, u16 ch_status, void *data)
{
struct omap_sham_dev *dd = data;
if (likely(lch == dd->dma_lch))
tasklet_schedule(&dd->done_task);
if (ch_status != OMAP_DMA_BLOCK_IRQ) {
pr_err("omap-sham DMA error status: 0x%hx\n", ch_status);
dd->err = -EIO;
dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
}
tasklet_schedule(&dd->done_task);
}
static int omap_sham_dma_init(struct omap_sham_dev *dd)
......@@ -1066,15 +1131,6 @@ static int omap_sham_dma_init(struct omap_sham_dev *dd)
dev_err(dd->dev, "Unable to request DMA channel\n");
return err;
}
omap_set_dma_dest_params(dd->dma_lch, 0,
OMAP_DMA_AMODE_CONSTANT,
dd->phys_base + SHA_REG_DIN(0), 0, 16);
omap_set_dma_dest_burst_mode(dd->dma_lch,
OMAP_DMA_DATA_BURST_16);
omap_set_dma_src_burst_mode(dd->dma_lch,
OMAP_DMA_DATA_BURST_4);
return 0;
}
......
......@@ -9,6 +9,7 @@
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/padlock.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
......@@ -21,7 +22,6 @@
#include <asm/byteorder.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include "padlock.h"
/*
* Number of data blocks actually fetched for each xcrypt insn.
......
......@@ -13,6 +13,7 @@
*/
#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/module.h>
......@@ -22,13 +23,6 @@
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/i387.h>
#include "padlock.h"
#ifdef CONFIG_64BIT
#define STACK_ALIGN 16
#else
#define STACK_ALIGN 4
#endif
struct padlock_sha_desc {
struct shash_desc fallback;
......
/*
* if_alg: User-space algorithm interface
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#ifndef _CRYPTO_IF_ALG_H
#define _CRYPTO_IF_ALG_H
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/if_alg.h>
#include <linux/types.h>
#include <net/sock.h>
#define ALG_MAX_PAGES 16
struct crypto_async_request;
struct alg_sock {
/* struct sock must be the first member of struct alg_sock */
struct sock sk;
struct sock *parent;
const struct af_alg_type *type;
void *private;
};
struct af_alg_completion {
struct completion completion;
int err;
};
struct af_alg_control {
struct af_alg_iv *iv;
int op;
};
struct af_alg_type {
void *(*bind)(const char *name, u32 type, u32 mask);
void (*release)(void *private);
int (*setkey)(void *private, const u8 *key, unsigned int keylen);
int (*accept)(void *private, struct sock *sk);
struct proto_ops *ops;
struct module *owner;
char name[14];
};
struct af_alg_sgl {
struct scatterlist sg[ALG_MAX_PAGES];
struct page *pages[ALG_MAX_PAGES];
};
int af_alg_register_type(const struct af_alg_type *type);
int af_alg_unregister_type(const struct af_alg_type *type);
int af_alg_release(struct socket *sock);
int af_alg_accept(struct sock *sk, struct socket *newsock);
int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len,
int write);
void af_alg_free_sg(struct af_alg_sgl *sgl);
int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con);
int af_alg_wait_for_completion(int err, struct af_alg_completion *completion);
void af_alg_complete(struct crypto_async_request *req, int err);
static inline struct alg_sock *alg_sk(struct sock *sk)
{
return (struct alg_sock *)sk;
}
static inline void af_alg_release_parent(struct sock *sk)
{
sock_put(alg_sk(sk)->parent);
}
static inline void af_alg_init_completion(struct af_alg_completion *completion)
{
init_completion(&completion->completion);
}
#endif /* _CRYPTO_IF_ALG_H */
......@@ -15,9 +15,15 @@
#define PADLOCK_ALIGNMENT 16
#define PFX "padlock: "
#define PFX KBUILD_MODNAME ": "
#define PADLOCK_CRA_PRIORITY 300
#define PADLOCK_COMPOSITE_PRIORITY 400
#ifdef CONFIG_64BIT
#define STACK_ALIGN 16
#else
#define STACK_ALIGN 4
#endif
#endif /* _CRYPTO_PADLOCK_H */
......@@ -68,6 +68,21 @@ static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg)
return (++sg)->length ? sg : (void *)sg_page(sg);
}
static inline void scatterwalk_crypto_chain(struct scatterlist *head,
struct scatterlist *sg,
int chain, int num)
{
if (chain) {
head->length += sg->length;
sg = scatterwalk_sg_next(sg);
}
if (sg)
scatterwalk_sg_chain(head, num, sg);
else
sg_mark_end(head);
}
static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
struct scatter_walk *walk_out)
{
......
......@@ -158,6 +158,7 @@ header-y += icmpv6.h
header-y += if.h
header-y += if_addr.h
header-y += if_addrlabel.h
header-y += if_alg.h
header-y += if_arcnet.h
header-y += if_arp.h
header-y += if_bonding.h
......
/*
* if_alg: User-space algorithm interface
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#ifndef _LINUX_IF_ALG_H
#define _LINUX_IF_ALG_H
#include <linux/types.h>
struct sockaddr_alg {
__u16 salg_family;
__u8 salg_type[14];
__u32 salg_feat;
__u32 salg_mask;
__u8 salg_name[64];
};
struct af_alg_iv {
__u32 ivlen;
__u8 iv[0];
};
/* Socket options */
#define ALG_SET_KEY 1
#define ALG_SET_IV 2
#define ALG_SET_OP 3
/* Operations */
#define ALG_OP_DECRYPT 0
#define ALG_OP_ENCRYPT 1
#endif /* _LINUX_IF_ALG_H */
......@@ -191,7 +191,8 @@ struct ucred {
#define AF_PHONET 35 /* Phonet sockets */
#define AF_IEEE802154 36 /* IEEE802154 sockets */
#define AF_CAIF 37 /* CAIF sockets */
#define AF_MAX 38 /* For now.. */
#define AF_ALG 38 /* Algorithm sockets */
#define AF_MAX 39 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
......@@ -232,6 +233,7 @@ struct ucred {
#define PF_PHONET AF_PHONET
#define PF_IEEE802154 AF_IEEE802154
#define PF_CAIF AF_CAIF
#define PF_ALG AF_ALG
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
......@@ -305,6 +307,7 @@ struct ucred {
#define SOL_RDS 276
#define SOL_IUCV 277
#define SOL_CAIF 278
#define SOL_ALG 279
/* IPX options */
#define IPX_TYPE 1
......
......@@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
......@@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
......@@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_MAX"
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment