aes: support aes_armv8 on Rust 1.61+ using asm! (#365)
Adds "polyfills" for the unstable ARMv8 AES intrinsics using the `asm!`
macro which was stabilized in Rust 1.59. However note we also need
`target_feature` stabilizations for `aes` and `neon` which occurred in
Rust 1.61.
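
As a usage note (an illustration, not part of this diff): the backend remains
opt-in via the `aes_armv8` cfg on stable Rust 1.61+, enabled the same way the
CI workflow below does it, e.g.

    RUSTFLAGS="--cfg aes_armv8" cargo test --package aes --target aarch64-unknown-linux-gnu

where the target triple is just one example aarch64 target.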

Based on benchmarks, this has no effect on performance, although it was
necessary to place the AESE/AESMC and AESD/AESIMC pairs into single `asm!`
blocks in order to ensure that the instructions fuse properly, as they do
when using the proper intrinsics.
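
For contrast, a hypothetical non-fused variant (illustration only, not in this
diff) would issue the two instructions from separate `asm!` blocks. Because the
compiler treats each `asm!` block as opaque, it is free to schedule unrelated
instructions between the two blocks, which can defeat AESE/AESMC fusion:

    use core::arch::{aarch64::uint8x16_t, asm};

    /// Hypothetical non-fused variant: two separate `asm!` blocks.
    #[target_feature(enable = "aes")]
    unsafe fn vaeseq_u8_then_vaesmcq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
        // AES single round encryption
        asm!(
            "AESE {d:v}.16B, {k:v}.16B",
            d = inout(vreg) data,
            k = in(vreg) key,
            options(pure, nomem, nostack, preserves_flags)
        );
        // AES mix columns: may no longer be adjacent to AESE after codegen
        asm!(
            "AESMC {d:v}.16B, {d:v}.16B",
            d = inout(vreg) data,
            options(pure, nomem, nostack, preserves_flags)
        );
        data
    }
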
tarcieri committed Jun 17, 2023
1 parent e8970be commit 8d03900
Showing 8 changed files with 133 additions and 41 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/aes.yml
@@ -209,15 +209,15 @@ jobs:
cross test --package aes --target ${{ matrix.target }}
cross test --package aes --target ${{ matrix.target }} --features hazmat
-  # ARMv8 cross-compiled tests for AES intrinsics (nightly-only)
+  # ARMv8 cross-compiled tests for AES intrinsics
armv8:
env:
RUSTFLAGS: "-Dwarnings --cfg aes_armv8"
strategy:
matrix:
include:
- target: aarch64-unknown-linux-gnu
-            rust: nightly
+            rust: 1.61.0 # MSRV for `aes_armv8`
runs-on: ubuntu-latest
# Cross mounts only current package, i.e. by default it ignores workspace's Cargo.toml
defaults:
3 changes: 2 additions & 1 deletion aes/src/armv8.rs
@@ -14,6 +14,7 @@ pub(crate) mod hazmat;

mod encdec;
mod expand;
+mod intrinsics;
#[cfg(test)]
mod test_expand;

@@ -222,7 +223,7 @@ macro_rules! define_aes_impl {
impl From<&$name_enc> for $name_dec {
fn from(enc: &$name_enc) -> $name_dec {
let mut round_keys = enc.round_keys;
-                inv_expanded_keys(&mut round_keys);
+                unsafe { inv_expanded_keys(&mut round_keys) };
Self { round_keys }
}
}
34 changes: 14 additions & 20 deletions aes/src/armv8/encdec.rs
@@ -4,6 +4,12 @@ use crate::{Block, Block8};
use cipher::inout::InOut;
use core::arch::aarch64::*;

// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
// TODO(tarcieri): remove when these intrinsics have been stabilized
use super::intrinsics::{
vaesdq_u8, vaesdq_u8_and_vaesimcq_u8, vaeseq_u8, vaeseq_u8_and_vaesmcq_u8,
};

/// Perform AES encryption using the given expanded keys.
#[target_feature(enable = "aes")]
#[target_feature(enable = "neon")]
@@ -19,11 +25,8 @@ pub(super) unsafe fn encrypt1<const N: usize>(
let mut state = vld1q_u8(in_ptr as *const u8);

for k in expanded_keys.iter().take(rounds - 1) {
-        // AES single round encryption
-        state = vaeseq_u8(state, *k);
-
-        // AES mix columns
-        state = vaesmcq_u8(state);
+        // AES single round encryption and mix columns
+        state = vaeseq_u8_and_vaesmcq_u8(state, *k);
}

// AES single round encryption
@@ -62,11 +65,8 @@ pub(super) unsafe fn encrypt8<const N: usize>(

for k in expanded_keys.iter().take(rounds - 1) {
for i in 0..8 {
-            // AES single round encryption
-            state[i] = vaeseq_u8(state[i], *k);
-
-            // AES mix columns
-            state[i] = vaesmcq_u8(state[i]);
+            // AES single round encryption and mix columns
+            state[i] = vaeseq_u8_and_vaesmcq_u8(state[i], *k);
}
}

@@ -95,11 +95,8 @@ pub(super) unsafe fn decrypt1<const N: usize>(
let mut state = vld1q_u8(in_ptr as *const u8);

for k in expanded_keys.iter().take(rounds - 1) {
-        // AES single round decryption
-        state = vaesdq_u8(state, *k);
-
-        // AES inverse mix columns
-        state = vaesimcq_u8(state);
+        // AES single round decryption and inverse mix columns
+        state = vaesdq_u8_and_vaesimcq_u8(state, *k);
}

// AES single round decryption
@@ -138,11 +135,8 @@ pub(super) unsafe fn decrypt8<const N: usize>(

for k in expanded_keys.iter().take(rounds - 1) {
for i in 0..8 {
-            // AES single round decryption
-            state[i] = vaesdq_u8(state[i], *k);
-
-            // AES inverse mix columns
-            state[i] = vaesimcq_u8(state[i]);
+            // AES single round decryption and inverse mix columns
+            state[i] = vaesdq_u8_and_vaesimcq_u8(state[i], *k);
}
}

28 changes: 15 additions & 13 deletions aes/src/armv8/expand.rs
@@ -2,6 +2,10 @@

use core::{arch::aarch64::*, mem, slice};

+// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
+// TODO(tarcieri): remove when these intrinsics have been stabilized
+use super::intrinsics::{vaeseq_u8, vaesimcq_u8};

/// There are 4 AES words in a block.
const BLOCK_WORDS: usize = 4;

@@ -37,9 +41,9 @@ pub(super) fn expand_key<const L: usize, const N: usize>(key: &[u8; L]) -> [uint
let mut word = ek_words[i - 1];

if i % nk == 0 {
-            word = sub_word(word).rotate_right(8) ^ ROUND_CONSTS[i / nk - 1];
+            word = unsafe { sub_word(word) }.rotate_right(8) ^ ROUND_CONSTS[i / nk - 1];
} else if nk > 6 && i % nk == 4 {
-            word = sub_word(word)
+            word = unsafe { sub_word(word) };
}

ek_words[i] = ek_words[i - nk] ^ word;
@@ -52,26 +56,24 @@
///
/// This is the reverse of the encryption keys, with the Inverse Mix Columns
/// operation applied to all but the first and last expanded key.
-#[inline]
-pub(super) fn inv_expanded_keys<const N: usize>(expanded_keys: &mut [uint8x16_t; N]) {
+#[target_feature(enable = "aes")]
+pub(super) unsafe fn inv_expanded_keys<const N: usize>(expanded_keys: &mut [uint8x16_t; N]) {
assert!(N == 11 || N == 13 || N == 15);

for ek in expanded_keys.iter_mut().take(N - 1).skip(1) {
-        unsafe { *ek = vaesimcq_u8(*ek) }
+        *ek = vaesimcq_u8(*ek);
}

expanded_keys.reverse();
}

/// Sub bytes for a single AES word: used for key expansion.
-#[inline(always)]
-fn sub_word(input: u32) -> u32 {
-    unsafe {
-        let input = vreinterpretq_u8_u32(vdupq_n_u32(input));
+#[target_feature(enable = "aes")]
+unsafe fn sub_word(input: u32) -> u32 {
+    let input = vreinterpretq_u8_u32(vdupq_n_u32(input));

-        // AES single round encryption (with a "round" key of all zeros)
-        let sub_input = vaeseq_u8(input, vdupq_n_u8(0));
+    // AES single round encryption (with a "round" key of all zeros)
+    let sub_input = vaeseq_u8(input, vdupq_n_u8(0));

-        vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0)
-    }
+    vgetq_lane_u32(vreinterpretq_u32_u8(sub_input), 0)
}
3 changes: 3 additions & 0 deletions aes/src/armv8/hazmat.rs
@@ -7,6 +7,9 @@
use crate::{Block, Block8};
use core::arch::aarch64::*;

+// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
+use super::intrinsics::{vaesdq_u8, vaeseq_u8, vaesimcq_u8, vaesmcq_u8};

/// AES cipher (encrypt) round function.
#[allow(clippy::cast_ptr_alignment)]
#[target_feature(enable = "aes")]
93 changes: 93 additions & 0 deletions aes/src/armv8/intrinsics.rs
@@ -0,0 +1,93 @@
//! Stable "polyfills" for unstable `core::arch::aarch64` intrinsics which use
//! `asm!` internally to allow use on stable Rust.
// TODO(tarcieri): remove when these intrinsics have been stabilized

use core::arch::{aarch64::uint8x16_t, asm};

/// AES single round encryption.
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaeseq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
    asm!(
        "AESE {d:v}.16B, {k:v}.16B",
        d = inout(vreg) data,
        k = in(vreg) key,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}

/// AES single round decryption.
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaesdq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
    asm!(
        "AESD {d:v}.16B, {k:v}.16B",
        d = inout(vreg) data,
        k = in(vreg) key,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}

/// AES mix columns.
#[cfg(feature = "hazmat")]
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaesmcq_u8(mut data: uint8x16_t) -> uint8x16_t {
    asm!(
        "AESMC {d:v}.16B, {d:v}.16B",
        d = inout(vreg) data,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}

/// AES inverse mix columns.
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaesimcq_u8(mut data: uint8x16_t) -> uint8x16_t {
    asm!(
        "AESIMC {d:v}.16B, {d:v}.16B",
        d = inout(vreg) data,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}

/// AES single round encryption combined with mix columns.
///
/// These two instructions are combined into a single assembly block to ensure
/// that instructions fuse properly.
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaeseq_u8_and_vaesmcq_u8(mut data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
    asm!(
        "AESE {d:v}.16B, {k:v}.16B",
        "AESMC {d:v}.16B, {d:v}.16B",
        d = inout(vreg) data,
        k = in(vreg) key,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}

/// AES single round decryption combined with inverse mix columns.
///
/// These two instructions are combined into a single assembly block to ensure
/// that instructions fuse properly.
#[inline]
#[target_feature(enable = "aes")]
pub(super) unsafe fn vaesdq_u8_and_vaesimcq_u8(
    mut data: uint8x16_t,
    key: uint8x16_t,
) -> uint8x16_t {
    asm!(
        "AESD {d:v}.16B, {k:v}.16B",
        "AESIMC {d:v}.16B, {d:v}.16B",
        d = inout(vreg) data,
        k = in(vreg) key,
        options(pure, nomem, nostack, preserves_flags)
    );
    data
}
2 changes: 1 addition & 1 deletion aes/src/armv8/test_expand.rs
@@ -113,7 +113,7 @@ fn aes128_key_expansion() {
#[test]
fn aes128_key_expansion_inv() {
let mut ek = load_expanded_keys(AES128_EXP_KEYS);
-    inv_expanded_keys(&mut ek);
+    unsafe { inv_expanded_keys(&mut ek) };
assert_eq!(store_expanded_keys(ek), AES128_EXP_INVKEYS);
}

7 changes: 3 additions & 4 deletions aes/src/lib.rs
@@ -26,11 +26,11 @@
//! backend at the cost of decreased performance (using a modified form of
//! the fixslicing technique called "semi-fixslicing").
//!
-//! ## ARMv8 intrinsics (nightly-only)
+//! ## ARMv8 intrinsics (Rust 1.61+)
//! On `aarch64` targets including `aarch64-apple-darwin` (Apple M1) and Linux
//! targets such as `aarch64-unknown-linux-gnu` and `aarch64-unknown-linux-musl`,
//! support for using AES intrinsics provided by the ARMv8 Cryptography Extensions
-//! is available when using the nightly compiler, and can be enabled using the
+//! is available when using Rust 1.61 or above, and can be enabled using the
//! `aes_armv8` configuration flag.
//!
//! On Linux and macOS, when the `aes_armv8` flag is enabled support for AES
@@ -101,7 +101,7 @@
//!
//! You can modify the crate using the following configuration flags:
//!
-//! - `aes_armv8`: enable ARMv8 AES intrinsics (nightly-only).
+//! - `aes_armv8`: enable ARMv8 AES intrinsics (Rust 1.61+).
//! - `aes_force_soft`: force software implementation.
//! - `aes_compact`: reduce code size at the cost of slower performance
//! (affects only software backend).
@@ -121,7 +121,6 @@
)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_docs, rust_2018_idioms)]
-#![cfg_attr(all(aes_armv8, target_arch = "aarch64"), feature(stdsimd))]

#[cfg(feature = "hazmat")]
#[cfg_attr(docsrs, doc(cfg(feature = "hazmat")))]
