gccrs: Import stdarch 1.49.0

This commit imports stdarch 1.49.0 into libgrust/rustc-lib/stdarch. This
is necessary for compiling libcore, as libcore attempts to reuse some
files from stdarch.
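As an illustration (not part of this commit), libcore pulls these sources in
through a path-based module declaration roughly like the sketch below; the
exact attribute list and relative path used by gccrs may differ:

    // Sketch of how libcore reuses the core_arch sources shipped in stdarch.
    // The path is illustrative and assumes the stdarch checkout sits next to
    // libcore in the source tree, as it does in this import.
    #[path = "../../stdarch/crates/core_arch/src/mod.rs"]
    #[allow(missing_docs, dead_code, unused_imports)]
    mod core_arch;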

libgrust/ChangeLog:

	* rustc-lib/stdarch/.cirrus.yml: New file.
	* rustc-lib/stdarch/.github/workflows/main.yml: New file.
	* rustc-lib/stdarch/.gitignore: New file.
	* rustc-lib/stdarch/CONTRIBUTING.md: New file.
	* rustc-lib/stdarch/Cargo.toml: New file.
	* rustc-lib/stdarch/LICENSE-APACHE: New file.
	* rustc-lib/stdarch/LICENSE-MIT: New file.
	* rustc-lib/stdarch/README.md: New file.
	* rustc-lib/stdarch/ci/android-install-ndk.sh: New file.
	* rustc-lib/stdarch/ci/android-install-sdk.sh: New file.
	* rustc-lib/stdarch/ci/android-sysimage.sh: New file.
	* rustc-lib/stdarch/ci/docker/aarch64-linux-android/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/arm-linux-androideabi/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/wasm32-wasi/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/x86_64-linux-android/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile: New file.
	* rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile: New file.
	* rustc-lib/stdarch/ci/dox.sh: New file.
	* rustc-lib/stdarch/ci/gba.json: New file.
	* rustc-lib/stdarch/ci/run-docker.sh: New file.
	* rustc-lib/stdarch/ci/run.sh: New file.
	* rustc-lib/stdarch/ci/runtest-android.rs: New file.
	* rustc-lib/stdarch/ci/style.sh: New file.
	* rustc-lib/stdarch/crates/assert-instr-macro/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/assert-instr-macro/build.rs: New file.
	* rustc-lib/stdarch/crates/assert-instr-macro/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/core_arch/LICENSE-APACHE: New file.
	* rustc-lib/stdarch/crates/core_arch/LICENSE-MIT: New file.
	* rustc-lib/stdarch/crates/core_arch/README.md: New file.
	* rustc-lib/stdarch/crates/core_arch/avx512f.md: New file.
	* rustc-lib/stdarch/crates/core_arch/build.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/rustfmt.toml: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/crc.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/crypto.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/neon/generated.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/neon/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/prefetch.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/test_support.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/tme.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/aarch64/v8.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/common.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/cp15.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/not_mclass.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/v8.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/dsp.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/ex.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/hints.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/registers/aarch32.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/registers/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/registers/v6m.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/registers/v7m.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/sat.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/acle/simd32.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/armclang.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/crc.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/neon/generated.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/neon/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/neon/table_lookup_tests.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/test_support.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/v6.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/arm/v7.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/core_arch_docs.md: New file.
	* rustc-lib/stdarch/crates/core_arch/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/macros.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/mips/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/mips/msa.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/mips/msa/macros.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/nvptx/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/powerpc/altivec.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/powerpc/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/powerpc/vsx.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/powerpc64/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/simd.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/simd_llvm.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/v64.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/wasm32/atomic.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/wasm32/memory.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/wasm32/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/wasm32/simd128.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/abm.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/adx.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/aes.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/avx.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/avx2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/avx512f.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/avx512ifma.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/bmi1.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/bmi2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/bswap.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/bt.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/cpuid.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/eflags.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/f16c.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/fma.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/fxsr.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/macros.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/pclmulqdq.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/rdrand.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/rdtsc.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/rtm.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sha.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse3.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse41.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse42.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/sse4a.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/ssse3.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/tbm.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/test.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86/xsave.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/abm.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/adx.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/avx.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/avx2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/avx512f.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/bmi.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/bmi2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/bswap.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/bt.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/fxsr.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/mod.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/rdrand.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/sse.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/sse2.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/sse41.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/sse42.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/src/x86_64/xsave.rs: New file.
	* rustc-lib/stdarch/crates/core_arch/tests/cpu-detection.rs: New file.
	* rustc-lib/stdarch/crates/simd-test-macro/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/simd-test-macro/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/std_detect/LICENSE-APACHE: New file.
	* rustc-lib/stdarch/crates/std_detect/LICENSE-MIT: New file.
	* rustc-lib/stdarch/crates/std_detect/README.md: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/aarch64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/arm.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/mips.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/mips64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/powerpc.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/arch/x86.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/bit.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/cache.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/error_macros.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/macros.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/mod.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/aarch64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/arm.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/mips.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/mod.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/other.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/os/x86.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv: New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv:
	New file.
	* rustc-lib/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv:
	New file.
	* rustc-lib/stdarch/crates/std_detect/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/src/mod.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/tests/cpu-detection.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/tests/macro_trailing_commas.rs: New file.
	* rustc-lib/stdarch/crates/std_detect/tests/x86-specific.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/stdarch-gen/README.md: New file.
	* rustc-lib/stdarch/crates/stdarch-gen/neon.spec: New file.
	* rustc-lib/stdarch/crates/stdarch-gen/src/main.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-test/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/stdarch-test/src/disassembly.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-test/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-test/src/wasm.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/.gitattributes: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/Cargo.toml: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/arm-intrinsics.html: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/build.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/mips-msa.h: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/src/lib.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/tests/arm.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/tests/mips.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/tests/x86-intel.rs: New file.
	* rustc-lib/stdarch/crates/stdarch-verify/x86-intel.xml: New file.
	* rustc-lib/stdarch/examples/Cargo.toml: New file.
	* rustc-lib/stdarch/examples/hex.rs: New file.
	* rustc-lib/stdarch/examples/wasm.rs: New file.
	* rustc-lib/stdarch/triagebot.toml: New file.
	* rustc-lib/stdarch/vendor.yml: New file.

Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>

libgrust/rustc-lib/stdarch/.cirrus.yml (new file)
@@ -0,0 +1,13 @@
task:
  name: x86_64-unknown-freebsd
  freebsd_instance:
    image: freebsd-12-1-release-amd64
  setup_script:
    - pkg install -y curl
    - curl https://sh.rustup.rs -sSf --output rustup.sh
    - sh rustup.sh --default-toolchain nightly -y
    - . $HOME/.cargo/env
    - rustup default nightly
  test_script:
    - . $HOME/.cargo/env
    - cargo build --all

libgrust/rustc-lib/stdarch/.github/workflows/main.yml (new file)
@@ -0,0 +1,209 @@
name: CI
on:
  push:
    branches:
      - auto
      - try
  pull_request:
    branches:
      - master

jobs:
  style:
    name: Check Style
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: Install Rust
        run: rustup update nightly && rustup default nightly
      - run: ci/style.sh

  docs:
    name: Build Documentation
    needs: [style]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: Install Rust
        run: rustup update nightly && rustup default nightly
      - run: ci/dox.sh
        env:
          CI: 1
      - name: Publish documentation
        run: |
          cd target/doc
          git init
          git add .
          git -c user.name='ci' -c user.email='ci' commit -m init
          git push -f -q https://git:${{ secrets.github_token }}@github.com/${{ github.repository }} HEAD:gh-pages
        if: github.event_name == 'push' && github.event.ref == 'refs/heads/master'

  verify:
    name: Automatic intrinsic verification
    needs: [style]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: Install Rust
        run: rustup update nightly && rustup default nightly
      - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml

  env_override:
    name: Env Override
    needs: [style]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - name: Install Rust
        run: rustup update nightly && rustup default nightly
      - run: RUST_STD_DETECT_UNSTABLE=avx cargo test --features=std_detect_env_override --manifest-path crates/std_detect/Cargo.toml env_override_no_avx

  test:
    needs: [style]
    name: Test
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        target:
          # Dockers that are run through docker on linux
          - i686-unknown-linux-gnu
          - x86_64-unknown-linux-gnu
          - x86_64-unknown-linux-gnu-emulated
          - arm-unknown-linux-gnueabihf
          - armv7-unknown-linux-gnueabihf
          - aarch64-unknown-linux-gnu
          - powerpc64le-unknown-linux-gnu
          - mips-unknown-linux-gnu
          - mips64-unknown-linux-gnuabi64
          - mips64el-unknown-linux-gnuabi64
          - s390x-unknown-linux-gnu
          - wasm32-wasi
          - i586-unknown-linux-gnu
          - x86_64-linux-android
          - arm-linux-androideabi
          - mipsel-unknown-linux-musl
          - aarch64-linux-android
          - nvptx64-nvidia-cuda
          - thumbv6m-none-eabi
          - thumbv7m-none-eabi
          - thumbv7em-none-eabi
          - thumbv7em-none-eabihf
          # macOS targets
          - x86_64-apple-darwin
          # FIXME: gh-actions build environment doesn't have linker support
          # - i686-apple-darwin
          # Windows targets
          - x86_64-pc-windows-msvc
          # FIXME: Disassembly not implemented for the # following targets:
          # - x86_64-pc-windows-gnu:
          # - i686-pc-windows-gnu:
          # - i686-pc-windows-msvc:
        include:
          - target: i686-unknown-linux-gnu
            os: ubuntu-latest
          - target: x86_64-unknown-linux-gnu
            os: ubuntu-latest
          - target: x86_64-unknown-linux-gnu-emulated
            os: ubuntu-latest
            test_everything: true
            rustflags: --cfg stdarch_intel_sde
          - target: arm-unknown-linux-gnueabihf
            os: ubuntu-latest
          - target: armv7-unknown-linux-gnueabihf
            os: ubuntu-latest
            rustflags: -C target-feature=+neon
          - target: mips-unknown-linux-gnu
            os: ubuntu-latest
            norun: true
          - target: mips64-unknown-linux-gnuabi64
            os: ubuntu-latest
            norun: true
          - target: mips64el-unknown-linux-gnuabi64
            os: ubuntu-latest
            norun: true
          - target: powerpc64le-unknown-linux-gnu
            os: ubuntu-latest
            disable_assert_instr: true
          - target: s390x-unknown-linux-gnu
            os: ubuntu-latest
          - target: wasm32-wasi
            os: ubuntu-latest
          - target: aarch64-unknown-linux-gnu
            os: ubuntu-latest
          - target: x86_64-apple-darwin
            os: macos-latest
          - target: x86_64-pc-windows-msvc
            os: windows-latest
          - target: i586-unknown-linux-gnu
            os: ubuntu-latest
          - target: x86_64-linux-android
            os: ubuntu-latest
            disable_assert_instr: 1
          - target: arm-linux-androideabi
            os: ubuntu-latest
            disable_assert_instr: 1
          - target: mipsel-unknown-linux-musl
            os: ubuntu-latest
            norun: 1
          - target: aarch64-linux-android
            os: ubuntu-latest
            disable_assert_instr: 1
          - target: nvptx64-nvidia-cuda
            os: ubuntu-latest
          - target: thumbv6m-none-eabi
            os: ubuntu-latest
          - target: thumbv7m-none-eabi
            os: ubuntu-latest
          - target: thumbv7em-none-eabi
            os: ubuntu-latest
          - target: thumbv7em-none-eabihf
            os: ubuntu-latest

    steps:
      - uses: actions/checkout@master
      - name: Install Rust (rustup)
        run: |
          rustup update nightly --no-self-update
          rustup default nightly
        if: matrix.os != 'macos-latest'
      - name: Install Rust (macos)
        run: |
          curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
          echo "##[add-path]$HOME/.cargo/bin"
          rustup update nightly --no-self-update
          rustup default nightly
        if: matrix.os == 'macos-latest'
      - run: |
          rustup default nightly
          rustup target add ${{ matrix.target }}
        if: "!endsWith(matrix.target, 'emulated')"
      - run: cargo generate-lockfile
      # Configure some env vars based on matrix configuration
      - run: echo "##[set-env name=NORUN]1"
        if: matrix.norun != '' || startsWith(matrix.target, 'thumb') || matrix.target == 'nvptx64-nvidia-cuda'
      - run: echo "##[set-env name=STDARCH_TEST_EVERYTHING]1"
        if: matrix.test_everything != ''
      - run: echo "##[set-env name=RUSTFLAGS]${{ matrix.rustflags }}"
        if: matrix.rustflags != ''
      - run: echo "##[set-env name=STDARCH_DISABLE_ASSERT_INSTR]1"
        if: matrix.disable_assert_instr != ''
      - run: echo "##[set-env name=NOSTD]1"
        if: startsWith(matrix.target, 'thumb') || matrix.target == 'nvptx64-nvidia-cuda'
      # Windows & OSX go straight to `run.sh` ...
      - run: ./ci/run.sh
        shell: bash
        if: matrix.os != 'ubuntu-latest' || startsWith(matrix.target, 'thumb')
        env:
          TARGET: ${{ matrix.target }}
      # ... while Linux goes to `run-docker.sh`
      - run: ./ci/run-docker.sh ${{ matrix.target }}
        shell: bash
        if: "matrix.os == 'ubuntu-latest' && !startsWith(matrix.target, 'thumb')"
        env:
          TARGET: ${{ matrix.target }}

libgrust/rustc-lib/stdarch/.gitignore (new file, vendored)
@@ -0,0 +1,6 @@
Cargo.lock
.*.swp
target
tags
crates/stdarch-gen/aarch64.rs
crates/stdarch-gen/arm.rs

libgrust/rustc-lib/stdarch/CONTRIBUTING.md (new file)
@@ -0,0 +1,80 @@
# Contributing to stdarch
The `stdarch` crate is more than willing to accept contributions! First you'll
probably want to check out the repository and make sure that tests pass for you:
```
$ git clone https://github.com/rust-lang/stdarch
$ cd stdarch
$ cargo +nightly test
```
To run codegen tests, run in release mode:
```
$ cargo +nightly test --release -p coresimd
```
Remember that this repository requires the nightly channel of Rust! If any of
the above steps don't work, [please let us know][new]!
Next up you can [find an issue][issues] to help out on, we've selected a few
with the [`help wanted`][help] and [`impl-period`][impl] tags which could
particularly use some help. You may be most interested in [#40][vendor],
implementing all vendor intrinsics on x86. That issue's got some good pointers
about where to get started!
If you've got general questions feel free to [join us on gitter][gitter] and ask
around! Feel free to ping either @BurntSushi or @alexcrichton with questions.
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd
# How to write examples for stdarch intrinsics
There are a few features that must be enabled for the given intrinsic to work
properly and the example must only be run by `cargo test --doc` when the feature
is supported by the CPU. As a result, the default `fn main` that is generated by
`rustdoc` will not work (in most cases). Consider using the following as a guide
to ensure your example works as expected.
```rust
/// # // We need cfg_target_feature to ensure the example is only
/// # // run by `cargo test --doc` when the CPU supports the feature
/// # #![feature(cfg_target_feature)]
/// # // We need target_feature for the intrinsic to work
/// # #![feature(target_feature)]
/// #
/// # // rustdoc by default uses `extern crate stdarch`, but we need the
/// # // `#[macro_use]`
/// # #[macro_use] extern crate stdarch;
/// #
/// # // The real main function
/// # fn main() {
/// # // Only run this if `<target feature>` is supported
/// # if cfg_feature_enabled!("<target feature>") {
/// # // Create a `worker` function that will only be run if the target feature
/// # // is supported and ensure that `target_feature` is enabled for your worker
/// # // function
/// # #[target_feature(enable = "<target feature>")]
/// # unsafe fn worker() {
///
/// // Write your example here. Feature specific intrinsics will work here! Go wild!
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
```
If some of the above syntax does not look familiar, the [Documentation as tests] section
of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free
to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping
to improve the documentation of `stdarch`!
[new]: https://github.com/rust-lang/stdarch/issues/new
[issues]: https://github.com/rust-lang/stdarch/issues
[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period
[vendor]: https://github.com/rust-lang/stdarch/issues/40
[Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests
[Rust Book]: https://doc.rust-lang.org/book/first-edition

libgrust/rustc-lib/stdarch/Cargo.toml (new file)
@@ -0,0 +1,21 @@
[workspace]
members = [
"crates/stdarch-verify",
"crates/core_arch",
"crates/std_detect",
"crates/stdarch-gen",
"examples/"
]
exclude = [
"crates/wasm-assert-instr-tests"
]
[profile.release]
debug = true
opt-level = 3
incremental = true
[profile.bench]
debug = 1
opt-level = 3
incremental = true

libgrust/rustc-lib/stdarch/LICENSE-APACHE (new file)
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

libgrust/rustc-lib/stdarch/LICENSE-MIT (new file)
@@ -0,0 +1,25 @@
Copyright (c) 2017 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

libgrust/rustc-lib/stdarch/README.md (new file)
@@ -0,0 +1,40 @@
stdarch - Rust's standard library SIMD components
=======
[![Actions Status](https://github.com/rust-lang/stdarch/workflows/CI/badge.svg)](https://github.com/rust-lang/stdarch/actions)
# Crates
This repository contains two main crates:
* [![core_arch_crate_badge]][core_arch_crate_link]
[![core_arch_docs_badge]][core_arch_docs_link]
[`core_arch`](crates/core_arch/README.md) implements `core::arch` - Rust's
core library architecture-specific intrinsics, and
* [![std_detect_crate_badge]][std_detect_crate_link]
[![std_detect_docs_badge]][std_detect_docs_link]
[`std_detect`](crates/std_detect/README.md) implements `std::detect` - Rust's
standard library run-time CPU feature detection.
The `std::simd` component now lives in the
[`packed_simd`](https://github.com/rust-lang-nursery/packed_simd) crate.
# How to do a release
To do a release of the `core_arch` and `std_detect` crates,
* bump up the version appropriately,
* comment out the `dev-dependencies` in their `Cargo.toml` files (due to
https://github.com/rust-lang/cargo/issues/4242),
* publish the crates.
[core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg
[core_arch_crate_link]: https://crates.io/crates/core_arch
[core_arch_docs_badge]: https://docs.rs/core_arch/badge.svg
[core_arch_docs_link]: https://docs.rs/core_arch/
[std_detect_crate_badge]: https://img.shields.io/crates/v/std_detect.svg
[std_detect_crate_link]: https://crates.io/crates/std_detect
[std_detect_docs_badge]: https://docs.rs/std_detect/badge.svg
[std_detect_docs_link]: https://docs.rs/std_detect/

libgrust/rustc-lib/stdarch/ci/android-install-ndk.sh (new file)
@@ -0,0 +1,38 @@
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
set -ex
curl --retry 5 -O \
https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip
unzip -q android-ndk-r15b-linux-x86_64.zip
case "${1}" in
aarch64)
arch=arm64
;;
i686)
arch=x86
;;
*)
arch="${1}"
;;
esac;
android-ndk-r15b/build/tools/make_standalone_toolchain.py \
--unified-headers \
--install-dir "/android/ndk-${1}" \
--arch "${arch}" \
--api 24
rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b

libgrust/rustc-lib/stdarch/ci/android-install-sdk.sh (new file)
@@ -0,0 +1,60 @@
#!/usr/bin/env sh
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
set -ex
# Prep the SDK and emulator
#
# Note that the update process requires that we accept a bunch of licenses, and
# we can't just pipe `yes` into it for some reason, so we take the same strategy
# located in https://github.com/appunite/docker by just wrapping it in a script
# which apparently magically accepts the licenses.
mkdir sdk
curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
unzip -d sdk sdk-tools-linux-3859397.zip
case "$1" in
arm | armv7)
abi=armeabi-v7a
;;
aarch64)
abi=arm64-v8a
;;
i686)
abi=x86
;;
x86_64)
abi=x86_64
;;
*)
echo "invalid arch: $1"
exit 1
;;
esac;
# --no_https avoids
# javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
yes | ./sdk/tools/bin/sdkmanager --no_https \
"emulator" \
"platform-tools" \
"platforms;android-24" \
"system-images;android-24;default;$abi"
echo "no" |
./sdk/tools/bin/avdmanager create avd \
--name "${1}" \
--package "system-images;android-24;default;$abi"

libgrust/rustc-lib/stdarch/ci/android-sysimage.sh (new file)
@@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
set -ex
URL=https://dl.google.com/android/repository/sys-img/android
main() {
local arch="${1}"
local name="${2}"
local dest=/system
local td
td="$(mktemp -d)"
apt-get install --no-install-recommends e2tools
pushd "$td"
curl --retry 5 -O "${URL}/${name}"
unzip -q "${name}"
local system
system=$(find . -name system.img)
mkdir -p $dest/{bin,lib,lib64}
# Extract android linker and libraries to /system
# This allows android executables to be run directly (or with qemu)
if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
else
e2cp -p "${system}:/bin/linker" "${dest}/bin/"
e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
fi
# clean up
apt-get purge --auto-remove -y e2tools
popd
rm -rf "${td}"
}
main "${@}"

libgrust/rustc-lib/stdarch/ci/docker/aarch64-linux-android/Dockerfile (new file)
@@ -0,0 +1,47 @@
FROM ubuntu:16.04
RUN dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y --no-install-recommends \
file \
make \
curl \
ca-certificates \
python \
unzip \
expect \
openjdk-9-jre \
libstdc++6:i386 \
libpulse0 \
gcc \
libc6-dev
WORKDIR /android/
COPY android* /android/
ENV ANDROID_ARCH=aarch64
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
ENV PATH=$PATH:/rust/bin \
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
OBJDUMP=aarch64-linux-android-objdump \
HOME=/tmp
ADD runtest-android.rs /tmp/runtest.rs
ENTRYPOINT [ \
"bash", \
"-c", \
# set SHELL so android can detect a 64bits system, see
# http://stackoverflow.com/a/41789144
"SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
rustc /tmp/runtest.rs -o /tmp/runtest && \
exec \"$@\"", \
"--" \
]

libgrust/rustc-lib/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,14 @@
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-aarch64-linux-gnu \
libc6-dev-arm64-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
OBJDUMP=aarch64-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/arm-linux-androideabi/Dockerfile (new file)
@@ -0,0 +1,47 @@
FROM ubuntu:16.04
RUN dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y --no-install-recommends \
file \
make \
curl \
ca-certificates \
python \
unzip \
expect \
openjdk-9-jre \
libstdc++6:i386 \
libpulse0 \
gcc \
libc6-dev
WORKDIR /android/
COPY android* /android/
ENV ANDROID_ARCH=arm
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
RUN mv /root/.android /tmp
RUN chmod 777 -R /tmp/.android
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
ENV PATH=$PATH:/rust/bin \
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
OBJDUMP=arm-linux-androideabi-objdump \
HOME=/tmp
ADD runtest-android.rs /tmp/runtest.rs
ENTRYPOINT [ \
"bash", \
"-c", \
# set SHELL so android can detect a 64bits system, see
# http://stackoverflow.com/a/41789144
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
rustc /tmp/runtest.rs -o /tmp/runtest && \
exec \"$@\"", \
"--" \
]

libgrust/rustc-lib/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile (new file)
@@ -0,0 +1,13 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

libgrust/rustc-lib/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile (new file)
@@ -0,0 +1,13 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
ca-certificates \
libc6-dev \
gcc-arm-linux-gnueabihf \
libc6-dev-armhf-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump

libgrust/rustc-lib/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,7 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
file \
make \
ca-certificates

libgrust/rustc-lib/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,7 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
file \
make \
ca-certificates

libgrust/rustc-lib/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,13 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips-linux-gnu libc6-dev-mips-cross \
qemu-system-mips \
qemu-user \
make \
file
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
OBJDUMP=mips-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile (new file)
@@ -0,0 +1,10 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
qemu-system-mips64 qemu-user
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
OBJDUMP=mips64-linux-gnuabi64-objdump

libgrust/rustc-lib/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile (new file)
@@ -0,0 +1,10 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
qemu-system-mips64el
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
OBJDUMP=mips64el-linux-gnuabi64-objdump

libgrust/rustc-lib/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile (new file)
@@ -0,0 +1,25 @@
FROM ubuntu:18.04
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
gcc \
libc6-dev \
make \
qemu-user \
qemu-system-mips \
bzip2 \
curl \
file
RUN mkdir /toolchain
# Note that this originally came from:
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
tar xjf - -C /toolchain --strip-components=2
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"

libgrust/rustc-lib/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile (new file)
@@ -0,0 +1,5 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
ca-certificates

libgrust/rustc-lib/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,11 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
qemu-system-ppc make file
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
CC=powerpc-linux-gnu-gcc \
OBJDUMP=powerpc-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,11 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
qemu-system-ppc file make
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -cpu power9 -L /usr/powerpc64-linux-gnu" \
CC=powerpc64-linux-gnu-gcc \
OBJDUMP=powerpc64-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,12 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev qemu-user ca-certificates \
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
qemu-system-ppc file make
# Work around qemu triggering a sigill on vec_subs if the cpu target is not defined.
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -cpu power9 -L /usr/powerpc64le-linux-gnu" \
CC=powerpc64le-linux-gnu-gcc \
OBJDUMP=powerpc64le-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,13 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates \
gcc libc6-dev \
gcc-s390x-linux-gnu libc6-dev-s390x-cross \
qemu-user \
make \
file
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
OBJDUMP=s390x-linux-gnu-objdump

libgrust/rustc-lib/stdarch/ci/docker/wasm32-wasi/Dockerfile (new file)
@@ -0,0 +1,16 @@
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update -y && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
xz-utils \
clang
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.19.0/wasmtime-v0.19.0-x86_64-linux.tar.xz | tar xJf -
ENV PATH=$PATH:/wasmtime-v0.19.0-x86_64-linux
ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \
--enable-simd \
--mapdir .::/checkout/target/wasm32-wasi/release/deps \
--"

libgrust/rustc-lib/stdarch/ci/docker/x86_64-linux-android/Dockerfile (new file)
@@ -0,0 +1,29 @@
FROM ubuntu:16.04
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
curl \
gcc \
libc-dev \
python \
unzip \
file \
make
WORKDIR /android/
ENV ANDROID_ARCH=x86_64
COPY android-install-ndk.sh /android/
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
# We do not run x86_64-linux-android tests on an android emulator.
# See ci/android-sysimage.sh for informations about how tests are run.
COPY android-sysimage.sh /android/
RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \
CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \
CC_x86_64_linux_android=x86_64-linux-android-gcc \
CXX_x86_64_linux_android=x86_64-linux-android-g++ \
OBJDUMP=x86_64-linux-android-objdump \
HOME=/tmp

libgrust/rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile (new file)
@@ -0,0 +1,13 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
file \
make \
ca-certificates \
wget \
bzip2
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"

libgrust/rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile (new file)
@@ -0,0 +1,7 @@
FROM ubuntu:18.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
file \
make \
ca-certificates

libgrust/rustc-lib/stdarch/ci/dox.sh (new file)
@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Builds documentation for all target triples that we have a registered URL for
# in liblibc. This scrapes the list of triples to document from `src/lib.rs`
# which has a bunch of `html_root_url` directives we pick up.
set -ex
rm -rf target/doc
mkdir -p target/doc
dox() {
local arch=$1
local target=$2
echo "documenting ${arch}"
if [ "$CI" != "" ]; then
rustup target add "${target}" || true
fi
rm -rf "target/doc/${arch}"
mkdir "target/doc/${arch}"
export RUSTFLAGS="--cfg core_arch_docs"
export RUSTDOCFLAGS="--cfg core_arch_docs"
cargo build --verbose --target "${target}" --manifest-path crates/core_arch/Cargo.toml
cargo build --verbose --target "${target}" --manifest-path crates/std_detect/Cargo.toml
rustdoc --verbose --target "${target}" \
-o "target/doc/${arch}" crates/core_arch/src/lib.rs \
--edition=2018 \
--crate-name core_arch \
--library-path "target/${target}/debug/deps" \
--cfg core_arch_docs
rustdoc --verbose --target "${target}" \
-o "target/doc/${arch}" crates/std_detect/src/lib.rs \
--edition=2018 \
--crate-name std_detect \
--library-path "target/${target}/debug/deps" \
--extern cfg_if="$(ls target/"${target}"/debug/deps/libcfg_if-*.rlib)" \
--extern libc="$(ls target/"${target}"/debug/deps/liblibc-*.rlib)" \
--cfg core_arch_docs
}
dox i686 i686-unknown-linux-gnu
dox x86_64 x86_64-unknown-linux-gnu
dox arm armv7-unknown-linux-gnueabihf
dox aarch64 aarch64-unknown-linux-gnu
dox powerpc powerpc-unknown-linux-gnu
dox powerpc64le powerpc64le-unknown-linux-gnu
dox mips mips-unknown-linux-gnu
dox mips64 mips64-unknown-linux-gnuabi64
dox wasm32 wasm32-unknown-unknown

libgrust/rustc-lib/stdarch/ci/gba.json (new file)
@@ -0,0 +1,34 @@
{
  "abi-blacklist": [
    "stdcall",
    "fastcall",
    "vectorcall",
    "thiscall",
    "win64",
    "sysv64"
  ],
  "arch": "arm",
  "atomic-cas": false,
  "cpu": "arm7tdmi",
  "data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
  "emit-debug-gdb-scripts": false,
  "env": "agb",
  "executables": true,
  "features": "+soft-float,+strict-align",
  "linker": "arm-none-eabi-ld",
  "linker-flavor": "ld",
  "linker-is-gnu": true,
  "llvm-target": "thumbv4-none-agb",
  "os": "none",
  "panic-strategy": "abort",
  "pre-link-args": {
    "ld": [
      "-Tlinker.ld"
    ]
  },
  "relocation-model": "static",
  "target-c-int-width": "32",
  "target-endian": "little",
  "target-pointer-width": "32",
  "vendor": "nintendo"
}

libgrust/rustc-lib/stdarch/ci/run-docker.sh (new file)
@@ -0,0 +1,45 @@
#!/usr/bin/env sh
# Small script to run tests for a target (or all targets) inside all the
# respective docker images.
set -ex
run() {
target=$(echo "${1}" | sed 's/-emulated//')
echo "Building docker container for TARGET=${1}"
docker build -t stdarch -f "ci/docker/${1}/Dockerfile" ci/
mkdir -p target
echo "Running docker"
# shellcheck disable=SC2016
docker run \
--rm \
--user "$(id -u)":"$(id -g)" \
--env CARGO_HOME=/cargo \
--env CARGO_TARGET_DIR=/checkout/target \
--env TARGET="${target}" \
--env STDARCH_TEST_EVERYTHING \
--env STDARCH_ASSERT_INSTR_IGNORE \
--env STDARCH_DISABLE_ASSERT_INSTR \
--env NOSTD \
--env NORUN \
--env RUSTFLAGS \
--env STDARCH_TEST_NORUN \
--volume "$(dirname "$(dirname "$(command -v cargo)")")":/cargo \
--volume "$(rustc --print sysroot)":/rust:ro \
--volume "$(pwd)":/checkout:ro \
--volume "$(pwd)"/target:/checkout/target \
--init \
--workdir /checkout \
--privileged \
stdarch \
sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh ${1}"
}
if [ -z "$1" ]; then
for d in ci/docker/*; do
run "${d}"
done
else
run "${1}"
fi

libgrust/rustc-lib/stdarch/ci/run.sh (new file)
@@ -0,0 +1,122 @@
#!/usr/bin/env sh
set -ex
: "${TARGET?The TARGET environment variable must be set.}"
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability to be worth it.
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1
#export RUST_TEST_THREADS=1
RUSTFLAGS="$RUSTFLAGS -D warnings "
case ${TARGET} in
# On 32-bit use a static relocation model which avoids some extra
# instructions when dealing with static data, notably allowing some
# instruction assertion checks to pass below the 20 instruction limit. If
# this is the default, dynamic, then too many instructions are generated
# when we assert the instruction for a function and it causes tests to fail.
#
# It's not clear why `-Z plt=yes` is required here. Probably a bug in LLVM.
# If you can remove it and CI passes, please feel free to do so!
i686-* | i586-*)
export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static -Z plt=yes"
;;
#Unoptimized build uses fast-isel which breaks with msa
mips-* | mipsel-*)
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
;;
esac
echo "RUSTFLAGS=${RUSTFLAGS}"
echo "FEATURES=${FEATURES}"
echo "OBJDUMP=${OBJDUMP}"
echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}"
echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}"
cargo_test() {
cmd="cargo"
subcmd="test"
if [ "$NORUN" = "1" ]; then
export subcmd="build"
fi
cmd="$cmd ${subcmd} --target=$TARGET $1"
cmd="$cmd -- $2"
# wasm targets can't catch panics, so if a test fails make sure the test
# harness isn't trying to capture output; otherwise we won't get any useful
# output.
case ${TARGET} in
wasm32*)
cmd="$cmd --nocapture"
;;
esac
$cmd
}
CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
cargo_test "${CORE_ARCH} --release"
if [ "$NOSTD" != "1" ]; then
cargo_test "${STD_DETECT}"
cargo_test "${STD_DETECT} --release"
cargo_test "${STD_DETECT} --no-default-features"
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io"
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval"
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io"
cargo_test "${STDARCH_EXAMPLES}"
cargo_test "${STDARCH_EXAMPLES} --release"
fi
# Test targets compiled with extra features.
case ${TARGET} in
x86*)
export STDARCH_DISABLE_ASSERT_INSTR=1
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
cargo_test "--release"
;;
wasm32*)
prev="$RUSTFLAGS"
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128,+unimplemented-simd128"
cargo_test "--release"
export RUSTFLAGS="$prev"
;;
# FIXME: these targets don't build anymore
#mips-*gnu* | mipsel-*gnu*)
# export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
# cargo_test "--release"
# ;;
mips64*)
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
cargo_test "--release"
;;
powerpc64*)
# We don't build the ppc 32-bit targets with these - these targets
# are mostly unsupported for now.
OLD_RUSTFLAGS="${RUSTFLAGS}"
export RUSTFLAGS="${OLD_RUSTFLAGS} -C target-feature=+altivec"
cargo_test "--release"
export RUSTFLAGS="${OLD_RUSTFLAGS} -C target-feature=+vsx"
cargo_test "--release"
;;
*)
;;
esac
if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
# Test examples
(
cd examples
cargo test --target "$TARGET"
echo test | cargo run --release hex
)
fi

View File

@ -0,0 +1,45 @@
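// CI test runner for Android: pushes the test binary to a device with `adb`,
// runs it via `adb shell`, and checks stdout for a passing `test result:` line.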
use std::env;
use std::process::Command;
use std::path::{Path, PathBuf};
fn main() {
let args = env::args_os()
.skip(1)
.filter(|arg| arg != "--quiet")
.collect::<Vec<_>>();
assert_eq!(args.len(), 1);
let test = PathBuf::from(&args[0]);
let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());
let status = Command::new("adb")
.arg("wait-for-device")
.status()
.expect("failed to run: adb wait-for-device");
assert!(status.success());
let status = Command::new("adb")
.arg("push")
.arg(&test)
.arg(&dst)
.status()
.expect("failed to run: adb pushr");
assert!(status.success());
let output = Command::new("adb")
.arg("shell")
.arg(&dst)
.output()
.expect("failed to run: adb shell");
assert!(status.success());
println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
output.status,
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr));
let stdout = String::from_utf8_lossy(&output.stdout);
let mut lines = stdout.lines().filter(|l| l.starts_with("test result"));
if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) {
panic!("failed to find successful test run");
}
}

View File

@ -0,0 +1,22 @@
#!/usr/bin/env sh
set -ex
if rustup component add rustfmt-preview ; then
command -v rustfmt
rustfmt -V
cargo fmt --all -- --check
fi
# if rustup component add clippy-preview ; then
# cargo clippy -V
# cargo clippy --all -- -D clippy::pedantic
# fi
if shellcheck --version ; then
shellcheck -e SC2103 ci/*.sh
else
echo "shellcheck not found"
exit 1
fi

View File

@ -0,0 +1,13 @@
[package]
name = "assert-instr-macro"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[lib]
proc-macro = true
test = false
[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
syn = { version = "1.0", features = ["full"] }

View File

@ -0,0 +1,13 @@
use std::env;
fn main() {
println!("cargo:rerun-if-changed=build.rs");
let opt_level = env::var("OPT_LEVEL")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(0);
let profile = env::var("PROFILE").unwrap_or(String::new());
if profile == "release" || opt_level >= 2 {
println!("cargo:rustc-cfg=optimized");
}
}

View File

@ -0,0 +1,225 @@
//! Implementation of the `#[assert_instr]` macro
//!
//! This macro is used when testing the `stdarch` crate and is used to generate
//! test cases to assert that functions do indeed contain the instructions that
//! we're expecting them to contain.
//!
//! The procedural macro here is relatively simple: it appends a
//! `#[test]` function to the original token stream which asserts that the
//! function itself contains the relevant instruction.
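//!
//! A typical use, taken from `core_arch`'s `aarch64/crc.rs` elsewhere in this
//! import, looks like this (an illustrative excerpt, not compiled here):
//!
//! ```ignore
//! #[cfg_attr(test, assert_instr(crc32x))]
//! pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
//!     crc32x_(crc, data)
//! }
//! ```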
extern crate proc_macro;
extern crate proc_macro2;
#[macro_use]
extern crate quote;
extern crate syn;
use proc_macro2::TokenStream;
use quote::ToTokens;
#[proc_macro_attribute]
pub fn assert_instr(
attr: proc_macro::TokenStream,
item: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
let invoc = match syn::parse::<Invoc>(attr) {
Ok(s) => s,
Err(e) => return e.to_compile_error().into(),
};
let item = match syn::parse::<syn::Item>(item) {
Ok(s) => s,
Err(e) => return e.to_compile_error().into(),
};
let func = match item {
syn::Item::Fn(ref f) => f,
_ => panic!("must be attached to a function"),
};
let instr = &invoc.instr;
let name = &func.sig.ident;
// Disable assert_instr for x86 targets compiled with avx enabled, which
// causes LLVM to generate different instructions than the ones we are
// testing for.
let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok();
// If instruction tests are disabled avoid emitting this shim at all, just
// return the original item without our attribute.
if !cfg!(optimized) || disable_assert_instr {
return (quote! { #item }).into();
}
let instr_str = instr
.replace('.', "_")
.replace('/', "_")
.replace(':', "_")
.replace(char::is_whitespace, "");
let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span());
// This name has to be unique enough for us to find it in the disassembly later on:
let shim_name = syn::Ident::new(
&format!("stdarch_test_shim_{}_{}", name, instr_str),
name.span(),
);
let mut inputs = Vec::new();
let mut input_vals = Vec::new();
let ret = &func.sig.output;
for arg in func.sig.inputs.iter() {
let capture = match *arg {
syn::FnArg::Typed(ref c) => c,
ref v => panic!(
"arguments must not have patterns: `{:?}`",
v.clone().into_token_stream()
),
};
let ident = match *capture.pat {
syn::Pat::Ident(ref i) => &i.ident,
_ => panic!("must have bare arguments"),
};
if let Some(&(_, ref tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
input_vals.push(quote! { #tokens });
} else {
inputs.push(capture);
input_vals.push(quote! { #ident });
}
}
let attrs = func
.attrs
.iter()
.filter(|attr| {
attr.path
.segments
.first()
.expect("attr.path.segments.first() failed")
.ident
.to_string()
.starts_with("target")
})
.collect::<Vec<_>>();
let attrs = Append(&attrs);
// Use an ABI on Windows that passes SIMD values in registers, like what
// happens on Unix (I think?) by default.
let abi = if cfg!(windows) {
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
} else {
syn::LitStr::new("C", proc_macro2::Span::call_site())
};
let shim_name_str = format!("{}{}", shim_name, assert_name);
let to_test = quote! {
#attrs
#[no_mangle]
#[inline(never)]
pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
// The compiler in optimized mode by default runs a pass called
// "mergefunc" where it'll merge functions that look identical.
// Turns out some intrinsics produce identical code and they're
// folded together, meaning that one just jumps to another. This
// messes up our inspection of the disassembly of this function and
// we're not a huge fan of that.
//
// To thwart this pass and prevent functions from being merged we
// generate some code that's hopefully very tight in terms of
// codegen but is otherwise unique to prevent code from being
// folded.
//
// This is avoided on wasm32 right now since these functions aren't
// inlined, which breaks our tests because each intrinsic looks like it
// calls functions. It turns out functions aren't similar enough to get
// merged on wasm32 anyway. This bug is tracked at
// rust-lang/rust#74320.
#[cfg(not(target_arch = "wasm32"))]
::stdarch_test::_DONT_DEDUP.store(
std::mem::transmute(#shim_name_str.as_bytes().as_ptr()),
std::sync::atomic::Ordering::Relaxed,
);
#name(#(#input_vals),*)
}
};
let tokens: TokenStream = quote! {
#[test]
#[allow(non_snake_case)]
fn #assert_name() {
#to_test
// Make sure that the shim is not removed by leaking it to unknown
// code:
unsafe { llvm_asm!("" : : "r"(#shim_name as usize) : "memory" : "volatile") };
::stdarch_test::assert(#shim_name as usize,
stringify!(#shim_name),
#instr);
}
};
let tokens: TokenStream = quote! {
#item
#tokens
};
tokens.into()
}
struct Invoc {
instr: String,
args: Vec<(syn::Ident, syn::Expr)>,
}
impl syn::parse::Parse for Invoc {
fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
use syn::{ext::IdentExt, Token};
let mut instr = String::new();
while !input.is_empty() {
if input.parse::<Token![,]>().is_ok() {
break;
}
if let Ok(ident) = syn::Ident::parse_any(input) {
instr.push_str(&ident.to_string());
continue;
}
if input.parse::<Token![.]>().is_ok() {
instr.push_str(".");
continue;
}
if let Ok(s) = input.parse::<syn::LitStr>() {
instr.push_str(&s.value());
continue;
}
println!("{:?}", input.cursor().token_stream());
return Err(input.error("expected an instruction"));
}
if instr.is_empty() {
return Err(input.error("expected an instruction before comma"));
}
let mut args = Vec::new();
while !input.is_empty() {
let name = input.parse::<syn::Ident>()?;
input.parse::<Token![=]>()?;
let expr = input.parse::<syn::Expr>()?;
args.push((name, expr));
if input.parse::<Token![,]>().is_err() {
if !input.is_empty() {
return Err(input.error("extra tokens at end"));
}
break;
}
}
Ok(Self { instr, args })
}
}
struct Append<T>(T);
impl<T> quote::ToTokens for Append<T>
where
T: Clone + IntoIterator,
T::Item: quote::ToTokens,
{
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
for item in self.0.clone() {
item.to_tokens(tokens);
}
}
}

View File

@ -0,0 +1,27 @@
[package]
name = "core_arch"
version = "0.1.5"
authors = [
"Alex Crichton <alex@alexcrichton.com>",
"Andrew Gallant <jamslam@gmail.com>",
"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
]
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
documentation = "https://docs.rs/core_arch"
homepage = "https://github.com/rust-lang/stdarch"
repository = "https://github.com/rust-lang/stdarch"
readme = "README.md"
keywords = ["core", "simd", "arch", "intrinsics"]
categories = ["hardware-support", "no-std"]
license = "MIT/Apache-2.0"
build = "build.rs"
edition = "2018"
[badges]
is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" }
is-it-maintained-open-issues = { repository = "rust-lang/stdarch" }
maintenance = { status = "experimental" }
[dev-dependencies]
stdarch-test = { version = "0.*", path = "../stdarch-test" }
std_detect = { version = "0.*", path = "../std_detect" }

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,25 @@
Copyright (c) 2017 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1,68 @@
`core::arch` - Rust's core library architecture-specific intrinsics
=======
[![core_arch_crate_badge]][core_arch_crate_link] [![core_arch_docs_badge]][core_arch_docs_link]
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
# Usage
`core::arch` is available as part of `libcore` and is re-exported by
`libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate.
Unstable features are often available in nightly Rust behind the
`feature(stdsimd)` feature gate.
Using `core::arch` via this crate requires nightly Rust, and it can (and does)
break often. The only cases in which you should consider using it via this crate
are:
* if you need to re-compile `core::arch` yourself, e.g., with particular
target-features enabled that are not enabled for `libcore`/`libstd`. Note: if
you need to re-compile it for a non-standard target, please prefer using
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
this crate.
* using some features that might not be available even behind unstable Rust
features. We try to keep these to a minimum. If you need to use some of these
features, please open an issue so that we can expose them in nightly Rust and
you can use them from there.
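For the common case of going through `std::arch`, the usual shape is a runtime
feature check plus a `#[target_feature]` function. The sketch below is
illustrative only (`sum` and `sum_avx2` are hypothetical helpers, not part of
this crate):

```rust
// Dispatch to an AVX2-enabled version at runtime when available.
fn sum(values: &[f32]) -> f32 {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: the feature check above guarantees AVX2 is available.
            return unsafe { sum_avx2(values) };
        }
    }
    values.iter().sum()
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn sum_avx2(values: &[f32]) -> f32 {
    // A real implementation would use `core::arch::x86_64` intrinsics here;
    // a scalar body keeps the sketch self-contained.
    values.iter().sum()
}
```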
# Documentation
* [Documentation - i686][i686]
* [Documentation - x86\_64][x86_64]
* [Documentation - arm][arm]
* [Documentation - aarch64][aarch64]
* [Documentation - powerpc][powerpc]
* [Documentation - powerpc64][powerpc64]
* [How to get started][contrib]
* [How to help implement intrinsics][help-implement]
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/
[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/
[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/
[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/
# License
`core_arch` is primarily distributed under the terms of both the MIT license and
the Apache License (Version 2.0), with portions covered by various BSD-like
licenses.
See LICENSE-APACHE and LICENSE-MIT for details.
# Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in `core_arch` by you, as defined in the Apache-2.0 license,
shall be dual licensed as above, without any additional terms or conditions.
[core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg
[core_arch_crate_link]: https://crates.io/crates/core_arch
[core_arch_docs_badge]: https://docs.rs/core_arch/badge.svg
[core_arch_docs_link]: https://docs.rs/core_arch/

File diff suppressed because it is too large.

View File

@ -0,0 +1,17 @@
use std::env;
fn main() {
println!("cargo:rustc-cfg=core_arch_docs");
// Used to tell our `#[assert_instr]` annotations that all simd intrinsics
// are available to test their codegen, since some are gated behind an extra
// `-Ctarget-feature=+unimplemented-simd128` that doesn't have any
// equivalent in `#[target_feature]` right now.
println!("cargo:rerun-if-env-changed=RUSTFLAGS");
if env::var("RUSTFLAGS")
.unwrap_or_default()
.contains("unimplemented-simd128")
{
println!("cargo:rustc-cfg=all_simd");
}
}

View File

@ -0,0 +1,3 @@
ignore = [
"src/simd.rs",
]

View File

@ -0,0 +1,45 @@
extern "C" {
#[link_name = "llvm.aarch64.crc32x"]
fn crc32x_(crc: u32, data: u64) -> u32;
#[link_name = "llvm.aarch64.crc32cx"]
fn crc32cx_(crc: u32, data: u64) -> u32;
}
#[cfg(test)]
use stdarch_test::assert_instr;
/// CRC32 single round checksum for quad words (64 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(test, assert_instr(crc32x))]
pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
crc32x_(crc, data)
}
/// CRC32-C single round checksum for quad words (64 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(test, assert_instr(crc32cx))]
pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
crc32cx_(crc, data)
}
#[cfg(test)]
mod tests {
use crate::core_arch::{aarch64::*, simd::*};
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "crc")]
unsafe fn test_crc32d() {
assert_eq!(__crc32d(0, 0), 0);
assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32cd() {
assert_eq!(__crc32cd(0, 0), 0);
assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501);
}
}

View File

@ -0,0 +1,331 @@
use crate::core_arch::arm::{uint32x4_t, uint8x16_t};
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.aarch64.crypto.aese"]
fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
#[link_name = "llvm.aarch64.crypto.aesd"]
fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
#[link_name = "llvm.aarch64.crypto.aesmc"]
fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t;
#[link_name = "llvm.aarch64.crypto.aesimc"]
fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t;
#[link_name = "llvm.aarch64.crypto.sha1h"]
fn vsha1h_u32_(hash_e: u32) -> u32;
#[link_name = "llvm.aarch64.crypto.sha1su0"]
fn vsha1su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1su1"]
fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1c"]
fn vsha1cq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1p"]
fn vsha1pq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha1m"]
fn vsha1mq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256h"]
fn vsha256hq_u32_(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256h2"]
fn vsha256h2q_u32_(hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256su0"]
fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
#[link_name = "llvm.aarch64.crypto.sha256su1"]
fn vsha256su1q_u32_(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
}
#[cfg(test)]
use stdarch_test::assert_instr;
/// AES single round encryption.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(aese))]
pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
vaeseq_u8_(data, key)
}
/// AES single round decryption.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(aesd))]
pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
vaesdq_u8_(data, key)
}
/// AES mix columns.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(aesmc))]
pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
vaesmcq_u8_(data)
}
/// AES inverse mix columns.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(aesimc))]
pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
vaesimcq_u8_(data)
}
/// SHA1 fixed rotate.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1h))]
pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
vsha1h_u32_(hash_e)
}
/// SHA1 hash update accelerator, choose.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1c))]
pub unsafe fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
vsha1cq_u32_(hash_abcd, hash_e, wk)
}
/// SHA1 hash update accelerator, majority.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1m))]
pub unsafe fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
vsha1mq_u32_(hash_abcd, hash_e, wk)
}
/// SHA1 hash update accelerator, parity.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1p))]
pub unsafe fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
vsha1pq_u32_(hash_abcd, hash_e, wk)
}
/// SHA1 schedule update accelerator, first part.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1su0))]
pub unsafe fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
vsha1su0q_u32_(w0_3, w4_7, w8_11)
}
/// SHA1 schedule update accelerator, second part.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha1su1))]
pub unsafe fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
vsha1su1q_u32_(tw0_3, w12_15)
}
/// SHA256 hash update accelerator.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256h))]
pub unsafe fn vsha256hq_u32(
hash_abcd: uint32x4_t,
hash_efgh: uint32x4_t,
wk: uint32x4_t,
) -> uint32x4_t {
vsha256hq_u32_(hash_abcd, hash_efgh, wk)
}
/// SHA256 hash update accelerator, upper part.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256h2))]
pub unsafe fn vsha256h2q_u32(
hash_efgh: uint32x4_t,
hash_abcd: uint32x4_t,
wk: uint32x4_t,
) -> uint32x4_t {
vsha256h2q_u32_(hash_efgh, hash_abcd, wk)
}
/// SHA256 schedule update accelerator, first part.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256su0))]
pub unsafe fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
vsha256su0q_u32_(w0_3, w4_7)
}
/// SHA256 schedule update accelerator, second part.
#[inline]
#[target_feature(enable = "crypto")]
#[cfg_attr(test, assert_instr(sha256su1))]
pub unsafe fn vsha256su1q_u32(
tw0_3: uint32x4_t,
w8_11: uint32x4_t,
w12_15: uint32x4_t,
) -> uint32x4_t {
vsha256su1q_u32_(tw0_3, w8_11, w12_15)
}
#[cfg(test)]
mod tests {
use crate::core_arch::{aarch64::*, simd::*};
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "crypto")]
unsafe fn test_vaeseq_u8() {
let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
let key = mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
let r: u8x16 = mem::transmute(vaeseq_u8(data, key));
assert_eq!(
r,
u8x16::new(
124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118, 124, 123, 124, 197
)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vaesdq_u8() {
let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
let key = mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
let r: u8x16 = mem::transmute(vaesdq_u8(data, key));
assert_eq!(
r,
u8x16::new(9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vaesmcq_u8() {
let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
let r: u8x16 = mem::transmute(vaesmcq_u8(data));
assert_eq!(
r,
u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vaesimcq_u8() {
let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
let r: u8x16 = mem::transmute(vaesimcq_u8(data));
assert_eq!(
r,
u8x16::new(43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80, 125, 70)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1h_u32() {
assert_eq!(vsha1h_u32(0x1234), 0x048d);
assert_eq!(vsha1h_u32(0x5678), 0x159e);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1su0q_u32() {
let r: u32x4 = mem::transmute(vsha1su0q_u32(
mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
));
assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678));
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1su1q_u32() {
let r: u32x4 = mem::transmute(vsha1su1q_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1cq_u32() {
let r: u32x4 = mem::transmute(vsha1cq_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
0x1234,
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1pq_u32() {
let r: u32x4 = mem::transmute(vsha1pq_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
0x1234,
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha1mq_u32() {
let r: u32x4 = mem::transmute(vsha1mq_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
0x1234,
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha256hq_u32() {
let r: u32x4 = mem::transmute(vsha256hq_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha256h2q_u32() {
let r: u32x4 = mem::transmute(vsha256h2q_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha256su0q_u32() {
let r: u32x4 = mem::transmute(vsha256su0q_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152)
);
}
#[simd_test(enable = "crypto")]
unsafe fn test_vsha256su1q_u32() {
let r: u32x4 = mem::transmute(vsha256su1q_u32(
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
));
assert_eq!(
r,
u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f)
);
}
}

View File

@ -0,0 +1,40 @@
//! AArch64 intrinsics.
//!
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
//!
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
mod v8;
pub use self::v8::*;
mod neon;
pub use self::neon::*;
mod crypto;
pub use self::crypto::*;
mod tme;
pub use self::tme::*;
mod crc;
pub use self::crc::*;
mod prefetch;
pub use self::prefetch::*;
pub use super::acle::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `BRK 1`.
#[cfg_attr(test, assert_instr(brk))]
#[inline]
pub unsafe fn brk() -> ! {
crate::intrinsics::abort()
}
#[cfg(test)]
pub(crate) mod test_support;

View File

@ -0,0 +1,666 @@
// This code is automatically generated. DO NOT MODIFY.
//
// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
//
// ```
// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
// ```
use super::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
simd_eq(a, b)
}
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
simd_eq(a, b)
}
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
simd_eq(a, b)
}
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
simd_eq(a, b)
}
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
simd_eq(a, b)
}
/// Compare bitwise Equal (vector)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmeq))]
pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
simd_eq(a, b)
}
/// Floating-point compare equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmeq))]
pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
simd_eq(a, b)
}
/// Floating-point compare equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmeq))]
pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_eq(a, b)
}
/// Compare signed greater than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
simd_gt(a, b)
}
/// Compare signed greater than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
simd_gt(a, b)
}
/// Compare unsigned higher
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhi))]
pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
simd_gt(a, b)
}
/// Compare unsigned higher
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhi))]
pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
simd_gt(a, b)
}
/// Floating-point compare greater than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
simd_gt(a, b)
}
/// Floating-point compare greater than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_gt(a, b)
}
/// Compare signed less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
simd_lt(a, b)
}
/// Compare signed less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
simd_lt(a, b)
}
/// Compare unsigned less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhi))]
pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
simd_lt(a, b)
}
/// Compare unsigned less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhi))]
pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
simd_lt(a, b)
}
/// Floating-point compare less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
simd_lt(a, b)
}
/// Floating-point compare less than
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_lt(a, b)
}
/// Compare signed less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
simd_le(a, b)
}
/// Compare signed less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
simd_le(a, b)
}
/// Compare unsigned less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhs))]
pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
simd_le(a, b)
}
/// Compare unsigned less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhs))]
pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
simd_le(a, b)
}
/// Floating-point compare less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
simd_le(a, b)
}
/// Floating-point compare less than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_le(a, b)
}
/// Compare signed greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
simd_ge(a, b)
}
/// Compare signed greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
simd_ge(a, b)
}
/// Compare unsigned greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhs))]
pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
simd_ge(a, b)
}
/// Compare unsigned greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmhs))]
pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
simd_ge(a, b)
}
/// Floating-point compare greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
simd_ge(a, b)
}
/// Floating-point compare greater than or equal
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_ge(a, b)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul))]
pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
simd_mul(a, b)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul))]
pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
simd_mul(a, b)
}
/// Subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fsub))]
pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
simd_sub(a, b)
}
/// Subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fsub))]
pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
simd_sub(a, b)
}
#[cfg(test)]
mod test {
use super::*;
use crate::core_arch::simd::*;
use std::mem::transmute;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vceq_u64() {
let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceqq_u64() {
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01);
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceq_s64() {
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceqq_s64() {
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceq_p64() {
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceqq_p64() {
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
assert_eq!(r, e);
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceq_f64() {
let a: f64 = 1.2;
let b: f64 = 1.2;
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vceqq_f64() {
let a: f64x2 = f64x2::new(1.2, 3.4);
let b: f64x2 = f64x2::new(1.2, 3.4);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgt_s64() {
let a: i64x1 = i64x1::new(1);
let b: i64x1 = i64x1::new(0);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtq_s64() {
let a: i64x2 = i64x2::new(1, 2);
let b: i64x2 = i64x2::new(0, 1);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgt_u64() {
let a: u64x1 = u64x1::new(1);
let b: u64x1 = u64x1::new(0);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtq_u64() {
let a: u64x2 = u64x2::new(1, 2);
let b: u64x2 = u64x2::new(0, 1);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgt_f64() {
let a: f64 = 1.2;
let b: f64 = 0.1;
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtq_f64() {
let a: f64x2 = f64x2::new(1.2, 2.3);
let b: f64x2 = f64x2::new(0.1, 1.2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclt_s64() {
let a: i64x1 = i64x1::new(0);
let b: i64x1 = i64x1::new(1);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltq_s64() {
let a: i64x2 = i64x2::new(0, 1);
let b: i64x2 = i64x2::new(1, 2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclt_u64() {
let a: u64x1 = u64x1::new(0);
let b: u64x1 = u64x1::new(1);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltq_u64() {
let a: u64x2 = u64x2::new(0, 1);
let b: u64x2 = u64x2::new(1, 2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclt_f64() {
let a: f64 = 0.1;
let b: f64 = 1.2;
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltq_f64() {
let a: f64x2 = f64x2::new(0.1, 1.2);
let b: f64x2 = f64x2::new(1.2, 2.3);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcle_s64() {
let a: i64x1 = i64x1::new(0);
let b: i64x1 = i64x1::new(1);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcleq_s64() {
let a: i64x2 = i64x2::new(0, 1);
let b: i64x2 = i64x2::new(1, 2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcle_u64() {
let a: u64x1 = u64x1::new(0);
let b: u64x1 = u64x1::new(1);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcleq_u64() {
let a: u64x2 = u64x2::new(0, 1);
let b: u64x2 = u64x2::new(1, 2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcle_f64() {
let a: f64 = 0.1;
let b: f64 = 1.2;
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcleq_f64() {
let a: f64x2 = f64x2::new(0.1, 1.2);
let b: f64x2 = f64x2::new(1.2, 2.3);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcge_s64() {
let a: i64x1 = i64x1::new(1);
let b: i64x1 = i64x1::new(0);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgeq_s64() {
let a: i64x2 = i64x2::new(1, 2);
let b: i64x2 = i64x2::new(0, 1);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcge_u64() {
let a: u64x1 = u64x1::new(1);
let b: u64x1 = u64x1::new(0);
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgeq_u64() {
let a: u64x2 = u64x2::new(1, 2);
let b: u64x2 = u64x2::new(0, 1);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcge_f64() {
let a: f64 = 1.2;
let b: f64 = 0.1;
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgeq_f64() {
let a: f64x2 = f64x2::new(1.2, 2.3);
let b: f64x2 = f64x2::new(0.1, 1.2);
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;
let b: f64 = 2.0;
let e: f64 = 2.0;
let r: f64 = transmute(vmul_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_f64() {
let a: f64x2 = f64x2::new(1.0, 2.0);
let b: f64x2 = f64x2::new(2.0, 3.0);
let e: f64x2 = f64x2::new(2.0, 6.0);
let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsub_f64() {
let a: f64 = 1.0;
let b: f64 = 1.0;
let e: f64 = 0.0;
let r: f64 = transmute(vsub_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubq_f64() {
let a: f64x2 = f64x2::new(1.0, 4.0);
let b: f64x2 = f64x2::new(1.0, 2.0);
let e: f64x2 = f64x2::new(0.0, 2.0);
let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
}

File diff suppressed because it is too large

@@ -0,0 +1,89 @@
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
#[link_name = "llvm.prefetch"]
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
}
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_READ: i32 = 0;
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_WRITE: i32 = 1;
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_LOCALITY0: i32 = 0;
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_LOCALITY1: i32 = 1;
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_LOCALITY2: i32 = 2;
/// See [`prefetch`](fn._prefetch.html).
pub const _PREFETCH_LOCALITY3: i32 = 3;
/// Fetch the cache line that contains address `p` using the given `rw` and `locality`.
///
/// The `rw` must be one of:
///
/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
/// for a read.
///
/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
/// for a write.
///
/// The `locality` must be one of:
///
/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
/// non-temporal prefetch, for data that is used only once.
///
/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
///
/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
///
/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
///
/// The prefetch memory instructions signal to the memory system that memory accesses
/// from a specified address are likely to occur in the near future. The memory system
/// can respond by taking actions that are expected to speed up the memory access when
/// they do occur, such as preloading the specified address into one or more caches.
/// Because these signals are only hints, it is valid for a particular CPU to treat
/// any or all prefetch instructions as a NOP.
///
///
/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
#[inline(always)]
#[cfg_attr(test, assert_instr("prfm pldl1strm", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY0))]
#[cfg_attr(test, assert_instr("prfm pldl3keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY1))]
#[cfg_attr(test, assert_instr("prfm pldl2keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY2))]
#[cfg_attr(test, assert_instr("prfm pldl1keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY3))]
#[cfg_attr(test, assert_instr("prfm pstl1strm", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY0))]
#[cfg_attr(test, assert_instr("prfm pstl3keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY1))]
#[cfg_attr(test, assert_instr("prfm pstl2keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY2))]
#[cfg_attr(test, assert_instr("prfm pstl1keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY3))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _prefetch(p: *const i8, rw: i32, locality: i32) {
// We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache).
// `rw` and `locality` are based on the function parameters.
macro_rules! pref {
($rdwr:expr, $local:expr) => {
match ($rdwr, $local) {
(0, 0) => prefetch(p, 0, 0, 1),
(0, 1) => prefetch(p, 0, 1, 1),
(0, 2) => prefetch(p, 0, 2, 1),
(0, 3) => prefetch(p, 0, 3, 1),
(1, 0) => prefetch(p, 1, 0, 1),
(1, 1) => prefetch(p, 1, 1, 1),
(1, 2) => prefetch(p, 1, 2, 1),
(1, 3) => prefetch(p, 1, 3, 1),
(_, _) => panic!(
"Illegal (rw, locality) pair in prefetch, value ({}, {}).",
$rdwr, $local
),
}
};
}
pref!(rw, locality);
}
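// A minimal usage sketch (illustrative only; the pointer and the two hints below are
// hypothetical). The point is how the `_PREFETCH_*` constants pair with `_prefetch`.
#[allow(dead_code)]
unsafe fn prefetch_example(data: *const i8) {
    // Hint that the line containing `data` will be read soon and should stay resident (L1).
    _prefetch(data, _PREFETCH_READ, _PREFETCH_LOCALITY3);
    // Hint a one-shot streaming write to the same line (do not pollute the caches).
    _prefetch(data, _PREFETCH_WRITE, _PREFETCH_LOCALITY0);
}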

@@ -0,0 +1,184 @@
use crate::core_arch::{aarch64::neon::*, arm::*, simd::*};
use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
macro_rules! V_u64 {
() => {
vec![
0x0000000000000000u64,
0x0101010101010101u64,
0x0202020202020202u64,
0x0F0F0F0F0F0F0F0Fu64,
0x8080808080808080u64,
0xF0F0F0F0F0F0F0F0u64,
0xFFFFFFFFFFFFFFFFu64,
]
};
}
macro_rules! V_f64 {
() => {
vec![
0.0f64,
1.0f64,
-1.0f64,
1.2f64,
2.4f64,
std::f64::MAX,
std::f64::MIN,
std::f64::INFINITY,
std::f64::NEG_INFINITY,
std::f64::NAN,
]
};
}
macro_rules! to64 {
($t : ident) => {
|v: $t| -> u64 { transmute(v) }
};
}
macro_rules! to128 {
($t : ident) => {
|v: $t| -> u128 { transmute(v) }
};
}
pub(crate) fn test<T, U, V, W, X>(
vals: Vec<T>,
fill1: fn(T) -> V,
fill2: fn(U) -> W,
cast: fn(W) -> X,
test_fun: fn(V, V) -> W,
verify_fun: fn(T, T) -> U,
) where
T: Copy + core::fmt::Debug,
U: Copy + core::fmt::Debug + std::cmp::PartialEq,
V: Copy + core::fmt::Debug,
W: Copy + core::fmt::Debug,
X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
let pairs = vals.iter().zip(vals.iter());
for (i, j) in pairs {
let a: V = fill1(*i);
let b: V = fill1(*j);
let actual_pre: W = test_fun(a, b);
let expected_pre: W = fill2(verify_fun(*i, *j));
let actual: X = cast(actual_pre);
let expected: X = cast(expected_pre);
assert_eq!(
actual, expected,
"[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
*i, *j, &a, &b, actual_pre, &a, &b, expected_pre
);
}
}
macro_rules! gen_test_fn {
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
unsafe {
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
};
}
};
}
macro_rules! gen_fill_fn {
($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
pub(crate) fn $id(val: $in_t) -> $out_t {
let initial: [$in_t; $num_els] = [val; $num_els];
let result: $cmp_t = unsafe { transmute(initial) };
let result_out: $out_t = unsafe { transmute(result) };
// println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
result_out
}
};
}
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
gen_test_fn!(
test_ari_f64,
f64,
f64,
float64x1_t,
float64x1_t,
u64,
V_f64!(),
fill_f64,
fill_f64,
to64!(float64x1_t)
);
gen_test_fn!(
test_cmp_f64,
f64,
u64,
float64x1_t,
uint64x1_t,
u64,
V_f64!(),
fill_f64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_ari_f64,
f64,
f64,
float64x2_t,
float64x2_t,
u128,
V_f64!(),
fillq_f64,
fillq_f64,
to128!(float64x2_t)
);
gen_test_fn!(
testq_cmp_f64,
f64,
u64,
float64x2_t,
uint64x2_t,
u128,
V_f64!(),
fillq_f64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
test_cmp_p64,
u64,
u64,
poly64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_p64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_cmp_p64,
u64,
u64,
poly64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_p64,
fillq_u64,
to128!(uint64x2_t)
);
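// A minimal driving sketch (illustrative only): `vcle_f64` stands in for an intrinsic under
// test and the second closure is its scalar reference; both non-capturing closures coerce to
// the `fn` pointers expected by the generated helper.
#[allow(dead_code)]
fn example_drive_test_cmp_f64() {
    test_cmp_f64(
        |a, b| unsafe { vcle_f64(a, b) },
        |a, b| if a <= b { u64::MAX } else { 0 },
    );
}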

@@ -0,0 +1,183 @@
//! ARM's Transactional Memory Extensions (TME).
//!
//! This CPU feature is available on AArch64, in the A-profile architecture.
//! The feature sits outside the NEON feature set. TME-specific vendor documentation can
//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
//!
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
//!
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
//!
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
#[link_name = "llvm.aarch64.tstart"]
fn aarch64_tstart() -> u64;
#[link_name = "llvm.aarch64.tcommit"]
fn aarch64_tcommit() -> ();
#[link_name = "llvm.aarch64.tcancel"]
fn aarch64_tcancel(imm0: u64) -> ();
#[link_name = "llvm.aarch64.ttest"]
fn aarch64_ttest() -> u64;
}
/// Transaction successfully started.
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
/// Extraction mask for failure reason
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
/// Transaction retry is possible.
pub const _TMFAILURE_RTRY: u64 = 1 << 15;
/// Transaction executed a TCANCEL instruction
pub const _TMFAILURE_CNCL: u64 = 1 << 16;
/// Transaction aborted because a conflict occurred
pub const _TMFAILURE_MEM: u64 = 1 << 17;
/// Fallback error type for any other reason
pub const _TMFAILURE_IMP: u64 = 1 << 18;
/// Transaction aborted because a non-permissible operation was attempted
pub const _TMFAILURE_ERR: u64 = 1 << 19;
/// Transaction aborted due to read or write set limit was exceeded
pub const _TMFAILURE_SIZE: u64 = 1 << 20;
/// Transaction aborted due to transactional nesting level was exceeded
pub const _TMFAILURE_NEST: u64 = 1 << 21;
/// Transaction aborted due to a debug trap.
pub const _TMFAILURE_DBG: u64 = 1 << 22;
/// Transaction failed from interrupt
pub const _TMFAILURE_INT: u64 = 1 << 23;
/// Indicates a TRIVIAL version of TM is available
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
/// If the transaction fails, all state modifications are discarded and a cause of the failure
/// is encoded in the return value.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(test, assert_instr(tstart))]
pub unsafe fn __tstart() -> u64 {
aarch64_tstart()
}
/// Commits the current transaction. For a nested transaction, the only effect is that the
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
/// performed transactionally are committed to the architectural state.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(test, assert_instr(tcommit))]
pub unsafe fn __tcommit() {
aarch64_tcommit()
}
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(test, assert_instr(tcancel, imm0 = 0x0))]
#[rustc_args_required_const(0)]
pub unsafe fn __tcancel(imm0: u64) {
macro_rules! call {
($imm0:expr) => {
aarch64_tcancel($imm0)
};
}
constify_imm8!(imm0, call)
}
/// Tests if executing inside a transaction. If no transaction is currently executing,
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(test, assert_instr(ttest))]
pub unsafe fn __ttest() -> u64 {
aarch64_ttest()
}
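// A minimal retry sketch (illustrative only; the bound of 8 attempts is an arbitrary choice):
// start a transaction, commit on success, and retry only while the failure code is flagged as
// retryable; otherwise report failure so the caller can fall back to a lock-based path.
#[allow(dead_code)]
#[target_feature(enable = "tme")]
unsafe fn example_try_transaction() -> bool {
    for _ in 0..8 {
        let code = __tstart();
        if code == _TMSTART_SUCCESS {
            // ... transactional body would go here ...
            __tcommit();
            return true;
        }
        if (code & _TMFAILURE_RTRY) == 0 {
            // Abort cause is not marked retryable (e.g. _TMFAILURE_ERR); give up.
            return false;
        }
    }
    false
}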
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::aarch64::*;
const CANCEL_CODE: u64 = (0 | (0x123 & _TMFAILURE_REASON) as u64) as u64;
#[simd_test(enable = "tme")]
unsafe fn test_tstart() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
break;
}
assert_eq!(x, 0);
}
}
#[simd_test(enable = "tme")]
unsafe fn test_tcommit() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcommit();
}
assert_eq!(x, i + 1);
}
}
#[simd_test(enable = "tme")]
unsafe fn test_tcancel() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcancel(CANCEL_CODE);
break;
}
}
assert_eq!(x, 0);
}
#[simd_test(enable = "tme")]
unsafe fn test_ttest() {
for _ in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
if tme::__ttest() == 2 {
tme::__tcancel(CANCEL_CODE);
break;
}
}
}
}
}

@@ -0,0 +1,104 @@
//! ARMv8 intrinsics.
//!
//! The reference is [ARMv8-A Reference Manual][armv8].
//!
//! [armv8]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.
//! ddi0487a.k_10775/index.html
#[cfg(test)]
use stdarch_test::assert_instr;
/// Reverse the order of the bytes.
#[inline]
#[cfg_attr(test, assert_instr(rev))]
pub unsafe fn _rev_u64(x: u64) -> u64 {
x.swap_bytes() as u64
}
/// Count Leading Zeros.
#[inline]
#[cfg_attr(test, assert_instr(clz))]
pub unsafe fn _clz_u64(x: u64) -> u64 {
x.leading_zeros() as u64
}
/// Reverse the bit order.
#[inline]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn _rbit_u64(x: u64) -> u64 {
crate::intrinsics::bitreverse(x)
}
/// Counts the number of leading bits that match the most significant (sign) bit, excluding
/// the sign bit itself.
///
/// When all bits of the operand are set it returns the size of the operand in bits minus one.
#[inline]
#[cfg_attr(test, assert_instr(cls))]
pub unsafe fn _cls_u32(x: u32) -> u32 {
u32::leading_zeros((((((x as i32) >> 31) as u32) ^ x) << 1) | 1) as u32
}
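// Worked example of the expression above: for x = 0xFFFF_00FF (16 leading one bits),
// `(x as i32) >> 31` is all ones, so the XOR clears every bit that matches the sign bit,
// giving 0x0000_FF00; shifting left by one and OR-ing in 1 yields 0x0001_FE01, whose 15
// leading zeros are exactly the number of leading bits equal to the sign bit, not counting
// the sign bit itself.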
/// Counts the number of leading bits that match the most significant (sign) bit, excluding
/// the sign bit itself.
///
/// When all bits of the operand are set it returns the size of the operand in bits minus one.
#[inline]
#[cfg_attr(test, assert_instr(cls))]
pub unsafe fn _cls_u64(x: u64) -> u64 {
u64::leading_zeros((((((x as i64) >> 63) as u64) ^ x) << 1) | 1) as u64
}
#[cfg(test)]
mod tests {
use crate::core_arch::aarch64::v8;
#[test]
fn _rev_u64() {
unsafe {
assert_eq!(
v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64),
0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
);
}
}
#[test]
fn _clz_u64() {
unsafe {
assert_eq!(v8::_clz_u64(0b0000_1010u64), 60u64);
}
}
#[test]
fn _rbit_u64() {
unsafe {
assert_eq!(
v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64),
0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
);
}
}
#[test]
fn _cls_u32() {
unsafe {
assert_eq!(
v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32),
15_u32
);
}
}
#[test]
fn _cls_u64() {
unsafe {
assert_eq!(
v8::_cls_u64(
0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64
),
15_u64
);
}
}
}

@@ -0,0 +1,14 @@
//! Access types available on all architectures
/// Full system is the required shareability domain, reads and writes are the
/// required access types
pub struct SY;
dmb_dsb!(SY);
impl super::super::sealed::Isb for SY {
#[inline(always)]
unsafe fn __isb(&self) {
super::isb(super::arg::SY)
}
}

@@ -0,0 +1,27 @@
// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary
// of CP15 instructions"
/// Full system is the required shareability domain, reads and writes are the
/// required access types
pub struct SY;
impl super::super::sealed::Dmb for SY {
#[inline(always)]
unsafe fn __dmb(&self) {
llvm_asm!("mcr p15, 0, r0, c7, c10, 5" : : : "memory" : "volatile")
}
}
impl super::super::sealed::Dsb for SY {
#[inline(always)]
unsafe fn __dsb(&self) {
llvm_asm!("mcr p15, 0, r0, c7, c10, 4" : : : "memory" : "volatile")
}
}
impl super::super::sealed::Isb for SY {
#[inline(always)]
unsafe fn __isb(&self) {
llvm_asm!("mcr p15, 0, r0, c7, c5, 4" : : : "memory" : "volatile")
}
}

@@ -0,0 +1,154 @@
// Reference: Section 7.4 "Hints" of ACLE
// CP15 instruction
#[cfg(not(any(
// v8
target_arch = "aarch64",
// v7
target_feature = "v7",
// v6-M
target_feature = "mclass"
)))]
mod cp15;
#[cfg(not(any(
target_arch = "aarch64",
target_feature = "v7",
target_feature = "mclass"
)))]
pub use self::cp15::*;
// Dedicated instructions
#[cfg(any(
target_arch = "aarch64",
target_feature = "v7",
target_feature = "mclass"
))]
macro_rules! dmb_dsb {
($A:ident) => {
impl super::super::sealed::Dmb for $A {
#[inline(always)]
unsafe fn __dmb(&self) {
super::dmb(super::arg::$A)
}
}
impl super::super::sealed::Dsb for $A {
#[inline(always)]
unsafe fn __dsb(&self) {
super::dsb(super::arg::$A)
}
}
};
}
#[cfg(any(
target_arch = "aarch64",
target_feature = "v7",
target_feature = "mclass"
))]
mod common;
#[cfg(any(
target_arch = "aarch64",
target_feature = "v7",
target_feature = "mclass"
))]
pub use self::common::*;
#[cfg(any(target_arch = "aarch64", target_feature = "v7",))]
mod not_mclass;
#[cfg(any(target_arch = "aarch64", target_feature = "v7",))]
pub use self::not_mclass::*;
#[cfg(target_arch = "aarch64")]
mod v8;
#[cfg(target_arch = "aarch64")]
pub use self::v8::*;
/// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction.
///
/// DMB ensures the observed ordering of memory accesses. Memory accesses of the specified type
/// issued before the DMB are guaranteed to be observed (in the specified scope) before memory
/// accesses issued after the DMB.
///
/// For example, DMB should be used between storing data, and updating a flag variable that makes
/// that data available to another core.
///
/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type.
#[inline(always)]
pub unsafe fn __dmb<A>(arg: A)
where
A: super::sealed::Dmb,
{
arg.__dmb()
}
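// A minimal producer-side sketch of the pattern described above (illustrative only; the
// pointers are hypothetical): publish a payload, then raise a flag another observer polls,
// with a full-system DMB in between so the payload is observable before the flag.
#[allow(dead_code)]
unsafe fn example_publish(payload: *mut u32, ready_flag: *mut u32) {
    core::ptr::write_volatile(payload, 42);
    __dmb(SY); // order the payload store before the flag store
    core::ptr::write_volatile(ready_flag, 1);
}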
/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction.
///
/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has
/// additional properties. After a DSB instruction completes, all memory accesses of the specified
/// type issued before the DSB are guaranteed to have completed.
///
/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type.
#[inline(always)]
pub unsafe fn __dsb<A>(arg: A)
where
A: super::sealed::Dsb,
{
arg.__dsb()
}
/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15
/// instruction.
///
/// This instruction flushes the processor pipeline fetch buffers, so that following instructions
/// are fetched from cache or memory.
///
/// An ISB is needed after some system maintenance operations. An ISB is also needed before
/// transferring control to code that has been loaded or modified in memory, for example by an
/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are
/// separate, privileged cache maintenance operations would be needed in order to unify the caches.)
///
/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full
/// system) scope of the ISB instruction.
#[inline(always)]
pub unsafe fn __isb<A>(arg: A)
where
A: super::sealed::Isb,
{
arg.__isb()
}
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
fn dmb(_: i32);
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dsb")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")]
fn dsb(_: i32);
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.isb")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")]
fn isb(_: i32);
}
// we put these in a module to prevent weirdness with glob re-exports
mod arg {
// See Section 7.3 Memory barriers of ACLE
pub const SY: i32 = 15;
pub const ST: i32 = 14;
pub const LD: i32 = 13;
pub const ISH: i32 = 11;
pub const ISHST: i32 = 10;
pub const ISHLD: i32 = 9;
pub const NSH: i32 = 7;
pub const NSHST: i32 = 6;
pub const NSHLD: i32 = 5;
pub const OSH: i32 = 3;
pub const OSHST: i32 = 2;
pub const OSHLD: i32 = 1;
}

@@ -0,0 +1,43 @@
//! Access types available on v7 and v8 but not on v7(E)-M or v8-M
/// Full system is the required shareability domain, writes are the required
/// access type
pub struct ST;
dmb_dsb!(ST);
/// Inner Shareable is the required shareability domain, reads and writes are
/// the required access types
pub struct ISH;
dmb_dsb!(ISH);
/// Inner Shareable is the required shareability domain, writes are the required
/// access type
pub struct ISHST;
dmb_dsb!(ISHST);
/// Non-shareable is the required shareability domain, reads and writes are the
/// required access types
pub struct NSH;
dmb_dsb!(NSH);
/// Non-shareable is the required shareability domain, writes are the required
/// access type
pub struct NSHST;
dmb_dsb!(NSHST);
/// Outer Shareable is the required shareability domain, reads and writes are
/// the required access types
pub struct OSH;
dmb_dsb!(OSH);
/// Outer Shareable is the required shareability domain, writes are the required
/// access type
pub struct OSHST;
dmb_dsb!(OSHST);

@@ -0,0 +1,23 @@
/// Full system is the required shareability domain, reads are the required
/// access type
pub struct LD;
dmb_dsb!(LD);
/// Inner Shareable is the required shareability domain, reads are the required
/// access type
pub struct ISHLD;
dmb_dsb!(ISHLD);
/// Non-shareable is the required shareability domain, reads are the required
/// access type
pub struct NSHLD;
dmb_dsb!(NSHLD);
/// Outer Shareable is the required shareability domain, reads are the required
/// access type
pub struct OSHLD;
dmb_dsb!(OSHLD);

@@ -0,0 +1,384 @@
//! # References:
//!
//! - Section 8.3 "16-bit multiplications"
//!
//! Intrinsics that could live here:
//!
//! - \[x\] __smulbb
//! - \[x\] __smulbt
//! - \[x\] __smultb
//! - \[x\] __smultt
//! - \[x\] __smulwb
//! - \[x\] __smulwt
//! - \[x\] __qadd
//! - \[x\] __qsub
//! - \[x\] __qdbl
//! - \[x\] __smlabb
//! - \[x\] __smlabt
//! - \[x\] __smlatb
//! - \[x\] __smlatt
//! - \[x\] __smlawb
//! - \[x\] __smlawt
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::mem::transmute;
types! {
/// ARM-specific 32-bit wide vector of two packed `i16`.
pub struct int16x2_t(i16, i16);
/// ARM-specific 32-bit wide vector of two packed `u16`.
pub struct uint16x2_t(u16, u16);
}
extern "C" {
#[link_name = "llvm.arm.smulbb"]
fn arm_smulbb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smulbt"]
fn arm_smulbt(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smultb"]
fn arm_smultb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smultt"]
fn arm_smultt(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smulwb"]
fn arm_smulwb(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smulwt"]
fn arm_smulwt(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qadd"]
fn arm_qadd(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub"]
fn arm_qsub(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smlabb"]
fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlabt"]
fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlatb"]
fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlatt"]
fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlawb"]
fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlawt"]
fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
}
/// Insert a SMULBB instruction
///
/// Returns the equivalent of a\[0\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbb))]
pub unsafe fn __smulbb(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smulbb(transmute(a), transmute(b))
}
/// Insert a SMULTB instruction
///
/// Returns the equivalent of a\[1\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultb))]
pub unsafe fn __smultb(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smultb(transmute(a), transmute(b))
}
/// Insert a SMULBT instruction
///
/// Returns the equivalent of a\[0\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbt))]
pub unsafe fn __smulbt(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smulbt(transmute(a), transmute(b))
}
/// Insert a SMULTT instruction
///
/// Returns the equivalent of a\[1\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultt))]
pub unsafe fn __smultt(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smultt(transmute(a), transmute(b))
}
/// Insert a SMULWB instruction
///
/// Multiplies the 32-bit signed first operand with the low halfword
/// (as a 16-bit signed integer) of the second operand.
/// Return the top 32 bits of the 48-bit product
#[inline]
#[cfg_attr(test, assert_instr(smulwb))]
pub unsafe fn __smulwb(a: int16x2_t, b: i32) -> i32 {
arm_smulwb(transmute(a), b)
}
/// Insert a SMULWT instruction
///
/// Multiplies the 32-bit signed first operand with the high halfword
/// (as a 16-bit signed integer) of the second operand.
/// Return the top 32 bits of the 48-bit product
#[inline]
#[cfg_attr(test, assert_instr(smulwt))]
pub unsafe fn __smulwt(a: int16x2_t, b: i32) -> i32 {
arm_smulwt(transmute(a), b)
}
/// Signed saturating addition
///
/// Returns the 32-bit saturating signed equivalent of a + b.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qadd))]
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
arm_qadd(a, b)
}
/// Signed saturating subtraction
///
/// Returns the 32-bit saturating signed equivalent of a - b.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qsub))]
pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
arm_qsub(a, b)
}
/// Insert a QADD instruction
///
/// Returns the 32-bit saturating signed equivalent of a + a
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qadd))]
pub unsafe fn __qdbl(a: i32) -> i32 {
arm_qadd(a, a)
}
/// Insert a SMLABB instruction
///
/// Returns the equivalent of a\[0\] * b\[0\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlabb))]
pub unsafe fn __smlabb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlabb(transmute(a), transmute(b), c)
}
/// Insert a SMLABT instruction
///
/// Returns the equivalent of a\[0\] * b\[1\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlabt))]
pub unsafe fn __smlabt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlabt(transmute(a), transmute(b), c)
}
/// Insert a SMLATB instruction
///
/// Returns the equivalent of a\[1\] * b\[0\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlatb))]
pub unsafe fn __smlatb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlatb(transmute(a), transmute(b), c)
}
/// Insert a SMLATT instruction
///
/// Returns the equivalent of a\[1\] * b\[1\] + c
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlatt))]
pub unsafe fn __smlatt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlatt(transmute(a), transmute(b), c)
}
/// Insert a SMLAWB instruction
///
/// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlawb))]
pub unsafe fn __smlawb(a: i32, b: int16x2_t, c: i32) -> i32 {
arm_smlawb(a, transmute(b), c)
}
/// Insert a SMLAWT instruction
///
/// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
/// Sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smlawt))]
pub unsafe fn __smlawt(a: i32, b: int16x2_t, c: i32) -> i32 {
arm_smlawt(a, transmute(b), c)
}
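// Worked example for the formula above: with a = 10, b = (30, 40) and c = 50, __smlawb
// evaluates ((10 * 30) + (50 << 16)) >> 16 = (300 + 3_276_800) >> 16 = 50, i.e. for a
// non-negative product smaller than 1 << 16 the accumulator `c` passes through unchanged.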
#[cfg(test)]
mod tests {
use crate::core_arch::{
arm::*,
simd::{i16x2, i8x4, u8x4},
};
use std::mem::transmute;
use stdarch_test::simd_test;
#[test]
fn smulbb() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
}
}
#[test]
fn smulbt() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
}
}
#[test]
fn smultb() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
}
}
#[test]
fn smultt() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
}
}
#[test]
fn smulwb() {
unsafe {
let a = i16x2::new(10, 20);
let b = 30;
assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
}
}
#[test]
fn smulwt() {
unsafe {
let a = i16x2::new(10, 20);
let b = 30;
assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
}
}
#[test]
fn qadd() {
unsafe {
assert_eq!(super::__qadd(-10, 60), 50);
assert_eq!(super::__qadd(i32::MAX, 10), i32::MAX);
assert_eq!(super::__qadd(i32::MIN, -10), i32::MIN);
}
}
#[test]
fn qsub() {
unsafe {
assert_eq!(super::__qsub(10, 60), -50);
assert_eq!(super::__qsub(i32::MAX, -10), i32::MAX);
assert_eq!(super::__qsub(i32::MIN, 10), i32::MIN);
}
}
#[test]
fn qdbl() {
unsafe {
assert_eq!(super::__qdbl(10), 20);
assert_eq!(super::__qdbl(i32::MAX), i32::MAX);
}
}
#[test]
fn smlabb() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
let c = 50;
let r = (10 * 30) + c;
assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
}
}
#[test]
fn smlabt() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
let c = 50;
let r = (10 * 40) + c;
assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
}
}
#[test]
fn smlatb() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
let c = 50;
let r = (20 * 30) + c;
assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
}
}
#[test]
fn smlatt() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(30, 40);
let c = 50;
let r = (20 * 40) + c;
assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
}
}
#[test]
fn smlawb() {
unsafe {
let a: i32 = 10;
let b = i16x2::new(30, 40);
let c: i32 = 50;
let r: i32 = ((a * 30) + (c << 16)) >> 16;
assert_eq!(super::__smlawb(a, transmute(b), c), r);
}
}
#[test]
fn smlawt() {
unsafe {
let a: i32 = 10;
let b = i16x2::new(30, 40);
let c: i32 = 50;
let r: i32 = ((a * 40) + (c << 16)) >> 16;
assert_eq!(super::__smlawt(a, transmute(b), c), r);
}
}
}

@@ -0,0 +1,117 @@
// Reference: Section 5.4.4 "LDREX / STREX" of ACLE
/// Removes the exclusive lock created by LDREX
// Supported: v6, v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6-M
// NOTE: there's no dedicated CLREX instruction in v6 (<v6k); to clear the exclusive monitor users
// have to do a dummy STREX operation
#[cfg(any(
all(target_feature = "v6k", not(target_feature = "mclass")), // excludes v6-M
all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __clrex() {
extern "C" {
#[link_name = "llvm.arm.clrex"]
fn clrex();
}
clrex()
}
/// Executes an exclusive LDR instruction for an 8-bit value.
// Supported: v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6, v6-M
#[cfg(
target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __ldrexb(p: *const u8) -> u8 {
extern "C" {
#[link_name = "llvm.arm.ldrex.p0i8"]
fn ldrex8(p: *const u8) -> u32;
}
ldrex8(p) as u8
}
/// Executes an exclusive LDR instruction for a 16-bit value.
// Supported: v6K, v7-M, v7-A, v7-R, v8
// Not supported: v5, v6, v6-M
#[cfg(
target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __ldrexh(p: *const u16) -> u16 {
extern "C" {
#[link_name = "llvm.arm.ldrex.p0i16"]
fn ldrex16(p: *const u16) -> u32;
}
ldrex16(p) as u16
}
/// Executes an exclusive LDR instruction for a 32-bit value.
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8
// Not supported: v5, v6-M
#[cfg(any(
all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M
all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __ldrex(p: *const u32) -> u32 {
extern "C" {
#[link_name = "llvm.arm.ldrex.p0i32"]
fn ldrex32(p: *const u32) -> u32;
}
ldrex32(p)
}
/// Executes an exclusive STR instruction for 8-bit values
///
/// Returns `0` if the operation succeeded, or `1` if it failed
// supported: v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6, v6-M
#[cfg(
target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
extern "C" {
#[link_name = "llvm.arm.strex.p0i8"]
fn strex8(value: u32, addr: *mut u8) -> u32;
}
strex8(value, addr)
}
/// Executes an exclusive STR instruction for 16-bit values
///
/// Returns `0` if the operation succeeded, or `1` if it failed
// Supported: v6K, v7-M, v7-A, v7-R, v8
// Not supported: v5, v6, v6-M
#[cfg(
target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
extern "C" {
#[link_name = "llvm.arm.strex.p0i16"]
fn strex16(value: u32, addr: *mut u16) -> u32;
}
strex16(value as u32, addr)
}
/// Executes an exclusive STR instruction for 32-bit values
///
/// Returns `0` if the operation succeeded, or `1` if it failed
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8
// Not supported: v5, v6-M
#[cfg(any(
all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M
all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 {
extern "C" {
#[link_name = "llvm.arm.strex.p0i32"]
fn strex32(value: u32, addr: *mut u32) -> u32;
}
strex32(value, addr)
}
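// A minimal read-modify-write sketch (illustrative only, same availability gate as
// `__ldrex`/`__strex` above): retry the exclusive store until the monitor confirms no other
// observer touched the location in between. The wrapping add is an arbitrary example update.
#[cfg(any(
    all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M
    all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
#[allow(dead_code)]
pub unsafe fn example_atomic_add(addr: *mut u32, val: u32) -> u32 {
    loop {
        let old = __ldrex(addr);
        // `__strex` returns 0 on success and 1 if exclusivity was lost; retry in that case.
        if __strex(old.wrapping_add(val), addr) == 0 {
            return old;
        }
    }
}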

@@ -0,0 +1,135 @@
// # References
//
// - Section 7.4 "Hints" of ACLE
// - Section 7.7 "NOP" of ACLE
/// Generates a WFI (wait for interrupt) hint instruction, or nothing.
///
/// The WFI instruction allows (but does not require) the processor to enter a
/// low-power state until one of a number of asynchronous events occurs.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __wfi() {
hint(HINT_WFI);
}
/// Generates a WFE (wait for event) hint instruction, or nothing.
///
/// The WFE instruction allows (but does not require) the processor to enter a
/// low-power state until some event occurs such as a SEV being issued by
/// another processor.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __wfe() {
hint(HINT_WFE);
}
/// Generates a SEV (send a global event) hint instruction.
///
/// This causes an event to be signaled to all processors in a multiprocessor
/// system. It is a NOP on a uniprocessor system.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M
// LLVM says "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __sev() {
hint(HINT_SEV);
}
/// Generates a send a local event hint instruction.
///
/// This causes an event to be signaled to only the processor executing this
/// instruction. In a multiprocessor system, it is not required to affect the
/// other processors.
// LLVM says "instruction requires: armv8"
#[cfg(any(
target_feature = "v8", // 32-bit ARMv8
target_arch = "aarch64", // AArch64
))]
#[inline(always)]
pub unsafe fn __sevl() {
hint(HINT_SEVL);
}
/// Generates a YIELD hint instruction.
///
/// This enables multithreading software to indicate to the hardware that it is
/// performing a task, for example a spin-lock, that could be swapped out to
/// improve overall system performance.
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
// LLVM says "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __yield() {
hint(HINT_YIELD);
}
/// Generates a DBG instruction.
///
/// This provides a hint to debugging and related systems. The argument must be
/// a constant integer from 0 to 15 inclusive. See implementation documentation
/// for the effect (if any) of this instruction and the meaning of the
/// argument. This is available only when compiling for AArch32.
// Section 10.1 of ACLE says that the supported arches are: 7, 7-M
// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and
// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A
// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2
// "Thumb instruction set support"
#[cfg(target_feature = "v7")]
#[inline(always)]
#[rustc_args_required_const(0)]
pub unsafe fn __dbg(imm4: u32) {
macro_rules! call {
($imm4:expr) => {
llvm_asm!(concat!("DBG ", stringify!($imm4)) : : : : "volatile")
}
}
match imm4 & 0b1111 {
0 => call!(0),
1 => call!(1),
2 => call!(2),
3 => call!(3),
4 => call!(4),
5 => call!(5),
6 => call!(6),
7 => call!(7),
8 => call!(8),
9 => call!(9),
10 => call!(10),
11 => call!(11),
12 => call!(12),
13 => call!(13),
14 => call!(14),
_ => call!(15),
}
}
/// Generates an unspecified no-op instruction.
///
/// Note that not all architectures provide a distinguished NOP instruction. On
/// those that do, it is unspecified whether this intrinsic generates it or
/// another instruction. It is not guaranteed that inserting this instruction
/// will increase execution time.
#[inline(always)]
pub unsafe fn __nop() {
llvm_asm!("NOP" : : : : "volatile")
}
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
fn hint(_: i32);
}
// from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td
const HINT_NOP: i32 = 0;
const HINT_YIELD: i32 = 1;
const HINT_WFE: i32 = 2;
const HINT_WFI: i32 = 3;
const HINT_SEV: i32 = 4;
const HINT_SEVL: i32 = 5;
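// A minimal wait/notify sketch (illustrative only; the flag pointer is hypothetical): the
// waiter parks in a low-power state with WFE until the notifier publishes the flag and
// sends an event with SEV.
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[allow(dead_code)]
unsafe fn example_wait(flag: *const u32) {
    while core::ptr::read_volatile(flag) == 0 {
        __wfe(); // sleep until an event (such as the SEV below) arrives
    }
}
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[allow(dead_code)]
unsafe fn example_notify(flag: *mut u32) {
    core::ptr::write_volatile(flag, 1);
    __sev(); // wake every core currently parked in __wfe
}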

@@ -0,0 +1,158 @@
//! ARM C Language Extensions (ACLE)
//!
//! # Developer notes
//!
//! Below is a list of built-in targets that are representative of the different ARM
//! architectures; the list includes the `target_feature`s they possess.
//!
//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t`
//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te`
//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6`
//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass`
//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass`
//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass`
//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass`
//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass`
//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon`
//!
//! Section 10.1 of ACLE says:
//!
//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
//! its predecessor instruction set."
//!
//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
//! its predecessor instruction set."
//!
//! From that info and from looking at how LLVM features work (using custom targets) we can identify
//! features that are subsets of others:
//!
//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is
//! enabled as well.
//!
//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8`
//! - `v6 < v8m < v6t2`
//! - `v7 < v8m.main`
//!
//! *NOTE*: Section 5.4.7 of ACLE says:
//!
//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
//! intrinsics defined in Saturating intrinsics are available."
//!
//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
//! targets so we have to work around this difference.
//!
//! # References
//!
//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest)
// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported
// via CP15 instructions. See Section 10.1 of ACLE
mod barrier;
pub use self::barrier::*;
mod hints;
pub use self::hints::*;
mod registers;
pub use self::registers::*;
mod ex;
pub use self::ex::*;
// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD)
// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see
// section 5.4.7)
// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on
// '+v5te' rather than on '+dsp'
#[cfg(all(
not(target_arch = "aarch64"),
any(
// >= v5TE but excludes v7-M
all(target_feature = "v5te", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp"),
)
))]
mod dsp;
#[cfg(all(
not(target_arch = "aarch64"),
any(
all(target_feature = "v5te", not(target_feature = "mclass")),
all(target_feature = "mclass", target_feature = "dsp"),
)
))]
pub use self::dsp::*;
// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT)
#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))]
mod sat;
#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))]
pub use self::sat::*;
// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says
// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated
#[cfg(all(
not(target_arch = "aarch64"),
any(
// v7-A, v7-R
all(target_feature = "v6", not(target_feature = "mclass")),
// v7E-M
all(target_feature = "mclass", target_feature = "dsp")
)
))]
mod simd32;
#[cfg(all(
not(target_arch = "aarch64"),
any(
all(target_feature = "v6", not(target_feature = "mclass")),
all(target_feature = "mclass", target_feature = "dsp")
)
))]
pub use self::simd32::*;
mod sealed {
pub trait Dmb {
unsafe fn __dmb(&self);
}
pub trait Dsb {
unsafe fn __dsb(&self);
}
pub trait Isb {
unsafe fn __isb(&self);
}
pub trait Rsr {
unsafe fn __rsr(&self) -> u32;
}
pub trait Rsr64 {
unsafe fn __rsr64(&self) -> u64;
}
pub trait Rsrp {
unsafe fn __rsrp(&self) -> *const u8;
}
pub trait Wsr {
unsafe fn __wsr(&self, value: u32);
}
pub trait Wsr64 {
unsafe fn __wsr64(&self, value: u64);
}
pub trait Wsrp {
unsafe fn __wsrp(&self, value: *const u8);
}
}

@@ -0,0 +1,9 @@
/// Application Program Status Register
pub struct APSR;
// Note (@Lokathor): Because this breaks the use of Rust on the Game Boy
// Advance, this change must be reverted until Rust learns to handle cpu state
// properly. See also: https://github.com/rust-lang/stdarch/issues/702
//#[cfg(any(not(target_feature = "thumb-state"), target_feature = "v6t2"))]
//rsr!(APSR);

@@ -0,0 +1,121 @@
#[allow(unused_macros)]
macro_rules! rsr {
($R:ident) => {
impl super::super::sealed::Rsr for $R {
unsafe fn __rsr(&self) -> u32 {
let r: u32;
llvm_asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile");
r
}
}
};
}
#[allow(unused_macros)]
macro_rules! rsrp {
($R:ident) => {
impl super::super::sealed::Rsrp for $R {
unsafe fn __rsrp(&self) -> *const u8 {
let r: *const u8;
llvm_asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile");
r
}
}
};
}
#[allow(unused_macros)]
macro_rules! wsr {
($R:ident) => {
impl super::super::sealed::Wsr for $R {
unsafe fn __wsr(&self, value: u32) {
llvm_asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile");
}
}
};
}
#[allow(unused_macros)]
macro_rules! wsrp {
($R:ident) => {
impl super::super::sealed::Wsrp for $R {
unsafe fn __wsrp(&self, value: *const u8) {
llvm_asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile");
}
}
};
}
#[cfg(target_feature = "mclass")]
mod v6m;
#[cfg(target_feature = "mclass")]
pub use self::v6m::*;
#[cfg(all(target_feature = "v7", target_feature = "mclass"))]
mod v7m;
#[cfg(all(target_feature = "v7", target_feature = "mclass"))]
pub use self::v7m::*;
#[cfg(not(target_arch = "aarch64"))]
mod aarch32;
#[cfg(not(target_arch = "aarch64"))]
pub use self::aarch32::*;
/// Reads a 32-bit system register
#[inline(always)]
pub unsafe fn __rsr<R>(reg: R) -> u32
where
R: super::sealed::Rsr,
{
reg.__rsr()
}
/// Reads a 64-bit system register
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub unsafe fn __rsr64<R>(reg: R) -> u64
where
R: super::sealed::Rsr64,
{
reg.__rsr64()
}
/// Reads a system register containing an address
#[inline(always)]
pub unsafe fn __rsrp<R>(reg: R) -> *const u8
where
R: super::sealed::Rsrp,
{
reg.__rsrp()
}
/// Writes a 32-bit system register
#[inline(always)]
pub unsafe fn __wsr<R>(reg: R, value: u32)
where
R: super::sealed::Wsr,
{
reg.__wsr(value)
}
/// Writes a 64-bit system register
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub unsafe fn __wsr64<R>(reg: R, value: u64)
where
R: super::sealed::Wsr64,
{
reg.__wsr64(value)
}
/// Writes a system register containing an address
#[inline(always)]
pub unsafe fn __wsrp<R>(reg: R, value: *const u8)
where
R: super::sealed::Wsrp,
{
reg.__wsrp(value)
}
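// A minimal M-profile sketch (illustrative only, gated like the `mclass` registers above):
// mask interrupts around a critical section by saving, setting, and restoring PRIMASK.
#[cfg(target_feature = "mclass")]
#[allow(dead_code)]
unsafe fn example_with_interrupts_masked() {
    let saved = __rsr(PRIMASK); // read the current mask
    __wsr(PRIMASK, 1); // PRIMASK = 1 masks all configurable-priority exceptions
    // ... critical section would go here ...
    __wsr(PRIMASK, saved); // restore the previous state
}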

@@ -0,0 +1,39 @@
/// CONTROL register
pub struct CONTROL;
rsr!(CONTROL);
wsr!(CONTROL);
/// Execution Program Status Register
pub struct EPSR;
rsr!(EPSR);
/// Interrupt Program Status Register
pub struct IPSR;
rsr!(IPSR);
/// Main Stack Pointer
pub struct MSP;
rsrp!(MSP);
wsrp!(MSP);
/// Priority Mask Register
pub struct PRIMASK;
rsr!(PRIMASK);
wsr!(PRIMASK);
/// Process Stack Pointer
pub struct PSP;
rsrp!(PSP);
wsrp!(PSP);
/// Program Status Register
#[allow(non_camel_case_types)]
pub struct xPSR;
rsr!(xPSR);

@@ -0,0 +1,17 @@
/// Base Priority Mask Register
pub struct BASEPRI;
rsr!(BASEPRI);
wsr!(BASEPRI);
/// Base Priority Mask Register (conditional write)
#[allow(non_camel_case_types)]
pub struct BASEPRI_MAX;
wsr!(BASEPRI_MAX);
/// Fault Mask Register
pub struct FAULTMASK;
rsr!(FAULTMASK);
wsr!(FAULTMASK);

@@ -0,0 +1,8 @@
//! # References:
//!
//! - Section 8.4 "Saturating intrinsics"
//!
//! Intrinsics that could live here:
//!
//! - __ssat
//! - __usat

@@ -0,0 +1,728 @@
//! # References
//!
//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE
//!
//! Intrinsics that could live here
//!
//! - \[x\] __sel
//! - \[ \] __ssat16
//! - \[ \] __usat16
//! - \[ \] __sxtab16
//! - \[ \] __sxtb16
//! - \[ \] __uxtab16
//! - \[ \] __uxtb16
//! - \[x\] __qadd8
//! - \[x\] __qsub8
//! - \[x\] __sadd8
//! - \[x\] __shadd8
//! - \[x\] __shsub8
//! - \[x\] __ssub8
//! - \[ \] __uadd8
//! - \[ \] __uhadd8
//! - \[ \] __uhsub8
//! - \[ \] __uqadd8
//! - \[ \] __uqsub8
//! - \[x\] __usub8
//! - \[x\] __usad8
//! - \[x\] __usada8
//! - \[x\] __qadd16
//! - \[x\] __qasx
//! - \[x\] __qsax
//! - \[x\] __qsub16
//! - \[x\] __sadd16
//! - \[x\] __sasx
//! - \[x\] __shadd16
//! - \[ \] __shasx
//! - \[ \] __shsax
//! - \[x\] __shsub16
//! - \[ \] __ssax
//! - \[ \] __ssub16
//! - \[ \] __uadd16
//! - \[ \] __uasx
//! - \[ \] __uhadd16
//! - \[ \] __uhasx
//! - \[ \] __uhsax
//! - \[ \] __uhsub16
//! - \[ \] __uqadd16
//! - \[ \] __uqasx
//! - \[x\] __uqsax
//! - \[ \] __uqsub16
//! - \[ \] __usax
//! - \[ \] __usub16
//! - \[x\] __smlad
//! - \[ \] __smladx
//! - \[ \] __smlald
//! - \[ \] __smlaldx
//! - \[x\] __smlsd
//! - \[ \] __smlsdx
//! - \[ \] __smlsld
//! - \[ \] __smlsldx
//! - \[x\] __smuad
//! - \[x\] __smuadx
//! - \[x\] __smusd
//! - \[x\] __smusdx
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute};
types! {
/// ARM-specific 32-bit wide vector of four packed `i8`.
pub struct int8x4_t(i8, i8, i8, i8);
/// ARM-specific 32-bit wide vector of four packed `u8`.
pub struct uint8x4_t(u8, u8, u8, u8);
}
macro_rules! dsp_call {
($name:expr, $a:expr, $b:expr) => {
transmute($name(transmute($a), transmute($b)))
};
}
extern "C" {
#[link_name = "llvm.arm.qadd8"]
fn arm_qadd8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub8"]
fn arm_qsub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsub16"]
fn arm_qsub16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qadd16"]
fn arm_qadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qasx"]
fn arm_qasx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.qsax"]
fn arm_qsax(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sadd16"]
fn arm_sadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sadd8"]
fn arm_sadd8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smlad"]
fn arm_smlad(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.smlsd"]
fn arm_smlsd(a: i32, b: i32, c: i32) -> i32;
#[link_name = "llvm.arm.sasx"]
fn arm_sasx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.sel"]
fn arm_sel(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shadd8"]
fn arm_shadd8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shadd16"]
fn arm_shadd16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shsub8"]
fn arm_shsub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.ssub8"]
fn arm_ssub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.usub8"]
fn arm_usub8(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.shsub16"]
fn arm_shsub16(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smuad"]
fn arm_smuad(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smuadx"]
fn arm_smuadx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smusd"]
fn arm_smusd(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.smusdx"]
fn arm_smusdx(a: i32, b: i32) -> i32;
#[link_name = "llvm.arm.usad8"]
fn arm_usad8(a: i32, b: i32) -> u32;
}
/// Saturating four 8-bit integer additions
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
/// res\[2\] = a\[2\] + b\[2\]
/// res\[3\] = a\[3\] + b\[3\]
#[inline]
#[cfg_attr(test, assert_instr(qadd8))]
pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_qadd8, a, b)
}
/// Saturating four 8-bit integer subtractions
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
#[inline]
#[cfg_attr(test, assert_instr(qsub8))]
pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_qsub8, a, b)
}
/// Saturating two 16-bit integer subtractions
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
#[inline]
#[cfg_attr(test, assert_instr(qsub16))]
pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_qsub16, a, b)
}
/// Saturating two 16-bit integer additions
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
#[inline]
#[cfg_attr(test, assert_instr(qadd16))]
pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_qadd16, a, b)
}
/// Returns the 16-bit signed saturated equivalent of
///
/// res\[0\] = a\[0\] - b\[1\]
/// res\[1\] = a\[1\] + b\[0\]
#[inline]
#[cfg_attr(test, assert_instr(qasx))]
pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_qasx, a, b)
}
/// Returns the 16-bit signed saturated equivalent of
///
/// res\[0\] = a\[0\] + b\[1\]
/// res\[1\] = a\[1\] - b\[0\]
#[inline]
#[cfg_attr(test, assert_instr(qsax))]
pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_qsax, a, b)
}
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sadd16))]
pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_sadd16, a, b)
}
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] + b\[0\]
/// res\[1\] = a\[1\] + b\[1\]
/// res\[2\] = a\[2\] + b\[2\]
/// res\[3\] = a\[3\] + b\[3\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sadd8))]
pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_sadd8, a, b)
}
/// Dual 16-bit Signed Multiply with Addition of products
/// and 32-bit accumulation.
///
/// Returns the 16-bit signed equivalent of
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c
#[inline]
#[cfg_attr(test, assert_instr(smlad))]
pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlad(transmute(a), transmute(b), c)
}
/// Dual 16-bit Signed Multiply with Subtraction of products
/// and 32-bit accumulation and overflow detection.
///
/// Returns the 16-bit signed equivalent of
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c
#[inline]
#[cfg_attr(test, assert_instr(smlsd))]
pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
arm_smlsd(transmute(a), transmute(b), c)
}
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[1\]
/// res\[1\] = a\[1\] + b\[0\]
///
/// and the GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(sasx))]
pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_sasx, a, b)
}
/// Select bytes from each operand according to APSR GE flags
///
/// Returns the equivalent of
///
/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\]
/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\]
/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\]
/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\]
///
/// where GE are bits of APSR
#[inline]
#[cfg_attr(test, assert_instr(sel))]
pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_sel, a, b)
}
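// A minimal pairing sketch (illustrative only): `__sadd8` updates the APSR.GE bits per lane
// and `__sel` then picks, per byte, between its operands. Whether the GE flags stay live
// between the two calls is up to the code generator, so this only shows the intended
// ACLE-style usage.
#[allow(dead_code)]
unsafe fn example_sel_after_sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
    let _ = __sadd8(a, b); // sets GE[i] when lane i of the sum is non-negative
    __sel(a, b) // returns a[i] where GE[i] is set, b[i] otherwise
}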
/// Signed halving parallel byte-wise addition.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
/// res\[2\] = (a\[2\] + b\[2\]) / 2
/// res\[3\] = (a\[3\] + b\[3\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shadd8))]
pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_shadd8, a, b)
}
/// Signed halving parallel halfword-wise addition.
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shadd16))]
pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_shadd16, a, b)
}
/// Signed halving parallel byte-wise subtraction.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
/// res\[2\] = (a\[2\] - b\[2\]) / 2
/// res\[3\] = (a\[3\] - b\[3\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shsub8))]
pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_shsub8, a, b)
}
/// Inserts a `USUB8` instruction.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
///
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
/// The GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(usub8))]
pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
dsp_call!(arm_usub8, a, b)
}
/// Inserts a `SSUB8` instruction.
///
/// Returns the 8-bit signed equivalent of
///
/// res\[0\] = a\[0\] - b\[0\]
/// res\[1\] = a\[1\] - b\[1\]
/// res\[2\] = a\[2\] - b\[2\]
/// res\[3\] = a\[3\] - b\[3\]
///
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
/// The GE bits of the APSR are set.
#[inline]
#[cfg_attr(test, assert_instr(ssub8))]
pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_ssub8, a, b)
}
/// Signed halving parallel halfword-wise subtraction.
///
/// Returns the 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
#[inline]
#[cfg_attr(test, assert_instr(shsub16))]
pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
dsp_call!(arm_shsub16, a, b)
}
/// Signed Dual Multiply Add.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smuad))]
pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smuad(transmute(a), transmute(b))
}
/// Signed Dual Multiply Add Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smuadx))]
pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smuadx(transmute(a), transmute(b))
}
/// Signed Dual Multiply Subtract.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
///
/// and sets the Q flag if overflow occurs on the subtraction.
#[inline]
#[cfg_attr(test, assert_instr(smusd))]
pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smusd(transmute(a), transmute(b))
}
/// Signed Dual Multiply Subtract Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
///
/// and sets the Q flag if overflow occurs on the subtraction.
#[inline]
#[cfg_attr(test, assert_instr(smusdx))]
pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
arm_smusdx(transmute(a), transmute(b))
}
/// Sum of 8-bit absolute differences.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\])
#[inline]
#[cfg_attr(test, assert_instr(usad8))]
pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 {
arm_usad8(transmute(a), transmute(b))
}
/// Sum of 8-bit absolute differences and constant.
///
/// Returns the 8-bit unsigned equivalent of
///
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
/// abs(a\[2\] - b\[2\]) + abs(a\[3\] - b\[3\]) + c
#[inline]
#[cfg_attr(test, assert_instr(usad8))]
pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
__usad8(a, b) + c
}
#[cfg(test)]
mod tests {
use crate::core_arch::simd::{i16x2, i8x4, u8x4};
use std::mem::transmute;
use stdarch_test::simd_test;
#[test]
fn qadd8() {
unsafe {
let a = i8x4::new(1, 2, 3, i8::MAX);
let b = i8x4::new(2, -1, 0, 1);
let c = i8x4::new(3, 1, 3, i8::MAX);
let r: i8x4 = dsp_call!(super::__qadd8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn qsub8() {
unsafe {
let a = i8x4::new(1, 2, 3, i8::MIN);
let b = i8x4::new(2, -1, 0, 1);
let c = i8x4::new(-1, 3, 3, i8::MIN);
let r: i8x4 = dsp_call!(super::__qsub8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn qadd16() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(2, -1);
let c = i16x2::new(3, 1);
let r: i16x2 = dsp_call!(super::__qadd16, a, b);
assert_eq!(r, c);
}
}
#[test]
fn qsub16() {
unsafe {
let a = i16x2::new(10, 20);
let b = i16x2::new(20, -10);
let c = i16x2::new(-10, 30);
let r: i16x2 = dsp_call!(super::__qsub16, a, b);
assert_eq!(r, c);
}
}
#[test]
fn qasx() {
unsafe {
let a = i16x2::new(1, i16::MAX);
let b = i16x2::new(2, 2);
let c = i16x2::new(-1, i16::MAX);
let r: i16x2 = dsp_call!(super::__qasx, a, b);
assert_eq!(r, c);
}
}
#[test]
fn qsax() {
unsafe {
let a = i16x2::new(1, i16::MAX);
let b = i16x2::new(2, 2);
let c = i16x2::new(3, i16::MAX - 2);
let r: i16x2 = dsp_call!(super::__qsax, a, b);
assert_eq!(r, c);
}
}
#[test]
fn sadd16() {
unsafe {
let a = i16x2::new(1, i16::MAX);
let b = i16x2::new(2, 2);
let c = i16x2::new(3, -i16::MAX);
let r: i16x2 = dsp_call!(super::__sadd16, a, b);
assert_eq!(r, c);
}
}
#[test]
fn sadd8() {
unsafe {
let a = i8x4::new(1, 2, 3, i8::MAX);
let b = i8x4::new(4, 3, 2, 2);
let c = i8x4::new(5, 5, 5, -i8::MAX);
let r: i8x4 = dsp_call!(super::__sadd8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn sasx() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(2, 1);
let c = i16x2::new(0, 4);
let r: i16x2 = dsp_call!(super::__sasx, a, b);
assert_eq!(r, c);
}
}
#[test]
fn smlad() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(3, 4);
let r = super::__smlad(transmute(a), transmute(b), 10);
assert_eq!(r, (1 * 3) + (2 * 4) + 10);
}
}
#[test]
fn smlsd() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(3, 4);
let r = super::__smlsd(transmute(a), transmute(b), 10);
assert_eq!(r, ((1 * 3) - (2 * 4)) + 10);
}
}
#[test]
fn sel() {
unsafe {
let a = i8x4::new(1, 2, 3, i8::MAX);
let b = i8x4::new(4, 3, 2, 2);
// call sadd8() to set GE bits
super::__sadd8(transmute(a), transmute(b));
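// __sadd8 sets GE[i] whenever the un-wrapped sum of lane i is non-negative;
// here 1+4, 2+3, 3+2 and 127+2 are all >= 0, so every GE bit ends up set and
// __sel below picks each lane from `a`.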
let c = i8x4::new(1, 2, 3, i8::MAX);
let r: i8x4 = dsp_call!(super::__sel, a, b);
assert_eq!(r, c);
}
}
#[test]
fn shadd8() {
unsafe {
let a = i8x4::new(1, 2, 3, 4);
let b = i8x4::new(5, 4, 3, 2);
let c = i8x4::new(3, 3, 3, 3);
let r: i8x4 = dsp_call!(super::__shadd8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn shadd16() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let c = i16x2::new(3, 3);
let r: i16x2 = dsp_call!(super::__shadd16, a, b);
assert_eq!(r, c);
}
}
#[test]
fn shsub8() {
unsafe {
let a = i8x4::new(1, 2, 3, 4);
let b = i8x4::new(5, 4, 3, 2);
let c = i8x4::new(-2, -1, 0, 1);
let r: i8x4 = dsp_call!(super::__shsub8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn ssub8() {
unsafe {
let a = i8x4::new(1, 2, 3, 4);
let b = i8x4::new(5, 4, 3, 2);
let c = i8x4::new(-4, -2, 0, 2);
let r: i8x4 = dsp_call!(super::__ssub8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn usub8() {
unsafe {
let a = u8x4::new(1, 2, 3, 4);
let b = u8x4::new(5, 4, 3, 2);
let c = u8x4::new(252, 254, 0, 2);
let r: u8x4 = dsp_call!(super::__usub8, a, b);
assert_eq!(r, c);
}
}
#[test]
fn shsub16() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let c = i16x2::new(-2, -1);
let r: i16x2 = dsp_call!(super::__shsub16, a, b);
assert_eq!(r, c);
}
}
#[test]
fn smuad() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let r = super::__smuad(transmute(a), transmute(b));
assert_eq!(r, 13);
}
}
#[test]
fn smuadx() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let r = super::__smuadx(transmute(a), transmute(b));
assert_eq!(r, 14);
}
}
#[test]
fn smusd() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let r = super::__smusd(transmute(a), transmute(b));
assert_eq!(r, -3);
}
}
#[test]
fn smusdx() {
unsafe {
let a = i16x2::new(1, 2);
let b = i16x2::new(5, 4);
let r = super::__smusdx(transmute(a), transmute(b));
assert_eq!(r, -6);
}
}
#[test]
fn usad8() {
unsafe {
let a = i8x4::new(1, 2, 3, 4);
let b = i8x4::new(4, 3, 2, 1);
let r = super::__usad8(transmute(a), transmute(b));
assert_eq!(r, 8);
}
}
#[test]
fn usad8a() {
unsafe {
let a = i8x4::new(1, 2, 3, 4);
let b = i8x4::new(4, 3, 2, 1);
let c = 10;
let r = super::__usada8(transmute(a), transmute(b), c);
assert_eq!(r, 8 + c);
}
}
}


@ -0,0 +1,68 @@
//! ARM compiler specific intrinsics
//!
//! # References
//!
//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref]
//!
//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610
#[cfg(test)]
use stdarch_test::assert_instr;
/// Inserts a breakpoint instruction.
///
/// `val` is a compile-time constant integer in range `[0, 255]`.
///
/// The breakpoint instruction inserted is:
///
/// * `BKPT` when compiling as T32,
/// * `BRK` when compiling as A32 or A64.
///
/// # Safety
///
/// If `val` is out-of-range the behavior is **undefined**.
///
/// # Note
///
/// [ARM's documentation][arm_docs] defines that `__breakpoint` accepts the
/// following values for `val`:
///
/// - `0...65535` when compiling as A32 or A64,
/// - `0...255` when compiling as T32.
///
/// The current implementation only accepts values in range `[0, 255]` - if the
/// value is out-of-range the behavior is **undefined**.
///
/// [arm_docs]: https://developer.arm.com/docs/100067/latest/compiler-specific-intrinsics/__breakpoint-intrinsic
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(bkpt, val = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(brk, val = 0))]
#[inline(always)]
#[rustc_args_required_const(0)]
pub unsafe fn __breakpoint(val: i32) {
// Ensure that this compiles correctly on non-arm architectures, so libstd
// doc builds work. The proper macro will shadow this definition below.
#[allow(unused_macros)]
macro_rules! call {
($e:expr) => {
()
};
}
#[cfg(target_arch = "arm")]
macro_rules! call {
($imm8:expr) => {
llvm_asm!(concat!("BKPT ", stringify!($imm8)) : : : : "volatile")
}
}
#[cfg(target_arch = "aarch64")]
macro_rules! call {
($imm8:expr) => {
llvm_asm!(concat!("BRK ", stringify!($imm8)) : : : : "volatile")
}
}
// We can't `panic!` inside this intrinsic, so we can't really validate the
// arguments here. If `val` is out-of-range, this macro falls back to using
// only its low 8 bits (`val & 0xFF`):
constify_imm8!(val, call);
}
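// A minimal usage sketch (illustrative, not part of the imported file): the
// `#[rustc_args_required_const(0)]` attribute means the argument must be a
// constant the compiler can see, e.g.
//
//     unsafe { __breakpoint(0) }  // BKPT 0 on T32, BRK 0 on A32/A64
//
// while passing an ordinary runtime `i32` value is rejected at compile time.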


@ -0,0 +1,121 @@
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32b")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")]
fn crc32b_(crc: u32, data: u32) -> u32;
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32h")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32h")]
fn crc32h_(crc: u32, data: u32) -> u32;
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32w")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")]
fn crc32w_(crc: u32, data: u32) -> u32;
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cb")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cb")]
fn crc32cb_(crc: u32, data: u32) -> u32;
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32ch")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32ch")]
fn crc32ch_(crc: u32, data: u32) -> u32;
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cw")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")]
fn crc32cw_(crc: u32, data: u32) -> u32;
}
#[cfg(test)]
use stdarch_test::assert_instr;
/// CRC32 single round checksum for bytes (8 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32b))]
pub unsafe fn __crc32b(crc: u32, data: u8) -> u32 {
crc32b_(crc, data as u32)
}
/// CRC32 single round checksum for half words (16 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32h))]
pub unsafe fn __crc32h(crc: u32, data: u16) -> u32 {
crc32h_(crc, data as u32)
}
/// CRC32 single round checksum for words (32 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32w))]
pub unsafe fn __crc32w(crc: u32, data: u32) -> u32 {
crc32w_(crc, data)
}
/// CRC32-C single round checksum for bytes (8 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32cb))]
pub unsafe fn __crc32cb(crc: u32, data: u8) -> u32 {
crc32cb_(crc, data as u32)
}
/// CRC32-C single round checksum for half words (16 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32ch))]
pub unsafe fn __crc32ch(crc: u32, data: u16) -> u32 {
crc32ch_(crc, data as u32)
}
/// CRC32-C single round checksum for words (32 bits).
#[inline]
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
#[cfg_attr(test, assert_instr(crc32cw))]
pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 {
crc32cw_(crc, data)
}
#[cfg(test)]
mod tests {
use crate::core_arch::{arm::*, simd::*};
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "crc")]
unsafe fn test_crc32b() {
assert_eq!(__crc32b(0, 0), 0);
assert_eq!(__crc32b(0, 255), 755167117);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32h() {
assert_eq!(__crc32h(0, 0), 0);
assert_eq!(__crc32h(0, 16384), 1994146192);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32w() {
assert_eq!(__crc32w(0, 0), 0);
assert_eq!(__crc32w(0, 4294967295), 3736805603);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32cb() {
assert_eq!(__crc32cb(0, 0), 0);
assert_eq!(__crc32cb(0, 255), 2910671697);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32ch() {
assert_eq!(__crc32ch(0, 0), 0);
assert_eq!(__crc32ch(0, 16384), 1098587580);
}
#[simd_test(enable = "crc")]
unsafe fn test_crc32cw() {
assert_eq!(__crc32cw(0, 0), 0);
assert_eq!(__crc32cw(0, 4294967295), 3080238136);
}
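// Illustrative addition (not from upstream stdarch): chaining the byte-wise
// intrinsic over a buffer; the expected value follows from the two
// single-byte results asserted in `test_crc32b` above.
#[simd_test(enable = "crc")]
unsafe fn test_crc32b_chained() {
let mut crc = 0u32;
for &byte in &[0u8, 255u8] {
crc = __crc32b(crc, byte);
}
assert_eq!(crc, 755167117);
}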
}


@ -0,0 +1,47 @@
//! ARM intrinsics.
//!
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
//!
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
#![allow(non_camel_case_types)]
mod armclang;
pub use self::armclang::*;
mod v6;
pub use self::v6::*;
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
mod v7;
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
pub use self::v7::*;
#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))]
mod neon;
#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))]
pub use self::neon::*;
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
mod crc;
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
pub use self::crc::*;
pub use crate::core_arch::acle::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `UDF`
#[cfg(target_arch = "arm")]
#[cfg_attr(test, assert_instr(udf))]
#[inline]
pub unsafe fn udf() -> ! {
crate::intrinsics::abort()
}
#[cfg(test)]
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
pub(crate) mod test_support;

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,830 @@
use crate::core_arch::{arm::*, simd::*};
use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
macro_rules! V_u8 {
() => {
vec![0x00u8, 0x01u8, 0x02u8, 0x0Fu8, 0x80u8, 0xF0u8, 0xFFu8]
};
}
macro_rules! V_u16 {
() => {
vec![
0x0000u16, 0x0101u16, 0x0202u16, 0x0F0Fu16, 0x8000u16, 0xF0F0u16, 0xFFFFu16,
]
};
}
macro_rules! V_u32 {
() => {
vec![
0x00000000u32,
0x01010101u32,
0x02020202u32,
0x0F0F0F0Fu32,
0x80000000u32,
0xF0F0F0F0u32,
0xFFFFFFFFu32,
]
};
}
macro_rules! V_u64 {
() => {
vec![
0x0000000000000000u64,
0x0101010101010101u64,
0x0202020202020202u64,
0x0F0F0F0F0F0F0F0Fu64,
0x8080808080808080u64,
0xF0F0F0F0F0F0F0F0u64,
0xFFFFFFFFFFFFFFFFu64,
]
};
}
macro_rules! V_i8 {
() => {
vec![
0x00i8, 0x01i8, 0x02i8, 0x0Fi8, -128i8, /* 0x80 */
-16i8, /* 0xF0 */
-1i8, /* 0xFF */
]
};
}
macro_rules! V_i16 {
() => {
vec![
0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */
-3856i16, /* 0xF0F0 */
-1i16, /* 0xFFFF */
]
};
}
macro_rules! V_i32 {
() => {
vec![
0x00000000i32,
0x01010101i32,
0x02020202i32,
0x0F0F0F0Fi32,
-2139062144i32, /* 0x80808080 */
-252645136i32, /* 0xF0F0F0F0 */
-1i32, /* 0xFFFFFFFF */
]
};
}
macro_rules! V_i64 {
() => {
vec![
0x0000000000000000i64,
0x0101010101010101i64,
0x0202020202020202i64,
0x0F0F0F0F0F0F0F0Fi64,
-9223372036854775808i64, /* 0x8000000000000000 */
-1152921504606846976i64, /* 0xF000000000000000 */
-1i64, /* 0xFFFFFFFFFFFFFFFF */
]
};
}
macro_rules! V_f32 {
() => {
vec![
0.0f32,
1.0f32,
-1.0f32,
1.2f32,
2.4f32,
std::f32::MAX,
std::f32::MIN,
std::f32::INFINITY,
std::f32::NEG_INFINITY,
std::f32::NAN,
]
};
}
macro_rules! to64 {
($t : ident) => {
|v: $t| -> u64 { transmute(v) }
};
}
macro_rules! to128 {
($t : ident) => {
|v: $t| -> u128 { transmute(v) }
};
}
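// Generic comparison driver used by the `gen_test_fn!` helpers below:
// `fill1` splats a scalar into the vector type under test, `test_fun` runs
// the intrinsic, `verify_fun` computes the expected lane value on scalars,
// `fill2` splats that expectation, and `cast` reinterprets both vectors as
// plain integers so they can be compared with `assert_eq!`.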
pub(crate) fn test<T, U, V, W, X>(
vals: Vec<T>,
fill1: fn(T) -> V,
fill2: fn(U) -> W,
cast: fn(W) -> X,
test_fun: fn(V, V) -> W,
verify_fun: fn(T, T) -> U,
) where
T: Copy + core::fmt::Debug + std::cmp::PartialEq,
U: Copy + core::fmt::Debug + std::cmp::PartialEq,
V: Copy + core::fmt::Debug,
W: Copy + core::fmt::Debug,
X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
let pairs = vals.iter().zip(vals.iter());
for (i, j) in pairs {
let a: V = fill1(*i);
let b: V = fill1(*j);
let actual_pre: W = test_fun(a, b);
let expected_pre: W = fill2(verify_fun(*i, *j));
let actual: X = cast(actual_pre);
let expected: X = cast(expected_pre);
assert_eq!(
actual, expected,
"[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
*i, *j, &a, &b, actual_pre, &a, &b, expected_pre
);
}
}
macro_rules! gen_test_fn {
($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
unsafe {
test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
};
}
};
}
macro_rules! gen_fill_fn {
($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => {
pub(crate) fn $id(val: $in_t) -> $out_t {
let initial: [$in_t; $num_els] = [val; $num_els];
let result: $cmp_t = unsafe { transmute(initial) };
let result_out: $out_t = unsafe { transmute(result) };
// println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits());
result_out
}
};
}
gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
gen_test_fn!(
test_ari_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
test_bit_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
test_cmp_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
testq_ari_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
testq_bit_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
testq_cmp_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
test_ari_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
gen_test_fn!(
test_bit_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
gen_test_fn!(
test_cmp_s8,
i8,
u8,
int8x8_t,
uint8x8_t,
u64,
V_i8!(),
fill_s8,
fill_u8,
to64!(uint8x8_t)
);
gen_test_fn!(
testq_ari_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
gen_test_fn!(
testq_bit_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
gen_test_fn!(
testq_cmp_s8,
i8,
u8,
int8x16_t,
uint8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_u8,
to128!(uint8x16_t)
);
gen_test_fn!(
test_ari_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
test_bit_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
test_cmp_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
testq_ari_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
testq_bit_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
testq_cmp_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
test_ari_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
gen_test_fn!(
test_bit_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
gen_test_fn!(
test_cmp_s16,
i16,
u16,
int16x4_t,
uint16x4_t,
u64,
V_i16!(),
fill_s16,
fill_u16,
to64!(uint16x4_t)
);
gen_test_fn!(
testq_ari_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
gen_test_fn!(
testq_bit_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
gen_test_fn!(
testq_cmp_s16,
i16,
u16,
int16x8_t,
uint16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_u16,
to128!(uint16x8_t)
);
gen_test_fn!(
test_ari_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
test_bit_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
test_cmp_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
testq_bit_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
testq_cmp_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
test_ari_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
gen_test_fn!(
test_bit_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
gen_test_fn!(
test_cmp_s32,
i32,
u32,
int32x2_t,
uint32x2_t,
u64,
V_i32!(),
fill_s32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
gen_test_fn!(
testq_bit_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
gen_test_fn!(
testq_cmp_s32,
i32,
u32,
int32x4_t,
uint32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_u32,
to128!(uint32x4_t)
);
gen_test_fn!(
test_ari_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
test_bit_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
test_cmp_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_ari_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
testq_bit_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
testq_cmp_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
test_ari_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
gen_test_fn!(
test_bit_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
gen_test_fn!(
test_cmp_s64,
i64,
u64,
int64x1_t,
uint64x1_t,
u64,
V_i64!(),
fill_s64,
fill_u64,
to64!(uint64x1_t)
);
gen_test_fn!(
testq_ari_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
gen_test_fn!(
testq_bit_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
gen_test_fn!(
testq_cmp_s64,
i64,
u64,
int64x2_t,
uint64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_u64,
to128!(uint64x2_t)
);
gen_test_fn!(
test_ari_f32,
f32,
f32,
float32x2_t,
float32x2_t,
u64,
V_f32!(),
fill_f32,
fill_f32,
to64!(float32x2_t)
);
gen_test_fn!(
test_cmp_f32,
f32,
u32,
float32x2_t,
uint32x2_t,
u64,
V_f32!(),
fill_f32,
fill_u32,
to64!(uint32x2_t)
);
gen_test_fn!(
testq_ari_f32,
f32,
f32,
float32x4_t,
float32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_f32,
to128!(float32x4_t)
);
gen_test_fn!(
testq_cmp_f32,
f32,
u32,
float32x4_t,
uint32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_u32,
to128!(uint32x4_t)
);
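// A hypothetical call site (sketch only; `vaddq_u8` is assumed to come from
// the NEON module and the closure bodies are illustrative): the NEON test
// modules pair an intrinsic with a scalar reference through the generated
// helpers above, roughly
//
//     testq_ari_u8(
//         |a, b| unsafe { vaddq_u8(a, b) },           // intrinsic under test
//         |a: u8, b: u8| -> u8 { a.wrapping_add(b) }, // scalar model
//     );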


@ -0,0 +1,49 @@
//! ARMv6 intrinsics.
//!
//! The reference is [ARMv6-M Architecture Reference Manual][armv6m].
//!
//! [armv6m]:
//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.
//! html
#[cfg(test)]
use stdarch_test::assert_instr;
/// Reverse the order of the bytes.
#[inline]
#[cfg_attr(test, assert_instr(rev))]
pub unsafe fn _rev_u16(x: u16) -> u16 {
x.swap_bytes() as u16
}
/// Reverse the order of the bytes.
#[inline]
#[cfg_attr(test, assert_instr(rev))]
pub unsafe fn _rev_u32(x: u32) -> u32 {
x.swap_bytes() as u32
}
#[cfg(test)]
mod tests {
use crate::core_arch::arm::v6;
#[test]
fn _rev_u16() {
unsafe {
assert_eq!(
v6::_rev_u16(0b0000_0000_1111_1111_u16),
0b1111_1111_0000_0000_u16
);
}
}
#[test]
fn _rev_u32() {
unsafe {
assert_eq!(
v6::_rev_u32(0b0000_0000_1111_1111_0000_0000_1111_1111_u32),
0b1111_1111_0000_0000_1111_1111_0000_0000_u32
);
}
}
}


@ -0,0 +1,88 @@
//! ARMv7 intrinsics.
//!
//! The reference is [ARMv7-M Architecture Reference Manual (Issue
//! E.b)][armv7m].
//!
//! [armv7m]:
//! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.
//! b/index.html
pub use super::v6::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Count Leading Zeros.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u8(x: u8) -> u8 {
x.leading_zeros() as u8
}
/// Count Leading Zeros.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u16(x: u16) -> u16 {
x.leading_zeros() as u16
}
/// Count Leading Zeros.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u32(x: u32) -> u32 {
x.leading_zeros() as u32
}
/// Reverse the bit order.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn _rbit_u32(x: u32) -> u32 {
crate::intrinsics::bitreverse(x)
}
#[cfg(test)]
mod tests {
use crate::core_arch::arm::v7;
#[test]
fn _clz_u8() {
unsafe {
assert_eq!(v7::_clz_u8(0b0000_1010u8), 4u8);
}
}
#[test]
fn _clz_u16() {
unsafe {
assert_eq!(v7::_clz_u16(0b0000_1010u16), 12u16);
}
}
#[test]
fn _clz_u32() {
unsafe {
assert_eq!(v7::_clz_u32(0b0000_1010u32), 28u32);
}
}
#[test]
#[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
fn _rbit_u32() {
unsafe {
assert_eq!(
v7::_rbit_u32(0b0000_1010u32),
0b0101_0000_0000_0000_0000_0000_0000_0000u32
);
}
}
}


@ -0,0 +1,340 @@
SIMD and vendor intrinsics module.
This module is intended to be the gateway to architecture-specific
intrinsic functions, typically related to SIMD (but not always!). Each
architecture that Rust compiles to may contain a submodule here, which
means that this is not a portable module! If you're writing a portable
library take care when using these APIs!
Under this module you'll find an architecture-named module, such as
`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a
module entry here, only present on that particular target. For example the
`i686-pc-windows-msvc` target will have an `x86` module here, whereas
`x86_64-pc-windows-msvc` has `x86_64`.
[rfc]: https://github.com/rust-lang/rfcs/pull/2325
[tracked]: https://github.com/rust-lang/rust/issues/48556
# Overview
This module exposes vendor-specific intrinsics that typically correspond to
a single machine instruction. These intrinsics are not portable: their
availability is architecture-dependent, and not all machines of that
architecture might provide the intrinsic.
The `arch` module is intended to be a low-level implementation detail for
higher-level APIs. Using it correctly can be quite tricky as you need to
ensure at least a few guarantees are upheld:
* The correct architecture's module is used. For example the `arm` module
isn't available on the `x86_64-unknown-linux-gnu` target. This is
typically done by ensuring that `#[cfg]` is used appropriately when using
this module.
* The CPU the program is currently running on supports the function being
called. For example it is unsafe to call an AVX2 function on a CPU that
doesn't actually support AVX2.
As a result of the latter of these guarantees all intrinsics in this module
are `unsafe` and extra care needs to be taken when calling them!
# CPU Feature Detection
In order to call these APIs in a safe fashion there's a number of
mechanisms available to ensure that the correct CPU feature is available
to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64`
intrinsics on the `x86` and `x86_64` architectures. This function requires
the AVX2 feature as [documented by Intel][intel-dox] so to correctly call
this function we need to (a) guarantee we only call it on `x86`/`x86_64`
and (b) ensure that the CPU feature is available
[intel-dox]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64&expand=100
## Static CPU Feature Detection
The first option available to us is to conditionally compile code via the
`#[cfg]` attribute. CPU features correspond to the `target_feature` cfg
available, and can be used like so:
```ignore
#[cfg(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "avx2"
)
)]
fn foo() {
#[cfg(target_arch = "x86")]
use std::arch::x86::_mm256_add_epi64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm256_add_epi64;
unsafe {
_mm256_add_epi64(...);
}
}
```
Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile
this function into our module. This means that if the `avx2` feature is
*enabled statically* then we'll use the `_mm256_add_epi64` function at
runtime. The `unsafe` block here can be justified through the usage of
`#[cfg]` to only compile the code in situations where the safety guarantees
are upheld.
Statically enabling a feature is typically done with the `-C
target-feature` or `-C target-cpu` flags to the compiler. For example if
your local CPU supports AVX2 then you can compile the above function with:
```sh
$ RUSTFLAGS='-C target-cpu=native' cargo build
```
Or otherwise you can specifically enable just the AVX2 feature:
```sh
$ RUSTFLAGS='-C target-feature=+avx2' cargo build
```
Note that when you compile a binary with a particular feature enabled it's
important to ensure that you only run the binary on systems which satisfy
the required feature set.
## Dynamic CPU Feature Detection
Sometimes statically dispatching isn't quite what you want. Instead you
might want to build a portable binary that runs across a variety of CPUs,
but at runtime it selects the most optimized implementation available. This
allows you to build a "least common denominator" binary which has certain
sections more optimized for different CPUs.
Taking our previous example from before, we're going to compile our binary
*without* AVX2 support, but we'd like to enable it for just one function.
We can do that in a manner like:
```ignore
fn foo() {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") {
return unsafe { foo_avx2() };
}
}
// fallback implementation without using AVX2
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn foo_avx2() {
#[cfg(target_arch = "x86")]
use std::arch::x86::_mm256_add_epi64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::_mm256_add_epi64;
_mm256_add_epi64(...);
}
```
There's a couple of components in play here, so let's go through them in
detail!
* First up we notice the `is_x86_feature_detected!` macro. Provided by
the standard library, this macro will perform necessary runtime detection
to determine whether the CPU the program is running on supports the
specified feature. In this case the macro will expand to a boolean
expression evaluating to whether the local CPU has the AVX2 feature or
not.
Note that this macro, like the `arch` module, is platform-specific. For
example calling `is_x86_feature_detected!("avx2")` on ARM will be a
compile time error. To ensure we don't hit this error a statement level
`#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`.
* Next up we see our AVX2-enabled function, `foo_avx2`. This function is
decorated with the `#[target_feature]` attribute which enables a CPU
feature for just this one function. Using a compiler flag like `-C
target-feature=+avx2` will enable AVX2 for the entire program, but using
an attribute will only enable it for the one function. Usage of the
`#[target_feature]` attribute currently requires the function to also be
`unsafe`, as we see here. This is because the function can only be
correctly called on systems which have the AVX2 feature (like the intrinsics
themselves).
And with all that we should have a working program! This program will run
across all machines and it'll use the optimized AVX2 implementation on
machines where support is detected.
# Ergonomics
It's important to note that using the `arch` module is not the easiest
thing in the world, so if you're curious to try it out you may want to
brace yourself for some wordiness!
The primary purpose of this module is to enable stable crates on crates.io
to build up much more ergonomic abstractions which end up using SIMD under
the hood. Over time these abstractions may also move into the standard
library itself, but for now this module is tasked with providing the bare
minimum necessary to use vendor intrinsics on stable Rust.
# Other architectures
This documentation is only for one particular architecture, you can find
others at:
* [`x86`]
* [`x86_64`]
* [`arm`]
* [`aarch64`]
* [`mips`]
* [`mips64`]
* [`powerpc`]
* [`powerpc64`]
* [`nvptx`]
* [`wasm32`]
[`x86`]: x86/index.html
[`x86_64`]: x86_64/index.html
[`arm`]: arm/index.html
[`aarch64`]: aarch64/index.html
[`mips`]: mips/index.html
[`mips64`]: mips64/index.html
[`powerpc`]: powerpc/index.html
[`powerpc64`]: powerpc64/index.html
[`nvptx`]: nvptx/index.html
[`wasm32`]: wasm32/index.html
# Examples
First let's take a look at not actually using any intrinsics but instead
using LLVM's auto-vectorization to produce optimized vectorized code for
AVX2 and also for the default platform.
```rust
fn main() {
let mut dst = [0];
add_quickly(&[1], &[2], &mut dst);
assert_eq!(dst[0], 3);
}
fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
// Note that this `unsafe` block is safe because we're testing
// that the `avx2` feature is indeed available on our CPU.
if is_x86_feature_detected!("avx2") {
return unsafe { add_quickly_avx2(a, b, c) };
}
}
add_quickly_fallback(a, b, c)
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) {
add_quickly_fallback(a, b, c) // the function below is inlined here
}
fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) {
for ((a, b), c) in a.iter().zip(b).zip(c) {
*c = *a + *b;
}
}
```
Next up let's take a look at an example of manually using intrinsics. Here
we'll be using SSE4.1 features to implement hex encoding.
```
fn main() {
let mut dst = [0; 32];
hex_encode(b"\x01\x02\x03", &mut dst);
assert_eq!(&dst[..6], b"010203");
let mut src = [0; 16];
for i in 0..16 {
src[i] = (i + 1) as u8;
}
hex_encode(&src, &mut dst);
assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10");
}
pub fn hex_encode(src: &[u8], dst: &mut [u8]) {
let len = src.len().checked_mul(2).unwrap();
assert!(dst.len() >= len);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("sse4.1") {
return unsafe { hex_encode_sse41(src, dst) };
}
}
hex_encode_fallback(src, dst)
}
// translated from
// https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
let ascii_zero = _mm_set1_epi8(b'0' as i8);
let nines = _mm_set1_epi8(9);
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
let and4bits = _mm_set1_epi8(0xf);
let mut i = 0_isize;
while src.len() >= 16 {
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
let masked1 = _mm_and_si128(invec, and4bits);
let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
// return 0xff corresponding to the elements > 9, or 0x00 otherwise
let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
// add '0' or the offset depending on the masks
let masked1 = _mm_add_epi8(
masked1,
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
);
let masked2 = _mm_add_epi8(
masked2,
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
);
// interleave masked1 and masked2 bytes
let res1 = _mm_unpacklo_epi8(masked2, masked1);
let res2 = _mm_unpackhi_epi8(masked2, masked1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
_mm_storeu_si128(
dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
res2,
);
src = &src[16..];
i += 16;
}
let i = i as usize;
hex_encode_fallback(src, &mut dst[i * 2..]);
}
fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
fn hex(byte: u8) -> u8 {
static TABLE: &[u8] = b"0123456789abcdef";
TABLE[byte as usize]
}
for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
slots[0] = hex((*byte >> 4) & 0xf);
slots[1] = hex(*byte & 0xf);
}
}
```


@ -0,0 +1,81 @@
#![doc(include = "core_arch_docs.md")]
#![allow(improper_ctypes_definitions)]
#![allow(dead_code)]
#![allow(unused_features)]
#![allow(incomplete_features)]
#![feature(
const_fn,
const_fn_union,
const_fn_transmute,
const_generics,
custom_inner_attributes,
link_llvm_intrinsics,
platform_intrinsics,
repr_simd,
simd_ffi,
llvm_asm,
proc_macro_hygiene,
stmt_expr_attributes,
core_intrinsics,
no_core,
rustc_attrs,
stdsimd,
staged_api,
doc_cfg,
tbm_target_feature,
sse4a_target_feature,
arm_target_feature,
aarch64_target_feature,
cmpxchg16b_target_feature,
avx512_target_feature,
mips_target_feature,
powerpc_target_feature,
wasm_target_feature,
abi_unadjusted,
adx_target_feature,
rtm_target_feature,
f16c_target_feature,
external_doc,
allow_internal_unstable,
decl_macro
)]
#![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))]
#![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))]
#![deny(clippy::missing_inline_in_public_items)]
#![allow(
clippy::inline_always,
clippy::too_many_arguments,
clippy::cast_sign_loss,
clippy::cast_lossless,
clippy::cast_possible_wrap,
clippy::cast_possible_truncation,
clippy::cast_precision_loss,
clippy::shadow_reuse,
clippy::cognitive_complexity,
clippy::similar_names,
clippy::many_single_char_names
)]
#![cfg_attr(test, allow(unused_imports))]
#![no_std]
#![unstable(feature = "stdsimd", issue = "27731")]
#![doc(
test(attr(deny(warnings))),
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
)]
#[cfg(test)]
#[macro_use]
extern crate std;
#[cfg(test)]
#[macro_use]
extern crate std_detect;
#[cfg(test)]
extern crate stdarch_test;
#[path = "mod.rs"]
mod core_arch;
pub use self::core_arch::arch;
#[allow(unused_imports)]
use core::{ffi, hint, intrinsics, marker, mem, ops, ptr, sync};


@ -0,0 +1,409 @@
//! Utility macros.
#[allow(unused)]
macro_rules! constify_imm8 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111_1111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
7 => $expand!(7),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
12 => $expand!(12),
13 => $expand!(13),
14 => $expand!(14),
15 => $expand!(15),
16 => $expand!(16),
17 => $expand!(17),
18 => $expand!(18),
19 => $expand!(19),
20 => $expand!(20),
21 => $expand!(21),
22 => $expand!(22),
23 => $expand!(23),
24 => $expand!(24),
25 => $expand!(25),
26 => $expand!(26),
27 => $expand!(27),
28 => $expand!(28),
29 => $expand!(29),
30 => $expand!(30),
31 => $expand!(31),
32 => $expand!(32),
33 => $expand!(33),
34 => $expand!(34),
35 => $expand!(35),
36 => $expand!(36),
37 => $expand!(37),
38 => $expand!(38),
39 => $expand!(39),
40 => $expand!(40),
41 => $expand!(41),
42 => $expand!(42),
43 => $expand!(43),
44 => $expand!(44),
45 => $expand!(45),
46 => $expand!(46),
47 => $expand!(47),
48 => $expand!(48),
49 => $expand!(49),
50 => $expand!(50),
51 => $expand!(51),
52 => $expand!(52),
53 => $expand!(53),
54 => $expand!(54),
55 => $expand!(55),
56 => $expand!(56),
57 => $expand!(57),
58 => $expand!(58),
59 => $expand!(59),
60 => $expand!(60),
61 => $expand!(61),
62 => $expand!(62),
63 => $expand!(63),
64 => $expand!(64),
65 => $expand!(65),
66 => $expand!(66),
67 => $expand!(67),
68 => $expand!(68),
69 => $expand!(69),
70 => $expand!(70),
71 => $expand!(71),
72 => $expand!(72),
73 => $expand!(73),
74 => $expand!(74),
75 => $expand!(75),
76 => $expand!(76),
77 => $expand!(77),
78 => $expand!(78),
79 => $expand!(79),
80 => $expand!(80),
81 => $expand!(81),
82 => $expand!(82),
83 => $expand!(83),
84 => $expand!(84),
85 => $expand!(85),
86 => $expand!(86),
87 => $expand!(87),
88 => $expand!(88),
89 => $expand!(89),
90 => $expand!(90),
91 => $expand!(91),
92 => $expand!(92),
93 => $expand!(93),
94 => $expand!(94),
95 => $expand!(95),
96 => $expand!(96),
97 => $expand!(97),
98 => $expand!(98),
99 => $expand!(99),
100 => $expand!(100),
101 => $expand!(101),
102 => $expand!(102),
103 => $expand!(103),
104 => $expand!(104),
105 => $expand!(105),
106 => $expand!(106),
107 => $expand!(107),
108 => $expand!(108),
109 => $expand!(109),
110 => $expand!(110),
111 => $expand!(111),
112 => $expand!(112),
113 => $expand!(113),
114 => $expand!(114),
115 => $expand!(115),
116 => $expand!(116),
117 => $expand!(117),
118 => $expand!(118),
119 => $expand!(119),
120 => $expand!(120),
121 => $expand!(121),
122 => $expand!(122),
123 => $expand!(123),
124 => $expand!(124),
125 => $expand!(125),
126 => $expand!(126),
127 => $expand!(127),
128 => $expand!(128),
129 => $expand!(129),
130 => $expand!(130),
131 => $expand!(131),
132 => $expand!(132),
133 => $expand!(133),
134 => $expand!(134),
135 => $expand!(135),
136 => $expand!(136),
137 => $expand!(137),
138 => $expand!(138),
139 => $expand!(139),
140 => $expand!(140),
141 => $expand!(141),
142 => $expand!(142),
143 => $expand!(143),
144 => $expand!(144),
145 => $expand!(145),
146 => $expand!(146),
147 => $expand!(147),
148 => $expand!(148),
149 => $expand!(149),
150 => $expand!(150),
151 => $expand!(151),
152 => $expand!(152),
153 => $expand!(153),
154 => $expand!(154),
155 => $expand!(155),
156 => $expand!(156),
157 => $expand!(157),
158 => $expand!(158),
159 => $expand!(159),
160 => $expand!(160),
161 => $expand!(161),
162 => $expand!(162),
163 => $expand!(163),
164 => $expand!(164),
165 => $expand!(165),
166 => $expand!(166),
167 => $expand!(167),
168 => $expand!(168),
169 => $expand!(169),
170 => $expand!(170),
171 => $expand!(171),
172 => $expand!(172),
173 => $expand!(173),
174 => $expand!(174),
175 => $expand!(175),
176 => $expand!(176),
177 => $expand!(177),
178 => $expand!(178),
179 => $expand!(179),
180 => $expand!(180),
181 => $expand!(181),
182 => $expand!(182),
183 => $expand!(183),
184 => $expand!(184),
185 => $expand!(185),
186 => $expand!(186),
187 => $expand!(187),
188 => $expand!(188),
189 => $expand!(189),
190 => $expand!(190),
191 => $expand!(191),
192 => $expand!(192),
193 => $expand!(193),
194 => $expand!(194),
195 => $expand!(195),
196 => $expand!(196),
197 => $expand!(197),
198 => $expand!(198),
199 => $expand!(199),
200 => $expand!(200),
201 => $expand!(201),
202 => $expand!(202),
203 => $expand!(203),
204 => $expand!(204),
205 => $expand!(205),
206 => $expand!(206),
207 => $expand!(207),
208 => $expand!(208),
209 => $expand!(209),
210 => $expand!(210),
211 => $expand!(211),
212 => $expand!(212),
213 => $expand!(213),
214 => $expand!(214),
215 => $expand!(215),
216 => $expand!(216),
217 => $expand!(217),
218 => $expand!(218),
219 => $expand!(219),
220 => $expand!(220),
221 => $expand!(221),
222 => $expand!(222),
223 => $expand!(223),
224 => $expand!(224),
225 => $expand!(225),
226 => $expand!(226),
227 => $expand!(227),
228 => $expand!(228),
229 => $expand!(229),
230 => $expand!(230),
231 => $expand!(231),
232 => $expand!(232),
233 => $expand!(233),
234 => $expand!(234),
235 => $expand!(235),
236 => $expand!(236),
237 => $expand!(237),
238 => $expand!(238),
239 => $expand!(239),
240 => $expand!(240),
241 => $expand!(241),
242 => $expand!(242),
243 => $expand!(243),
244 => $expand!(244),
245 => $expand!(245),
246 => $expand!(246),
247 => $expand!(247),
248 => $expand!(248),
249 => $expand!(249),
250 => $expand!(250),
251 => $expand!(251),
252 => $expand!(252),
253 => $expand!(253),
254 => $expand!(254),
_ => $expand!(255),
}
};
}
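// A sketch of how these constifiers are used (`emit!` and
// `some_llvm_intrinsic` are illustrative names, not real items): the caller
// defines a local macro that needs the immediate as a literal token and lets
// `constify_imm8!` select the matching literal at compile time, as
// `__breakpoint` does with its `call!` macro above.
//
//     macro_rules! emit {
//         ($imm:expr) => { some_llvm_intrinsic(x, $imm) };
//     }
//     constify_imm8!(imm8, emit);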
//immediate value: 0:31
#[allow(unused)]
macro_rules! constify_imm5 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1_1111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
7 => $expand!(7),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
12 => $expand!(12),
13 => $expand!(13),
14 => $expand!(14),
15 => $expand!(15),
16 => $expand!(16),
17 => $expand!(17),
18 => $expand!(18),
19 => $expand!(19),
20 => $expand!(20),
21 => $expand!(21),
22 => $expand!(22),
23 => $expand!(23),
24 => $expand!(24),
25 => $expand!(25),
26 => $expand!(26),
27 => $expand!(27),
28 => $expand!(28),
29 => $expand!(29),
30 => $expand!(30),
_ => $expand!(31),
}
};
}
//immediate value: -16:15
#[allow(unused)]
macro_rules! constify_imm5 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1_1111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
7 => $expand!(7),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
12 => $expand!(12),
13 => $expand!(13),
14 => $expand!(14),
15 => $expand!(15),
16 => $expand!(16),
17 => $expand!(17),
18 => $expand!(18),
19 => $expand!(19),
20 => $expand!(20),
21 => $expand!(21),
22 => $expand!(22),
23 => $expand!(23),
24 => $expand!(24),
25 => $expand!(25),
26 => $expand!(26),
27 => $expand!(27),
28 => $expand!(28),
29 => $expand!(29),
30 => $expand!(30),
_ => $expand!(31),
}
};
}
//immediate value: 0:15
#[allow(unused)]
macro_rules! constify_imm4 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
7 => $expand!(7),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
12 => $expand!(12),
13 => $expand!(13),
14 => $expand!(14),
_ => $expand!(15),
}
};
}
//immediate value: 0:7
#[allow(unused)]
macro_rules! constify_imm3 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
_ => $expand!(7),
}
};
}
#[allow(unused)]
macro_rules! types {
($(
$(#[$doc:meta])*
pub struct $name:ident($($fields:tt)*);
)*) => ($(
$(#[$doc])*
#[derive(Copy, Clone, Debug)]
#[allow(non_camel_case_types)]
#[repr(simd)]
#[allow(clippy::missing_inline_in_public_items)]
pub struct $name($($fields)*);
)*)
}
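// The arch modules are expected to declare their SIMD wrapper types through
// `types!`; a representative (illustrative) invocation looks like
//
//     types! {
//         /// ARM 64-bit wide vector of eight packed `u8` lanes.
//         pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8);
//     }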


@ -0,0 +1,18 @@
//! MIPS
// Building this module (even if unused) for non-fp64 targets fails with an LLVM
// error.
#[cfg(target_feature = "fp64")]
mod msa;
#[cfg(target_feature = "fp64")]
pub use self::msa::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `BREAK`
#[cfg_attr(test, assert_instr(break))]
#[inline]
pub unsafe fn break_() -> ! {
crate::intrinsics::abort()
}

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,257 @@
//! `core_arch`
#[macro_use]
mod macros;
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
mod acle;
mod simd;
#[doc(include = "core_arch_docs.md")]
#[stable(feature = "simd_arch", since = "1.27.0")]
pub mod arch {
/// Platform-specific intrinsics for the `x86` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86", doc))]
#[doc(cfg(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86::*;
}
/// Platform-specific intrinsics for the `x86_64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86_64", doc))]
#[doc(cfg(target_arch = "x86_64"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86_64 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86::*;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use crate::core_arch::x86_64::*;
}
/// Platform-specific intrinsics for the `arm` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "arm", doc))]
#[doc(cfg(target_arch = "arm"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod arm {
pub use crate::core_arch::arm::*;
}
/// Platform-specific intrinsics for the `aarch64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "aarch64", doc))]
#[doc(cfg(target_arch = "aarch64"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod aarch64 {
pub use crate::core_arch::{aarch64::*, arm::*};
}
/// Platform-specific intrinsics for the `wasm32` platform.
///
/// This module provides intrinsics specific to the WebAssembly
/// architecture. Here you'll find intrinsics necessary for leveraging
/// WebAssembly proposals such as [atomics] and [simd]. These proposals are
/// evolving over time and as such the support here is unstable and requires
/// the nightly channel. As WebAssembly proposals stabilize these functions
/// will also become stable.
///
/// [atomics]: https://github.com/webassembly/threads
/// [simd]: https://github.com/webassembly/simd
///
/// See the [module documentation](../index.html) for general information
/// about the `arch` module and platform intrinsics.
///
/// ## Atomics
///
/// The [threads proposal][atomics] for WebAssembly adds a number of
/// instructions for dealing with multithreaded programs. Atomic
/// instructions can all be generated through `std::sync::atomic` types, but
/// some instructions have no equivalent in Rust such as
/// `memory.atomic.notify` so this module will provide these intrinsics.
///
/// At this time, however, these intrinsics are only available **when the
/// standard library itself is compiled with atomics**. Compiling with
/// atomics is not enabled by default and requires passing
/// `-Ctarget-feature=+atomics` to rustc. The standard library shipped via
/// `rustup` is not compiled with atomics. To get access to these intrinsics
/// you'll need to compile the standard library from source with the
/// requisite compiler flags.
///
/// ## SIMD
///
/// The [simd proposal][simd] for WebAssembly adds a new `v128` type for a
/// 128-bit SIMD register. It also adds a large array of instructions to
/// operate on the `v128` type to perform data processing. The SIMD proposal
/// has been in progress for quite some time and many instructions have come
/// and gone. This module attempts to keep up with the proposal, but if you
/// notice anything awry please feel free to [open an
/// issue](https://github.com/rust-lang/stdarch/issues/new).
///
/// It's important to be aware that the current state of development of SIMD
/// in WebAssembly is still somewhat early days. There's lots of pieces to
/// demo and prototype with, but discussions and support are still in
/// progress. There's a number of pitfalls and gotchas in various places,
/// which we will attempt to document here, but there may be others
/// lurking!
///
/// Using SIMD is intended to be similar to how you would use it on `x86_64`, for
/// example. You'd write a function such as:
///
/// ```rust,ignore
/// #[cfg(target_arch = "wasm32")]
/// #[target_feature(enable = "simd128")]
/// unsafe fn uses_simd() {
/// use std::arch::wasm32::*;
/// // ...
/// }
/// ```
///
/// Unlike `x86_64`, however, WebAssembly does not currently have dynamic
/// detection at runtime as to whether SIMD is supported (this is one of the
/// motivators for the [conditional sections proposal][condsections], but
/// that is still pretty early days). This means that your binary will
/// either have SIMD and can only run on engines which support SIMD, or it
/// will not have SIMD at all. For compatibility the standard library itself
/// does not use any SIMD internally. Determining how best to ship your
/// WebAssembly binary with SIMD is largely left up to you as it can be
/// pretty nuanced depending on your situation.
///
/// [condsections]: https://github.com/webassembly/conditional-sections
///
/// To enable SIMD support at compile time you need to do one of two things:
///
/// * First you can annotate functions with `#[target_feature(enable =
/// "simd128")]`. This causes just that one function to have SIMD support
/// available to it, and intrinsics will get inlined as usual in this
/// situation.
///
/// * Second you can compile your program with `-Ctarget-feature=+simd128`.
/// This compilation flag blanket enables SIMD support for your entire
/// compilation. Note that this does not include the standard library
/// unless you recompile the standard library.
///
/// If you enable SIMD via either of these routes then you'll have a
/// WebAssembly binary that uses SIMD instructions, and you'll need to ship
/// that accordingly. Also note that if you call SIMD intrinsics but don't
/// enable SIMD via either of these mechanisms, you'll still have SIMD
/// generated in your program. This means to generate a binary without SIMD
/// you'll need to avoid both options above plus calling into any intrinsics
/// in this module.
///
/// > **Note**: Due to
/// > [rust-lang/rust#74320](https://github.com/rust-lang/rust/issues/74320)
/// > it's recommended to compile your entire program with SIMD support
/// > (using `RUSTFLAGS`) or otherwise functions may not be inlined
/// > correctly.
///
/// > **Note**: LLVM's SIMD support is actually split into two features:
/// > `simd128` and `unimplemented-simd128`. Rust code can enable `simd128`
/// > with `#[target_feature]` (and test for it with `#[cfg(target_feature =
/// > "simd128")]`, but it cannot enable `unimplemented-simd128`. The only
/// > way to enable this feature is to compile with
/// > `-Ctarget-feature=+simd128,+unimplemented-simd128`. This second
/// > feature enables more recent instructions implemented in LLVM which
/// > haven't always had enough time to make their way to runtimes.
#[cfg(any(target_arch = "wasm32", doc))]
#[doc(cfg(target_arch = "wasm32"))]
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub mod wasm32 {
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub use crate::core_arch::wasm32::*;
}
/// Platform-specific intrinsics for the `mips` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips", doc))]
#[doc(cfg(target_arch = "mips"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod mips {
pub use crate::core_arch::mips::*;
}
/// Platform-specific intrinsics for the `mips64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips64", doc))]
#[doc(cfg(target_arch = "mips64"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod mips64 {
pub use crate::core_arch::mips::*;
}
/// Platform-specific intrinsics for the `PowerPC` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "powerpc", doc))]
#[doc(cfg(target_arch = "powerpc"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod powerpc {
pub use crate::core_arch::powerpc::*;
}
/// Platform-specific intrinsics for the `PowerPC64` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "powerpc64", doc))]
#[doc(cfg(target_arch = "powerpc64"))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod powerpc64 {
pub use crate::core_arch::powerpc64::*;
}
/// Platform-specific intrinsics for the `NVPTX` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", doc))]
#[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod nvptx {
pub use crate::core_arch::nvptx::*;
}
}
mod simd_llvm;
#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))]
mod x86;
#[cfg(any(target_arch = "x86_64", doc))]
#[doc(cfg(target_arch = "x86_64"))]
mod x86_64;
#[cfg(any(target_arch = "aarch64", doc))]
#[doc(cfg(target_arch = "aarch64"))]
mod aarch64;
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
#[doc(cfg(any(target_arch = "arm", target_arch = "aarch64")))]
mod arm;
#[cfg(any(target_arch = "wasm32", doc))]
#[doc(cfg(target_arch = "wasm32"))]
mod wasm32;
#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))]
#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))]
mod mips;
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))]
#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))]
mod powerpc;
#[cfg(any(target_arch = "powerpc64", doc))]
#[doc(cfg(target_arch = "powerpc64"))]
mod powerpc64;
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", doc))]
#[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))]
mod nvptx;


@@ -0,0 +1,213 @@
//! NVPTX intrinsics (experimental)
//!
//! These intrinsics form the foundation of the CUDA
//! programming model.
//!
//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also
//! the [LLVM NVPTX Backend documentation][llvm_docs].
//!
//! [cuda_c]:
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
//! [llvm_docs]:
//! https://llvm.org/docs/NVPTXUsage.html
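//!
//! A hedged usage sketch: inside a kernel, the flat thread index along the
//! `x` axis can be derived from the intrinsics declared below.
//!
//! ```ignore
//! // Flat index of the calling thread along the x axis.
//! let global_x = unsafe { _block_idx_x() * _block_dim_x() + _thread_idx_x() };
//! ```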
use crate::ffi::c_void;
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.nvvm.barrier0"]
fn syncthreads() -> ();
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
fn block_dim_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
fn block_dim_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
fn block_dim_z() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
fn block_idx_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
fn block_idx_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
fn block_idx_z() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
fn grid_dim_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
fn grid_dim_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
fn grid_dim_z() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
fn thread_idx_x() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
fn thread_idx_y() -> i32;
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
fn thread_idx_z() -> i32;
}
/// Synchronizes all threads in the block.
#[inline]
pub unsafe fn _syncthreads() -> () {
syncthreads()
}
/// x-th thread-block dimension.
#[inline]
pub unsafe fn _block_dim_x() -> i32 {
block_dim_x()
}
/// y-th thread-block dimension.
#[inline]
pub unsafe fn _block_dim_y() -> i32 {
block_dim_y()
}
/// z-th thread-block dimension.
#[inline]
pub unsafe fn _block_dim_z() -> i32 {
block_dim_z()
}
/// x-th thread-block index.
#[inline]
pub unsafe fn _block_idx_x() -> i32 {
block_idx_x()
}
/// y-th thread-block index.
#[inline]
pub unsafe fn _block_idx_y() -> i32 {
block_idx_y()
}
/// z-th thread-block index.
#[inline]
pub unsafe fn _block_idx_z() -> i32 {
block_idx_z()
}
/// x-th block-grid dimension.
#[inline]
pub unsafe fn _grid_dim_x() -> i32 {
grid_dim_x()
}
/// y-th block-grid dimension.
#[inline]
pub unsafe fn _grid_dim_y() -> i32 {
grid_dim_y()
}
/// z-th block-grid dimension.
#[inline]
pub unsafe fn _grid_dim_z() -> i32 {
grid_dim_z()
}
/// x-th thread index.
#[inline]
pub unsafe fn _thread_idx_x() -> i32 {
thread_idx_x()
}
/// y-th thread index.
#[inline]
pub unsafe fn _thread_idx_y() -> i32 {
thread_idx_y()
}
/// z-th thread index.
#[inline]
pub unsafe fn _thread_idx_z() -> i32 {
thread_idx_z()
}
/// Generates the trap instruction `TRAP`
#[inline]
pub unsafe fn trap() -> ! {
crate::intrinsics::abort()
}
// Basic CUDA syscall declarations.
extern "C" {
/// Print formatted output from a kernel to a host-side output stream.
///
/// Syscall arguments:
/// * `status`: The status value that is returned by `vprintf`.
/// * `format`: A pointer to the format specifier input (uses common `printf` format).
/// * `valist`: A pointer to the valist input.
///
/// ```
/// #[repr(C)]
/// struct PrintArgs(f32, f32, f32, i32);
///
/// vprintf(
/// "int(%f + %f) = int(%f) = %d\n".as_ptr(),
/// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
/// );
/// ```
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
/// Allocate memory dynamically from a fixed-size heap in global memory.
///
/// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
/// from the device heap and returns a pointer to the allocated memory
/// or `NULL` if insufficient memory exists to fulfill the request.
///
/// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
///
/// The memory allocated by a given CUDA thread via `malloc()` remains allocated
/// for the lifetime of the CUDA context, or until it is explicitly released
/// by a call to `free()`. It can be used by any other CUDA threads
/// even from subsequent kernel launches.
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
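///
/// # Examples
///
/// A hedged sketch; the buffer size and its use are hypothetical:
///
/// ```ignore
/// // Allocate 256 bytes from the device heap, touch them, then release them.
/// let buf = malloc(256) as *mut u8;
/// if !buf.is_null() {
///     *buf = 1;
///     free(buf as *mut c_void);
/// }
/// ```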
// FIXME(denzp): assign `malloc` and `nothrow` attributes.
pub fn malloc(size: usize) -> *mut c_void;
/// Free previously dynamically allocated memory.
///
/// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
/// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
/// the call to `free()` is ignored.
///
/// Any CUDA thread may free memory allocated by another thread, but care should be taken
/// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
/// with the same `ptr` have undefined behavior.
///
/// Sources:
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
// FIXME(denzp): assign `nothrow` attribute.
pub fn free(ptr: *mut c_void);
// Internal declaration of the syscall. Exported variant has
// the `char_size` parameter set to `1` (single char size in bytes).
fn __assertfail(
message: *const u8,
file: *const u8,
line: u32,
function: *const u8,
char_size: usize,
);
}
/// Syscall to be used whenever the *assert expression produces a `false` value*.
///
/// Syscall arguments:
/// * `message`: The pointer to the string that should be output.
/// * `file`: The pointer to the file name string associated with the assert.
/// * `line`: The line number associated with the assert.
/// * `function`: The pointer to the function name string associated with the assert.
///
/// Source:
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
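///
/// # Examples
///
/// A hypothetical call site; the strings and line number are made up, and each
/// string is assumed to be NUL-terminated:
///
/// ```ignore
/// __assert_fail(
///     b"x > 0\0".as_ptr(),
///     b"kernel.rs\0".as_ptr(),
///     42,
///     b"my_kernel\0".as_ptr(),
/// );
/// ```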
#[inline]
pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
__assertfail(message, file, line, function, 1)
}

File diff suppressed because it is too large.


@@ -0,0 +1,19 @@
//! PowerPC intrinsics
#[cfg(target_feature = "altivec")]
mod altivec;
#[cfg(target_feature = "altivec")]
pub use self::altivec::*;
mod vsx;
pub use self::vsx::*;
#[cfg(test)]
use stdarch_test::assert_instr;
/// Generates the trap instruction `TRAP`
#[cfg_attr(test, assert_instr(trap))]
#[inline]
pub unsafe fn trap() -> ! {
crate::intrinsics::abort()
}


@@ -0,0 +1,117 @@
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
//!
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
//!
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
#![allow(non_camel_case_types)]
use crate::core_arch::simd_llvm::*;
#[cfg(test)]
use stdarch_test::assert_instr;
use crate::mem;
types! {
// pub struct vector_Float16 = f16x8;
/// PowerPC-specific 128-bit wide vector of two packed `i64`
pub struct vector_signed_long(i64, i64);
/// PowerPC-specific 128-bit wide vector of two packed `u64`
pub struct vector_unsigned_long(u64, u64);
/// PowerPC-specific 128-bit wide vector mask of two elements
pub struct vector_bool_long(i64, i64);
/// PowerPC-specific 128-bit wide vector of two packed `f64`
pub struct vector_double(f64, f64);
// pub struct vector_signed_long_long = vector_signed_long;
// pub struct vector_unsigned_long_long = vector_unsigned_long;
// pub struct vector_bool_long_long = vector_bool_long;
// pub struct vector_signed___int128 = i128x1;
// pub struct vector_unsigned___int128 = i128x1;
}
mod sealed {
use super::*;
use crate::core_arch::simd::*;
pub trait VectorPermDI {
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self;
}
// xxpermdi has a big-endian bias and extended mnemonics
#[inline]
#[target_feature(enable = "vsx")]
#[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))]
#[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 {
match dm & 0b11 {
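// A hedged reading of the immediate: the low bit of `dm` selects which
// doubleword of `a` goes into lane 0, and the high bit selects which
// doubleword of `b` goes into lane 1 (see the index pairs below).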
0 => simd_shuffle2(a, b, [0b00, 0b10]),
1 => simd_shuffle2(a, b, [0b01, 0b10]),
2 => simd_shuffle2(a, b, [0b00, 0b11]),
_ => simd_shuffle2(a, b, [0b01, 0b11]),
}
}
macro_rules! vec_xxpermdi {
{$impl: ident} => {
impl VectorPermDI for $impl {
#[inline]
#[target_feature(enable = "vsx")]
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self {
mem::transmute(xxpermdi(mem::transmute(self), mem::transmute(b), dm))
}
}
}
}
vec_xxpermdi! { vector_unsigned_long }
vec_xxpermdi! { vector_signed_long }
vec_xxpermdi! { vector_bool_long }
vec_xxpermdi! { vector_double }
}
/// Vector permute.
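///
/// A hedged usage sketch; `a` and `b` stand for any two values of a type
/// implementing the sealed `VectorPermDI` trait:
///
/// ```ignore
/// // dm = 0 keeps the first doubleword of `a` in lane 0 and the first
/// // doubleword of `b` in lane 1.
/// let r = vec_xxpermdi(a, b, 0);
/// ```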
#[inline]
#[target_feature(enable = "vsx")]
#[rustc_args_required_const(2)]
pub unsafe fn vec_xxpermdi<T>(a: T, b: T, dm: u8) -> T
where
T: sealed::VectorPermDI,
{
a.vec_xxpermdi(b, dm)
}
#[cfg(test)]
mod tests {
#[cfg(target_arch = "powerpc")]
use crate::core_arch::arch::powerpc::*;
#[cfg(target_arch = "powerpc64")]
use crate::core_arch::arch::powerpc64::*;
use super::mem;
use crate::core_arch::simd::*;
use stdarch_test::simd_test;
macro_rules! test_vec_xxpermdi {
{$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
#[simd_test(enable = "vsx")]
unsafe fn $name() {
let a: $longtype = mem::transmute($shorttype::new($($a),+, $($b),+));
let b = mem::transmute($shorttype::new($($c),+, $($d),+));
assert_eq!($shorttype::new($($a),+, $($c),+), mem::transmute(vec_xxpermdi(a, b, 0)));
assert_eq!($shorttype::new($($b),+, $($c),+), mem::transmute(vec_xxpermdi(a, b, 1)));
assert_eq!($shorttype::new($($a),+, $($d),+), mem::transmute(vec_xxpermdi(a, b, 2)));
assert_eq!($shorttype::new($($b),+, $($d),+), mem::transmute(vec_xxpermdi(a, b, 3)));
}
}
}
test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
}


@@ -0,0 +1,8 @@
//! PowerPC 64
//!
//! The reference is the [64-Bit ELF V2 ABI Specification - Power
//! Architecture].
//!
//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
pub use crate::core_arch::powerpc::*;


@@ -0,0 +1,711 @@
//! Internal `#[repr(simd)]` types
#![allow(non_camel_case_types)]
macro_rules! simd_ty {
($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct $id($(pub $elem_ty),*);
#[allow(clippy::use_self)]
impl $id {
#[inline]
pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self {
$id($($elem_name),*)
}
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) const fn splat(value: $ety) -> Self {
$id($({
#[allow(non_camel_case_types, dead_code)]
struct $elem_name;
value
}),*)
}
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) fn extract(self, index: usize) -> $ety {
unsafe {
crate::core_arch::simd_llvm::simd_extract(self, index as u32)
}
}
}
}
}
macro_rules! simd_m_ty {
($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct $id($(pub $elem_ty),*);
#[allow(clippy::use_self)]
impl $id {
#[inline]
const fn bool_to_internal(x: bool) -> $ety {
[0 as $ety, !(0 as $ety)][x as usize]
}
#[inline]
pub(crate) const fn new($($elem_name: bool),*) -> Self {
$id($(Self::bool_to_internal($elem_name)),*)
}
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) const fn splat(value: bool) -> Self {
$id($({
#[allow(non_camel_case_types, dead_code)]
struct $elem_name;
Self::bool_to_internal(value)
}),*)
}
// FIXME: Workaround rust@60637
#[inline(always)]
pub(crate) fn extract(self, index: usize) -> bool {
let r: $ety = unsafe {
crate::core_arch::simd_llvm::simd_extract(self, index as u32)
};
r != 0
}
}
}
}
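// Invocation shape of both macros, informally:
// `simd_ty!(name[element type]: lane types ... | lane names ...)`;
// `simd_m_ty!` takes the same shape but constructs its lanes from `bool`s.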
// 16-bit wide types:
simd_ty!(u8x2[u8]: u8, u8 | x0, x1);
simd_ty!(i8x2[i8]: i8, i8 | x0, x1);
// 32-bit wide types:
simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3);
simd_ty!(u16x2[u16]: u16, u16 | x0, x1);
simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3);
simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
// 64-bit wide types:
simd_ty!(
u8x8[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
simd_ty!(u64x1[u64]: u64 | x1);
simd_ty!(
i8x8[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
simd_ty!(i64x1[i64]: i64 | x1);
simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
// 128-bit wide types:
simd_ty!(
u8x16[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
u16x8[u16]: u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
simd_ty!(
i8x16[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
i16x8[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
simd_m_ty!(
m8x16[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_m_ty!(
m16x8[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
// 256-bit wide types:
simd_ty!(
u8x32[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15,
x16,
x17,
x18,
x19,
x20,
x21,
x22,
x23,
x24,
x25,
x26,
x27,
x28,
x29,
x30,
x31
);
simd_ty!(
u16x16[u16]: u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
u32x8[u32]: u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
simd_ty!(
i8x32[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15,
x16,
x17,
x18,
x19,
x20,
x21,
x22,
x23,
x24,
x25,
x26,
x27,
x28,
x29,
x30,
x31
);
simd_ty!(
i16x16[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
i32x8[i32]: i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
simd_ty!(
f32x8[f32]: f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
// 512-bit wide types:
simd_ty!(
i32x16[i32]: i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
u32x16[u32]: u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
f32x16[f32]: f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7,
x8,
x9,
x10,
x11,
x12,
x13,
x14,
x15
);
simd_ty!(
i64x8[i64]: i64,
i64,
i64,
i64,
i64,
i64,
i64,
i64 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(
u64x8[u64]: u64,
u64,
u64,
u64,
u64,
u64,
u64,
u64 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);
simd_ty!(
f64x8[f64]: f64,
f64,
f64,
f64,
f64,
f64,
f64,
f64 | x0,
x1,
x2,
x3,
x4,
x5,
x6,
x7
);


@@ -0,0 +1,86 @@
//! LLVM's SIMD platform intrinsics
extern "platform-intrinsic" {
//pub fn simd_select_bitmask
pub fn simd_eq<T, U>(x: T, y: T) -> U;
pub fn simd_ne<T, U>(x: T, y: T) -> U;
pub fn simd_lt<T, U>(x: T, y: T) -> U;
pub fn simd_le<T, U>(x: T, y: T) -> U;
pub fn simd_gt<T, U>(x: T, y: T) -> U;
pub fn simd_ge<T, U>(x: T, y: T) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
#[rustc_args_required_const(2)]
pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;
#[rustc_const_unstable(feature = "const_simd_insert", issue = "none")]
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
#[rustc_const_unstable(feature = "const_simd_extract", issue = "none")]
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
//pub fn simd_select
pub fn simd_bitmask<T, U>(x: T) -> U;
pub fn simd_cast<T, U>(x: T) -> U;
pub fn simd_add<T>(x: T, y: T) -> T;
pub fn simd_sub<T>(x: T, y: T) -> T;
pub fn simd_mul<T>(x: T, y: T) -> T;
pub fn simd_div<T>(x: T, y: T) -> T;
pub fn simd_shl<T>(x: T, y: T) -> T;
pub fn simd_shr<T>(x: T, y: T) -> T;
pub fn simd_and<T>(x: T, y: T) -> T;
pub fn simd_or<T>(x: T, y: T) -> T;
pub fn simd_xor<T>(x: T, y: T) -> T;
pub fn simd_saturating_add<T>(x: T, y: T) -> T;
pub fn simd_saturating_sub<T>(x: T, y: T) -> T;
pub fn simd_gather<T, U, V>(values: T, pointers: U, mask: V) -> T;
pub fn simd_scatter<T, U, V>(values: T, pointers: U, mask: V);
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
pub fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
pub fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
pub fn simd_reduce_min<T, U>(x: T) -> U;
pub fn simd_reduce_max<T, U>(x: T) -> U;
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
pub fn simd_reduce_and<T, U>(x: T) -> U;
pub fn simd_reduce_or<T, U>(x: T) -> U;
pub fn simd_reduce_xor<T, U>(x: T) -> U;
pub fn simd_reduce_all<T>(x: T) -> bool;
pub fn simd_reduce_any<T>(x: T) -> bool;
pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
pub fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
pub fn simd_fmin<T>(a: T, b: T) -> T;
pub fn simd_fmax<T>(a: T, b: T) -> T;
pub fn simd_fsqrt<T>(a: T) -> T;
pub fn simd_fsin<T>(a: T) -> T;
pub fn simd_fcos<T>(a: T) -> T;
pub fn simd_fabs<T>(a: T) -> T;
pub fn simd_floor<T>(a: T) -> T;
pub fn simd_ceil<T>(a: T) -> T;
pub fn simd_fexp<T>(a: T) -> T;
pub fn simd_fexp2<T>(a: T) -> T;
pub fn simd_flog10<T>(a: T) -> T;
pub fn simd_flog2<T>(a: T) -> T;
pub fn simd_flog<T>(a: T) -> T;
//pub fn simd_fpowi
//pub fn simd_fpow
pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
}


@@ -0,0 +1,85 @@
//! 64-bit wide vector types
use crate::prelude::v1::*;
use crate::core_arch::simd_llvm::*;
define_ty_doc! {
f32x2, f32, f32 |
/// A 64-bit vector with 2 `f32` lanes.
}
define_impl! { f32x2, f32, 2, i32x2, x0, x1 }
define_ty_doc! {
u32x2, u32, u32 |
/// A 64-bit vector with 2 `u32` lanes.
}
define_impl! { u32x2, u32, 2, i32x2, x0, x1 }
define_ty! { i32x2, i32, i32 }
define_impl! { i32x2, i32, 2, i32x2, x0, x1 }
define_ty! { u16x4, u16, u16, u16, u16 }
define_impl! { u16x4, u16, 4, i16x4, x0, x1, x2, x3 }
define_ty! { i16x4, i16, i16, i16, i16 }
define_impl! { i16x4, i16, 4, i16x4, x0, x1, x2, x3 }
define_ty! { u8x8, u8, u8, u8, u8, u8, u8, u8, u8 }
define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 }
define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8);
define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8);
define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8);
define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8);
define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8);
define_common_ops!(f32x2, u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
define_float_ops!(f32x2);
define_integer_ops!(
(u32x2, u32),
(i32x2, i32),
(u16x4, u16),
(i16x4, i16),
(u8x8, u8),
(i8x8, i8)
);
define_signed_integer_ops!(i32x2, i16x4, i8x8);
define_casts!(
(f32x2, f64x2, as_f64x2),
(f32x2, u32x2, as_u32x2),
(f32x2, i32x2, as_i32x2),
(u32x2, f32x2, as_f32x2),
(u32x2, i32x2, as_i32x2),
(i32x2, f32x2, as_f32x2),
(i32x2, u32x2, as_u32x2),
(u16x4, i16x4, as_i16x4),
(i16x4, u16x4, as_u16x4),
(u8x8, i8x8, as_i8x8),
(i8x8, u8x8, as_u8x8),
(i8x8, i16x8, as_i16x8),
(u8x8, i16x8, as_i16x8),
(i16x4, i32x4, as_i32x4),
(i32x2, i64x2, as_i64x2),
(u8x8, u16x8, as_u16x8),
(u16x4, u32x4, as_u32x4),
(u16x4, i32x4, as_i32x4),
(u32x2, u64x2, as_u64x2),
(u32x2, i64x2, as_i64x2)
);
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn operators() {
test_ops_si!(i8x8, i16x4, i32x2);
test_ops_ui!(u8x8, u16x4, u32x2);
test_ops_f!(f32x2);
}
}


@@ -0,0 +1,118 @@
//! Intrinsics associated with WebAssembly's upcoming threads proposal.
//!
//! These intrinsics are all unstable because they're not actually stable in
//! WebAssembly itself yet. The signatures may change as [the
//! specification][spec] is updated.
//!
//! [spec]: https://github.com/WebAssembly/threads
#![cfg(any(target_feature = "atomics", doc))]
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
#[link_name = "llvm.wasm.atomic.wait.i32"]
fn llvm_atomic_wait_i32(ptr: *mut i32, exp: i32, timeout: i64) -> i32;
#[link_name = "llvm.wasm.atomic.wait.i64"]
fn llvm_atomic_wait_i64(ptr: *mut i64, exp: i64, timeout: i64) -> i32;
#[link_name = "llvm.wasm.atomic.notify"]
fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32;
}
/// Corresponding intrinsic to wasm's [`memory.atomic.wait32` instruction][instr]
///
/// This function, when called, will block the current thread if the memory
/// pointed to by `ptr` is equal to `expression` (performing this action
/// atomically).
///
/// The argument `timeout_ns` is a maximum number of nanoseconds the calling
/// thread will be blocked for, if it blocks. If the timeout is negative then
/// the calling thread will be blocked forever.
///
/// The calling thread can only be woken up with a call to the `memory_atomic_notify` intrinsic
/// once it has been blocked. Changing the memory behind `ptr` will not wake
/// the thread once it's blocked.
///
/// # Return value
///
/// * 0 - indicates that the thread blocked and then was woken up
/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
/// didn't block
/// * 2 - the thread blocked, but the timeout expired.
///
/// # Availability
///
/// This intrinsic is only available **when the standard library itself is
/// compiled with the `atomics` target feature**. This version of the standard
/// library is not obtainable via `rustup`, but rather will require the
/// standard library to be compiled from source.
///
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
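///
/// # Examples
///
/// A hedged sketch; `flag` is a hypothetical shared `*mut i32`:
///
/// ```ignore
/// // Block while `*flag == 0` until another thread calls
/// // `memory_atomic_notify` on `flag`, or until one second elapses.
/// let r = memory_atomic_wait32(flag, 0, 1_000_000_000);
/// ```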
#[inline]
#[cfg_attr(test, assert_instr("i32.atomic.wait"))]
pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 {
llvm_atomic_wait_i32(ptr, expression, timeout_ns)
}
/// Corresponding intrinsic to wasm's [`memory.atomic.wait64` instruction][instr]
///
/// This function, when called, will block the current thread if the memory
/// pointed to by `ptr` is equal to `expression` (performing this action
/// atomically).
///
/// The argument `timeout_ns` is a maximum number of nanoseconds the calling
/// thread will be blocked for, if it blocks. If the timeout is negative then
/// the calling thread will be blocked forever.
///
/// The calling thread can only be woken up with a call to the `memory_atomic_notify` intrinsic
/// once it has been blocked. Changing the memory behind `ptr` will not wake
/// the thread once it's blocked.
///
/// # Return value
///
/// * 0 - indicates that the thread blocked and then was woken up
/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
/// didn't block
/// * 2 - the thread blocked, but the timeout expired.
///
/// # Availability
///
/// This intrinsic is only available **when the standard library itself is
/// compiled with the `atomics` target feature**. This version of the standard
/// library is not obtainable via `rustup`, but rather will require the
/// standard library to be compiled from source.
///
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
#[inline]
#[cfg_attr(test, assert_instr("i64.atomic.wait"))]
pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 {
llvm_atomic_wait_i64(ptr, expression, timeout_ns)
}
/// Corresponding intrinsic to wasm's [`memory.atomic.notify` instruction][instr]
///
/// This function will notify a number of threads blocked on the address
/// indicated by `ptr`. Threads previously blocked with the
/// `memory_atomic_wait32` and `memory_atomic_wait64` functions above will be woken up.
///
/// The `waiters` argument indicates how many waiters should be woken up (a
/// maximum). If the value is zero no waiters are woken up.
///
/// # Return value
///
/// Returns the number of waiters which were actually notified.
///
/// # Availability
///
/// This intrinsic is only available **when the standard library itself is
/// compiled with the `atomics` target feature**. This version of the standard
/// library is not obtainable via `rustup`, but rather will require the
/// standard library to be compiled from source.
///
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
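///
/// # Examples
///
/// A hedged sketch; `flag` is a hypothetical shared `*mut i32`:
///
/// ```ignore
/// // Wake at most one thread currently waiting on `flag`.
/// let woken = memory_atomic_notify(flag, 1);
/// ```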
#[inline]
#[cfg_attr(test, assert_instr("atomic.wake"))]
pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 {
llvm_atomic_notify(ptr, waiters as i32) as u32
}


@@ -0,0 +1,62 @@
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
#[link_name = "llvm.wasm.memory.grow.i32"]
fn llvm_memory_grow(mem: i32, pages: i32) -> i32;
#[link_name = "llvm.wasm.memory.size.i32"]
fn llvm_memory_size(mem: i32) -> i32;
}
/// Corresponding intrinsic to wasm's [`memory.size` instruction][instr]
///
/// This function, when called, will return the current memory size in units of
/// pages. The current WebAssembly page size is 65536 bytes (64 KB).
///
/// The argument `mem` is the numerical index of which memory to return the
/// size of. Note that currently the WebAssembly specification only supports one
/// memory, so it is required that zero is passed in. The argument is present to
/// be forward-compatible with future WebAssembly revisions. If a nonzero
/// argument is passed to this function it will currently unconditionally abort.
///
/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-size
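///
/// # Examples
///
/// A hedged sketch:
///
/// ```ignore
/// // Current size of the default linear memory, in bytes.
/// let bytes = memory_size(0) * 65536;
/// ```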
#[inline]
#[cfg_attr(test, assert_instr("memory.size", mem = 0))]
#[rustc_args_required_const(0)]
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub fn memory_size(mem: u32) -> usize {
unsafe {
if mem != 0 {
crate::intrinsics::abort();
}
llvm_memory_size(0) as usize
}
}
/// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr]
///
/// This function, when called, will attempt to grow the default linear memory
/// by the specified `delta` of pages. The current WebAssembly page size is
/// 65536 bytes (64 KB). If memory is successfully grown then the previous size
/// of memory, in pages, is returned. If memory cannot be grown then
/// `usize::MAX` is returned.
///
/// The argument `mem` is the numerical index of which memory to grow. Note
/// that currently the WebAssembly specification only supports one
/// memory, so it is required that zero is passed in. The argument is present to
/// be forward-compatible with future WebAssembly revisions. If a nonzero
/// argument is passed to this function it will currently unconditionally abort.
///
/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-grow
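///
/// # Examples
///
/// A hedged sketch:
///
/// ```ignore
/// // Try to grow the default linear memory by one 64 KiB page.
/// let previous_pages = memory_grow(0, 1);
/// if previous_pages == usize::MAX {
///     // The engine refused to grow memory.
/// }
/// ```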
#[inline]
#[cfg_attr(test, assert_instr("memory.grow", mem = 0))]
#[rustc_args_required_const(0)]
#[stable(feature = "simd_wasm32", since = "1.33.0")]
pub fn memory_grow(mem: u32, delta: usize) -> usize {
unsafe {
if mem != 0 {
crate::intrinsics::abort();
}
llvm_memory_grow(0, delta as i32) as isize as usize
}
}


@@ -0,0 +1,23 @@
//! WASM32 intrinsics
#[cfg(test)]
use stdarch_test::assert_instr;
#[cfg(any(target_feature = "atomics", doc))]
mod atomic;
#[cfg(any(target_feature = "atomics", doc))]
pub use self::atomic::*;
mod simd128;
pub use self::simd128::*;
mod memory;
pub use self::memory::*;
/// Generates the trap instruction `UNREACHABLE`
#[cfg_attr(test, assert_instr(unreachable))]
#[inline]
#[stable(feature = "unreachable_wasm32", since = "1.37.0")]
pub unsafe fn unreachable() -> ! {
crate::intrinsics::abort()
}

File diff suppressed because it is too large.


@@ -0,0 +1,62 @@
//! Advanced Bit Manipulation (ABM) instructions
//!
//! The POPCNT and LZCNT instructions have their own CPUID bits to indicate support.
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//! Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//! System Instructions][amd64_ref].
//!
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
//! available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
#[cfg(test)]
use stdarch_test::assert_instr;
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, this returns the operand width in bits (`32`).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32)
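///
/// A sketch mirroring the test below:
///
/// ```ignore
/// // 0b0101_1010 has 25 leading zero bits in a 32-bit value.
/// assert_eq!(unsafe { _lzcnt_u32(0b0101_1010) }, 25);
/// ```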
#[inline]
#[target_feature(enable = "lzcnt")]
#[cfg_attr(test, assert_instr(lzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
x.leading_zeros()
}
/// Counts the bits that are set.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32)
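///
/// A sketch mirroring the test below:
///
/// ```ignore
/// // 0b0101_1010 has four bits set.
/// assert_eq!(unsafe { _popcnt32(0b0101_1010) }, 4);
/// ```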
#[inline]
#[target_feature(enable = "popcnt")]
#[cfg_attr(test, assert_instr(popcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _popcnt32(x: i32) -> i32 {
x.count_ones() as i32
}
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
#[simd_test(enable = "lzcnt")]
unsafe fn test_lzcnt_u32() {
assert_eq!(_lzcnt_u32(0b0101_1010), 25);
}
#[simd_test(enable = "popcnt")]
unsafe fn test_popcnt32() {
assert_eq!(_popcnt32(0b0101_1010), 4);
}
}

Some files were not shown because too many files have changed in this diff.