mirror of git://gcc.gnu.org/git/gcc.git
gccrs: Import stdarch 1.49.0
This commit imports stdarch 1.49.0 into libgrust/rustc-lib/stdarch. This is necessary for compiling libcore, as libcore attempts to reuse some files from stdarch. libgrust/ChangeLog: * rustc-lib/stdarch/.cirrus.yml: New file. * rustc-lib/stdarch/.github/workflows/main.yml: New file. * rustc-lib/stdarch/.gitignore: New file. * rustc-lib/stdarch/CONTRIBUTING.md: New file. * rustc-lib/stdarch/Cargo.toml: New file. * rustc-lib/stdarch/LICENSE-APACHE: New file. * rustc-lib/stdarch/LICENSE-MIT: New file. * rustc-lib/stdarch/README.md: New file. * rustc-lib/stdarch/ci/android-install-ndk.sh: New file. * rustc-lib/stdarch/ci/android-install-sdk.sh: New file. * rustc-lib/stdarch/ci/android-sysimage.sh: New file. * rustc-lib/stdarch/ci/docker/aarch64-linux-android/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/arm-linux-androideabi/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/i586-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/i686-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/mips-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/mipsel-unknown-linux-musl/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/nvptx64-nvidia-cuda/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/powerpc-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/s390x-unknown-linux-gnu/Dockerfile: New file. 
* rustc-lib/stdarch/ci/docker/wasm32-wasi/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/x86_64-linux-android/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile: New file. * rustc-lib/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile: New file. * rustc-lib/stdarch/ci/dox.sh: New file. * rustc-lib/stdarch/ci/gba.json: New file. * rustc-lib/stdarch/ci/run-docker.sh: New file. * rustc-lib/stdarch/ci/run.sh: New file. * rustc-lib/stdarch/ci/runtest-android.rs: New file. * rustc-lib/stdarch/ci/style.sh: New file. * rustc-lib/stdarch/crates/assert-instr-macro/Cargo.toml: New file. * rustc-lib/stdarch/crates/assert-instr-macro/build.rs: New file. * rustc-lib/stdarch/crates/assert-instr-macro/src/lib.rs: New file. * rustc-lib/stdarch/crates/core_arch/Cargo.toml: New file. * rustc-lib/stdarch/crates/core_arch/LICENSE-APACHE: New file. * rustc-lib/stdarch/crates/core_arch/LICENSE-MIT: New file. * rustc-lib/stdarch/crates/core_arch/README.md: New file. * rustc-lib/stdarch/crates/core_arch/avx512f.md: New file. * rustc-lib/stdarch/crates/core_arch/build.rs: New file. * rustc-lib/stdarch/crates/core_arch/rustfmt.toml: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/crc.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/crypto.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/neon/generated.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/neon/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/prefetch.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/test_support.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/tme.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/aarch64/v8.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/barrier/common.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/barrier/cp15.rs: New file. 
* rustc-lib/stdarch/crates/core_arch/src/acle/barrier/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/barrier/not_mclass.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/barrier/v8.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/dsp.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/ex.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/hints.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/registers/aarch32.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/registers/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/registers/v6m.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/registers/v7m.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/sat.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/acle/simd32.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/armclang.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/crc.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/neon/generated.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/neon/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/neon/table_lookup_tests.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/test_support.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/v6.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/arm/v7.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/core_arch_docs.md: New file. * rustc-lib/stdarch/crates/core_arch/src/lib.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/macros.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/mips/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/mips/msa.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/mips/msa/macros.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/mod.rs: New file. 
* rustc-lib/stdarch/crates/core_arch/src/nvptx/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/powerpc/altivec.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/powerpc/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/powerpc/vsx.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/powerpc64/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/simd.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/simd_llvm.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/v64.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/wasm32/atomic.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/wasm32/memory.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/wasm32/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/wasm32/simd128.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/abm.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/adx.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/aes.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/avx.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/avx2.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/avx512f.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/avx512ifma.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/bmi1.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/bmi2.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/bswap.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/bt.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/cpuid.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/eflags.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/f16c.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/fma.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/fxsr.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/macros.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/mod.rs: New file. 
* rustc-lib/stdarch/crates/core_arch/src/x86/pclmulqdq.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/rdrand.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/rdtsc.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/rtm.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sha.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse2.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse3.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse41.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse42.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/sse4a.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/ssse3.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/tbm.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/test.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86/xsave.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/abm.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/adx.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/avx.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/avx2.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/avx512f.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/bmi.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/bmi2.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/bswap.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/bt.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/fxsr.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/mod.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/rdrand.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/sse.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/sse2.rs: New file. 
* rustc-lib/stdarch/crates/core_arch/src/x86_64/sse41.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/sse42.rs: New file. * rustc-lib/stdarch/crates/core_arch/src/x86_64/xsave.rs: New file. * rustc-lib/stdarch/crates/core_arch/tests/cpu-detection.rs: New file. * rustc-lib/stdarch/crates/simd-test-macro/Cargo.toml: New file. * rustc-lib/stdarch/crates/simd-test-macro/src/lib.rs: New file. * rustc-lib/stdarch/crates/std_detect/Cargo.toml: New file. * rustc-lib/stdarch/crates/std_detect/LICENSE-APACHE: New file. * rustc-lib/stdarch/crates/std_detect/LICENSE-MIT: New file. * rustc-lib/stdarch/crates/std_detect/README.md: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/aarch64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/arm.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/mips.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/mips64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/powerpc.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/arch/x86.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/bit.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/cache.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/error_macros.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/macros.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/mod.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/aarch64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs: New file. 
* rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/arm.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/mips.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/mod.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/other.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/os/x86.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv: New file. * rustc-lib/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv: New file. * rustc-lib/stdarch/crates/std_detect/src/lib.rs: New file. * rustc-lib/stdarch/crates/std_detect/src/mod.rs: New file. * rustc-lib/stdarch/crates/std_detect/tests/cpu-detection.rs: New file. * rustc-lib/stdarch/crates/std_detect/tests/macro_trailing_commas.rs: New file. * rustc-lib/stdarch/crates/std_detect/tests/x86-specific.rs: New file. * rustc-lib/stdarch/crates/stdarch-gen/Cargo.toml: New file. * rustc-lib/stdarch/crates/stdarch-gen/README.md: New file. * rustc-lib/stdarch/crates/stdarch-gen/neon.spec: New file. * rustc-lib/stdarch/crates/stdarch-gen/src/main.rs: New file. * rustc-lib/stdarch/crates/stdarch-test/Cargo.toml: New file. * rustc-lib/stdarch/crates/stdarch-test/src/disassembly.rs: New file. * rustc-lib/stdarch/crates/stdarch-test/src/lib.rs: New file. * rustc-lib/stdarch/crates/stdarch-test/src/wasm.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/.gitattributes: New file. 
* rustc-lib/stdarch/crates/stdarch-verify/Cargo.toml: New file. * rustc-lib/stdarch/crates/stdarch-verify/arm-intrinsics.html: New file. * rustc-lib/stdarch/crates/stdarch-verify/build.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/mips-msa.h: New file. * rustc-lib/stdarch/crates/stdarch-verify/src/lib.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/tests/arm.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/tests/mips.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/tests/x86-intel.rs: New file. * rustc-lib/stdarch/crates/stdarch-verify/x86-intel.xml: New file. * rustc-lib/stdarch/examples/Cargo.toml: New file. * rustc-lib/stdarch/examples/hex.rs: New file. * rustc-lib/stdarch/examples/wasm.rs: New file. * rustc-lib/stdarch/triagebot.toml: New file. * rustc-lib/stdarch/vendor.yml: New file. Signed-off-by: Owen Avery <powerboat9.gamer@gmail.com>
This commit is contained in:
parent
bee204863e
commit
b65abf4b39
|
|
@ -0,0 +1,13 @@
|
|||
task:
|
||||
name: x86_64-unknown-freebsd
|
||||
freebsd_instance:
|
||||
image: freebsd-12-1-release-amd64
|
||||
setup_script:
|
||||
- pkg install -y curl
|
||||
- curl https://sh.rustup.rs -sSf --output rustup.sh
|
||||
- sh rustup.sh --default-toolchain nightly -y
|
||||
- . $HOME/.cargo/env
|
||||
- rustup default nightly
|
||||
test_script:
|
||||
- . $HOME/.cargo/env
|
||||
- cargo build --all
|
||||
|
|
@ -0,0 +1,209 @@
|
|||
name: CI
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- auto
|
||||
- try
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
style:
|
||||
name: Check Style
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly
|
||||
- run: ci/style.sh
|
||||
|
||||
docs:
|
||||
name: Build Documentation
|
||||
needs: [style]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly
|
||||
- run: ci/dox.sh
|
||||
env:
|
||||
CI: 1
|
||||
- name: Publish documentation
|
||||
run: |
|
||||
cd target/doc
|
||||
git init
|
||||
git add .
|
||||
git -c user.name='ci' -c user.email='ci' commit -m init
|
||||
git push -f -q https://git:${{ secrets.github_token }}@github.com/${{ github.repository }} HEAD:gh-pages
|
||||
if: github.event_name == 'push' && github.event.ref == 'refs/heads/master'
|
||||
|
||||
verify:
|
||||
name: Automatic intrinsic verification
|
||||
needs: [style]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly
|
||||
- run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
|
||||
|
||||
env_override:
|
||||
name: Env Override
|
||||
needs: [style]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust
|
||||
run: rustup update nightly && rustup default nightly
|
||||
- run: RUST_STD_DETECT_UNSTABLE=avx cargo test --features=std_detect_env_override --manifest-path crates/std_detect/Cargo.toml env_override_no_avx
|
||||
|
||||
test:
|
||||
needs: [style]
|
||||
name: Test
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
target:
|
||||
# Targets that are run through Docker on Linux
|
||||
- i686-unknown-linux-gnu
|
||||
- x86_64-unknown-linux-gnu
|
||||
- x86_64-unknown-linux-gnu-emulated
|
||||
- arm-unknown-linux-gnueabihf
|
||||
- armv7-unknown-linux-gnueabihf
|
||||
- aarch64-unknown-linux-gnu
|
||||
- powerpc64le-unknown-linux-gnu
|
||||
- mips-unknown-linux-gnu
|
||||
- mips64-unknown-linux-gnuabi64
|
||||
- mips64el-unknown-linux-gnuabi64
|
||||
- s390x-unknown-linux-gnu
|
||||
- wasm32-wasi
|
||||
- i586-unknown-linux-gnu
|
||||
- x86_64-linux-android
|
||||
- arm-linux-androideabi
|
||||
- mipsel-unknown-linux-musl
|
||||
- aarch64-linux-android
|
||||
- nvptx64-nvidia-cuda
|
||||
- thumbv6m-none-eabi
|
||||
- thumbv7m-none-eabi
|
||||
- thumbv7em-none-eabi
|
||||
- thumbv7em-none-eabihf
|
||||
|
||||
# macOS targets
|
||||
- x86_64-apple-darwin
|
||||
# FIXME: gh-actions build environment doesn't have linker support
|
||||
# - i686-apple-darwin
|
||||
|
||||
# Windows targets
|
||||
- x86_64-pc-windows-msvc
|
||||
# FIXME: Disassembly not implemented for the following targets:
|
||||
# - x86_64-pc-windows-gnu:
|
||||
# - i686-pc-windows-gnu:
|
||||
# - i686-pc-windows-msvc:
|
||||
|
||||
include:
|
||||
- target: i686-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- target: x86_64-unknown-linux-gnu-emulated
|
||||
os: ubuntu-latest
|
||||
test_everything: true
|
||||
rustflags: --cfg stdarch_intel_sde
|
||||
- target: arm-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
- target: armv7-unknown-linux-gnueabihf
|
||||
os: ubuntu-latest
|
||||
rustflags: -C target-feature=+neon
|
||||
- target: mips-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
norun: true
|
||||
- target: mips64-unknown-linux-gnuabi64
|
||||
os: ubuntu-latest
|
||||
norun: true
|
||||
- target: mips64el-unknown-linux-gnuabi64
|
||||
os: ubuntu-latest
|
||||
norun: true
|
||||
- target: powerpc64le-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: true
|
||||
- target: s390x-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- target: wasm32-wasi
|
||||
os: ubuntu-latest
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- target: x86_64-apple-darwin
|
||||
os: macos-latest
|
||||
- target: x86_64-pc-windows-msvc
|
||||
os: windows-latest
|
||||
- target: i586-unknown-linux-gnu
|
||||
os: ubuntu-latest
|
||||
- target: x86_64-linux-android
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: 1
|
||||
- target: arm-linux-androideabi
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: 1
|
||||
- target: mipsel-unknown-linux-musl
|
||||
os: ubuntu-latest
|
||||
norun: 1
|
||||
- target: aarch64-linux-android
|
||||
os: ubuntu-latest
|
||||
disable_assert_instr: 1
|
||||
- target: nvptx64-nvidia-cuda
|
||||
os: ubuntu-latest
|
||||
- target: thumbv6m-none-eabi
|
||||
os: ubuntu-latest
|
||||
- target: thumbv7m-none-eabi
|
||||
os: ubuntu-latest
|
||||
- target: thumbv7em-none-eabi
|
||||
os: ubuntu-latest
|
||||
- target: thumbv7em-none-eabihf
|
||||
os: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Install Rust (rustup)
|
||||
run: |
|
||||
rustup update nightly --no-self-update
|
||||
rustup default nightly
|
||||
if: matrix.os != 'macos-latest'
|
||||
- name: Install Rust (macos)
|
||||
run: |
|
||||
curl https://sh.rustup.rs | sh -s -- -y --default-toolchain nightly
|
||||
echo "##[add-path]$HOME/.cargo/bin"
|
||||
rustup update nightly --no-self-update
|
||||
rustup default nightly
|
||||
if: matrix.os == 'macos-latest'
|
||||
- run: |
|
||||
rustup default nightly
|
||||
rustup target add ${{ matrix.target }}
|
||||
if: "!endsWith(matrix.target, 'emulated')"
|
||||
- run: cargo generate-lockfile
|
||||
|
||||
# Configure some env vars based on matrix configuration
|
||||
- run: echo "##[set-env name=NORUN]1"
|
||||
if: matrix.norun != '' || startsWith(matrix.target, 'thumb') || matrix.target == 'nvptx64-nvidia-cuda'
|
||||
- run: echo "##[set-env name=STDARCH_TEST_EVERYTHING]1"
|
||||
if: matrix.test_everything != ''
|
||||
- run: echo "##[set-env name=RUSTFLAGS]${{ matrix.rustflags }}"
|
||||
if: matrix.rustflags != ''
|
||||
- run: echo "##[set-env name=STDARCH_DISABLE_ASSERT_INSTR]1"
|
||||
if: matrix.disable_assert_instr != ''
|
||||
- run: echo "##[set-env name=NOSTD]1"
|
||||
if: startsWith(matrix.target, 'thumb') || matrix.target == 'nvptx64-nvidia-cuda'
|
||||
|
||||
# Windows & OSX go straight to `run.sh` ...
|
||||
- run: ./ci/run.sh
|
||||
shell: bash
|
||||
if: matrix.os != 'ubuntu-latest' || startsWith(matrix.target, 'thumb')
|
||||
env:
|
||||
TARGET: ${{ matrix.target }}
|
||||
|
||||
# ... while Linux goes to `run-docker.sh`
|
||||
- run: ./ci/run-docker.sh ${{ matrix.target }}
|
||||
shell: bash
|
||||
if: "matrix.os == 'ubuntu-latest' && !startsWith(matrix.target, 'thumb')"
|
||||
env:
|
||||
TARGET: ${{ matrix.target }}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
Cargo.lock
|
||||
.*.swp
|
||||
target
|
||||
tags
|
||||
crates/stdarch-gen/aarch64.rs
|
||||
crates/stdarch-gen/arm.rs
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
# Contributing to stdarch
|
||||
|
||||
The `stdarch` crate is more than willing to accept contributions! First you'll
|
||||
probably want to check out the repository and make sure that tests pass for you:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/rust-lang/stdarch
|
||||
$ cd stdarch
|
||||
$ cargo +nightly test
|
||||
```
|
||||
|
||||
To run codegen tests, run in release mode:
|
||||
|
||||
```
|
||||
$ cargo +nightly test --release -p core_arch
|
||||
```
|
||||
|
||||
Remember that this repository requires the nightly channel of Rust! If any of
|
||||
the above steps don't work, [please let us know][new]!
|
||||
|
||||
Next up you can [find an issue][issues] to help out on; we've selected a few
|
||||
with the [`help wanted`][help] and [`impl-period`][impl] tags which could
|
||||
particularly use some help. You may be most interested in [#40][vendor],
|
||||
implementing all vendor intrinsics on x86. That issue's got some good pointers
|
||||
about where to get started!
|
||||
|
||||
If you've got general questions feel free to [join us on gitter][gitter] and ask
|
||||
around! Feel free to ping either @BurntSushi or @alexcrichton with questions.
|
||||
|
||||
[gitter]: https://gitter.im/rust-impl-period/WG-libs-simd
|
||||
|
||||
# How to write examples for stdarch intrinsics
|
||||
|
||||
There are a few features that must be enabled for the given intrinsic to work
|
||||
properly and the example must only be run by `cargo test --doc` when the feature
|
||||
is supported by the CPU. As a result, the default `fn main` that is generated by
|
||||
`rustdoc` will not work (in most cases). Consider using the following as a guide
|
||||
to ensure your example works as expected.
|
||||
|
||||
```rust
|
||||
/// # // We need cfg_target_feature to ensure the example is only
|
||||
/// # // run by `cargo test --doc` when the CPU supports the feature
|
||||
/// # #![feature(cfg_target_feature)]
|
||||
/// # // We need target_feature for the intrinsic to work
|
||||
/// # #![feature(target_feature)]
|
||||
/// #
|
||||
/// # // rustdoc by default uses `extern crate stdarch`, but we need the
|
||||
/// # // `#[macro_use]`
|
||||
/// # #[macro_use] extern crate stdarch;
|
||||
/// #
|
||||
/// # // The real main function
|
||||
/// # fn main() {
|
||||
/// # // Only run this if `<target feature>` is supported
|
||||
/// # if cfg_feature_enabled!("<target feature>") {
|
||||
/// # // Create a `worker` function that will only be run if the target feature
|
||||
/// # // is supported and ensure that `target_feature` is enabled for your worker
|
||||
/// # // function
|
||||
/// # #[target_feature(enable = "<target feature>")]
|
||||
/// # unsafe fn worker() {
|
||||
///
|
||||
/// // Write your example here. Feature specific intrinsics will work here! Go wild!
|
||||
///
|
||||
/// # }
|
||||
/// # unsafe { worker(); }
|
||||
/// # }
|
||||
/// # }
|
||||
```
|
||||
|
||||
If some of the above syntax does not look familiar, the [Documentation as tests] section
|
||||
of the [Rust Book] describes the `rustdoc` syntax quite well. As always, feel free
|
||||
to [join us on gitter][gitter] and ask us if you hit any snags, and thank you for helping
|
||||
to improve the documentation of `stdarch`!
|
||||
|
||||
[new]: https://github.com/rust-lang/stdarch/issues/new
|
||||
[issues]: https://github.com/rust-lang/stdarch/issues
|
||||
[help]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22
|
||||
[impl]: https://github.com/rust-lang/stdarch/issues?q=is%3Aissue+is%3Aopen+label%3Aimpl-period
|
||||
[vendor]: https://github.com/rust-lang/stdarch/issues/40
|
||||
[Documentation as tests]: https://doc.rust-lang.org/book/first-edition/documentation.html#documentation-as-tests
|
||||
[Rust Book]: https://doc.rust-lang.org/book/first-edition
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"crates/stdarch-verify",
|
||||
"crates/core_arch",
|
||||
"crates/std_detect",
|
||||
"crates/stdarch-gen",
|
||||
"examples/"
|
||||
]
|
||||
exclude = [
|
||||
"crates/wasm-assert-instr-tests"
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
|
||||
[profile.bench]
|
||||
debug = 1
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2017 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
stdarch - Rust's standard library SIMD components
|
||||
=======
|
||||
|
||||
[![Actions Status](https://github.com/rust-lang/stdarch/workflows/CI/badge.svg)](https://github.com/rust-lang/stdarch/actions)
|
||||
|
||||
|
||||
# Crates
|
||||
|
||||
This repository contains two main crates:
|
||||
|
||||
* [![core_arch_crate_badge]][core_arch_crate_link]
|
||||
[![core_arch_docs_badge]][core_arch_docs_link]
|
||||
[`core_arch`](crates/core_arch/README.md) implements `core::arch` - Rust's
|
||||
core library architecture-specific intrinsics, and
|
||||
|
||||
* [![std_detect_crate_badge]][std_detect_crate_link]
|
||||
[![std_detect_docs_badge]][std_detect_docs_link]
|
||||
[`std_detect`](crates/std_detect/README.md) implements `std::detect` - Rust's
|
||||
standard library run-time CPU feature detection.
|
||||
|
||||
The `std::simd` component now lives in the
|
||||
[`packed_simd`](https://github.com/rust-lang-nursery/packed_simd) crate.
|
||||
|
||||
# How to do a release
|
||||
|
||||
To do a release of the `core_arch` and `std_detect` crates,
|
||||
|
||||
* bump up the version appropriately,
|
||||
* comment out the `dev-dependencies` in their `Cargo.toml` files (due to
|
||||
https://github.com/rust-lang/cargo/issues/4242),
|
||||
* publish the crates.
|
||||
|
||||
[core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg
|
||||
[core_arch_crate_link]: https://crates.io/crates/core_arch
|
||||
[core_arch_docs_badge]: https://docs.rs/core_arch/badge.svg
|
||||
[core_arch_docs_link]: https://docs.rs/core_arch/
|
||||
[std_detect_crate_badge]: https://img.shields.io/crates/v/std_detect.svg
|
||||
[std_detect_crate_link]: https://crates.io/crates/std_detect
|
||||
[std_detect_docs_badge]: https://docs.rs/std_detect/badge.svg
|
||||
[std_detect_docs_link]: https://docs.rs/std_detect/
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env sh
|
||||
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
# Download the Android NDK, build a standalone toolchain for the architecture
# given as the first argument, install it under /android/ndk-<arg>, and then
# delete the downloaded archive and the unpacked NDK tree.
ndk=android-ndk-r15b
archive="${ndk}-linux-x86_64.zip"
target="${1}"

curl --retry 5 -O "https://dl.google.com/android/repository/${archive}"
unzip -q "${archive}"

# Translate the Rust-style architecture name into the name the NDK tool
# expects; any other value is passed through unchanged.
if [ "${target}" = "aarch64" ]; then
  arch=arm64
elif [ "${target}" = "i686" ]; then
  arch=x86
else
  arch="${target}"
fi

"${ndk}/build/tools/make_standalone_toolchain.py" \
  --unified-headers \
  --install-dir "/android/ndk-${target}" \
  --arch "${arch}" \
  --api 24

rm -rf "./${archive}" "./${ndk}"
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env sh
|
||||
# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
# Prep the SDK and emulator
|
||||
#
|
||||
# Note that the update process requires that we accept a bunch of licenses, and
|
||||
# we can't just pipe `yes` into it for some reason, so we take the same strategy
|
||||
# located in https://github.com/appunite/docker by just wrapping it in a script
|
||||
# which apparently magically accepts the licenses.
|
||||
|
||||
# Fetch and unpack the Android SDK tools into ./sdk.
sdk_zip=sdk-tools-linux-3859397.zip
sdk_bin=./sdk/tools/bin

mkdir sdk
curl --retry 5 "https://dl.google.com/android/repository/${sdk_zip}" -O
unzip -d sdk "${sdk_zip}"

# Map the architecture given as the first argument to the Android ABI name
# used in the system-image package id; reject anything we do not know.
case "$1" in
  arm | armv7) abi=armeabi-v7a ;;
  aarch64)     abi=arm64-v8a ;;
  i686)        abi=x86 ;;
  x86_64)      abi=x86_64 ;;
  *)
    echo "invalid arch: $1"
    exit 1
    ;;
esac

system_image="system-images;android-24;default;$abi"

# --no_https avoids
# javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
yes | "${sdk_bin}/sdkmanager" --licenses --no_https
yes | "${sdk_bin}/sdkmanager" --no_https \
  "emulator" \
  "platform-tools" \
  "platforms;android-24" \
  "${system_image}"

# Create an AVD named after the requested architecture, feeding "no" to the
# interactive question avdmanager asks.
echo "no" |
  "${sdk_bin}/avdmanager" create avd \
    --name "${1}" \
    --package "${system_image}"
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
set -ex
|
||||
|
||||
URL=https://dl.google.com/android/repository/sys-img/android
|
||||
|
||||
# Download an Android system image and copy its dynamic linker and core
# libraries into /system so Android executables can be run directly (or with
# qemu).
#
# Arguments:
#   $1 - architecture; "x86_64" and "arm64" select the 64-bit linker and
#        lib64 directory, anything else selects the 32-bit ones.
#   $2 - file name of the system-image zip, appended to ${URL}.
main() {
    local arch="${1}"
    local name="${2}"
    local dest=/system
    local td
    td="$(mktemp -d)"

    # -y keeps the install non-interactive (matching the purge below), so an
    # image build cannot stall or abort on a confirmation prompt.
    apt-get install -y --no-install-recommends e2tools

    pushd "$td"
    curl --retry 5 -O "${URL}/${name}"
    unzip -q "${name}"

    local system
    system=$(find . -name system.img)
    mkdir -p "${dest}"/{bin,lib,lib64}

    # Extract android linker and libraries to /system
    # This allows android executables to be run directly (or with qemu)
    # The 64-bit files carry a "64" suffix on both the linker name and the
    # library directory; the 32-bit ones carry none.
    local bits=""
    if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
        bits=64
    fi

    e2cp -p "${system}:/bin/linker${bits}" "${dest}/bin/"
    local lib
    for lib in libdl.so libc.so libm.so; do
        e2cp -p "${system}:/lib${bits}/${lib}" "${dest}/lib${bits}/"
    done

    # clean up
    apt-get purge --auto-remove -y e2tools

    popd

    rm -rf "${td}"
}
|
||||
|
||||
main "${@}"
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN dpkg --add-architecture i386 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
make \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python \
|
||||
unzip \
|
||||
expect \
|
||||
openjdk-9-jre \
|
||||
libstdc++6:i386 \
|
||||
libpulse0 \
|
||||
gcc \
|
||||
libc6-dev
|
||||
|
||||
WORKDIR /android/
|
||||
COPY android* /android/
|
||||
|
||||
ENV ANDROID_ARCH=aarch64
|
||||
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
||||
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
||||
RUN mv /root/.android /tmp
|
||||
RUN chmod 777 -R /tmp/.android
|
||||
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
||||
|
||||
ENV PATH=$PATH:/rust/bin \
|
||||
CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
|
||||
CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
|
||||
OBJDUMP=aarch64-linux-android-objdump \
|
||||
HOME=/tmp
|
||||
|
||||
ADD runtest-android.rs /tmp/runtest.rs
|
||||
ENTRYPOINT [ \
|
||||
"bash", \
|
||||
"-c", \
|
||||
# set SHELL so android can detect a 64bits system, see
|
||||
# http://stackoverflow.com/a/41789144
|
||||
"SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
|
||||
rustc /tmp/runtest.rs -o /tmp/runtest && \
|
||||
exec \"$@\"", \
|
||||
"--" \
|
||||
]
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
FROM ubuntu:20.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-aarch64-linux-gnu \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
|
||||
OBJDUMP=aarch64-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN dpkg --add-architecture i386 && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
file \
|
||||
make \
|
||||
curl \
|
||||
ca-certificates \
|
||||
python \
|
||||
unzip \
|
||||
expect \
|
||||
openjdk-9-jre \
|
||||
libstdc++6:i386 \
|
||||
libpulse0 \
|
||||
gcc \
|
||||
libc6-dev
|
||||
|
||||
WORKDIR /android/
|
||||
COPY android* /android/
|
||||
|
||||
ENV ANDROID_ARCH=arm
|
||||
ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
|
||||
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
|
||||
RUN mv /root/.android /tmp
|
||||
RUN chmod 777 -R /tmp/.android
|
||||
RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
|
||||
|
||||
ENV PATH=$PATH:/rust/bin \
|
||||
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
|
||||
CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
|
||||
OBJDUMP=arm-linux-androideabi-objdump \
|
||||
HOME=/tmp
|
||||
|
||||
ADD runtest-android.rs /tmp/runtest.rs
|
||||
ENTRYPOINT [ \
|
||||
"bash", \
|
||||
"-c", \
|
||||
# set SHELL so android can detect a 64bits system, see
|
||||
# http://stackoverflow.com/a/41789144
|
||||
"SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
|
||||
rustc /tmp/runtest.rs -o /tmp/runtest && \
|
||||
exec \"$@\"", \
|
||||
"--" \
|
||||
]
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips-linux-gnu libc6-dev-mips-cross \
|
||||
qemu-system-mips \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
|
||||
CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
|
||||
OBJDUMP=mips-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
|
||||
qemu-system-mips64 qemu-user
|
||||
|
||||
ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
|
||||
OBJDUMP=mips64-linux-gnuabi64-objdump
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
|
||||
qemu-system-mips64el
|
||||
|
||||
ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
|
||||
CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
|
||||
OBJDUMP=mips64el-linux-gnuabi64-objdump
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
make \
|
||||
qemu-user \
|
||||
qemu-system-mips \
|
||||
bzip2 \
|
||||
curl \
|
||||
file
|
||||
|
||||
RUN mkdir /toolchain
|
||||
|
||||
# Note that this originally came from:
|
||||
# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
|
||||
RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
|
||||
tar xjf - -C /toolchain --strip-components=2
|
||||
|
||||
ENV PATH=$PATH:/rust/bin:/toolchain/bin \
|
||||
CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
|
||||
CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
|
||||
qemu-system-ppc make file
|
||||
|
||||
ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
|
||||
CC=powerpc-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc64-linux-gnu libc6-dev-ppc64-cross \
|
||||
qemu-system-ppc file make
|
||||
|
||||
ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -cpu power9 -L /usr/powerpc64-linux-gnu" \
|
||||
CC=powerpc64-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libc6-dev qemu-user ca-certificates \
|
||||
gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
|
||||
qemu-system-ppc file make
|
||||
|
||||
# Work around qemu triggering a sigill on vec_subs if the cpu target is not defined.
|
||||
ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
|
||||
CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -cpu power9 -L /usr/powerpc64le-linux-gnu" \
|
||||
CC=powerpc64le-linux-gnu-gcc \
|
||||
OBJDUMP=powerpc64le-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl ca-certificates \
|
||||
gcc libc6-dev \
|
||||
gcc-s390x-linux-gnu libc6-dev-s390x-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
|
||||
ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
|
||||
CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
|
||||
OBJDUMP=s390x-linux-gnu-objdump
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
FROM ubuntu:20.04
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
xz-utils \
|
||||
clang
|
||||
|
||||
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.19.0/wasmtime-v0.19.0-x86_64-linux.tar.xz | tar xJf -
|
||||
ENV PATH=$PATH:/wasmtime-v0.19.0-x86_64-linux
|
||||
|
||||
ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \
|
||||
--enable-simd \
|
||||
--mapdir .::/checkout/target/wasm32-wasi/release/deps \
|
||||
--"
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
FROM ubuntu:16.04
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
gcc \
|
||||
libc-dev \
|
||||
python \
|
||||
unzip \
|
||||
file \
|
||||
make
|
||||
|
||||
WORKDIR /android/
|
||||
ENV ANDROID_ARCH=x86_64
|
||||
COPY android-install-ndk.sh /android/
|
||||
RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
|
||||
|
||||
# We do not run x86_64-linux-android tests on an android emulator.
|
||||
# See ci/android-sysimage.sh for information about how tests are run.
|
||||
COPY android-sysimage.sh /android/
|
||||
RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
|
||||
|
||||
ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \
|
||||
CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \
|
||||
CC_x86_64_linux_android=x86_64-linux-android-gcc \
|
||||
CXX_x86_64_linux_android=x86_64-linux-android-g++ \
|
||||
OBJDUMP=x86_64-linux-android-objdump \
|
||||
HOME=/tmp
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates \
|
||||
wget \
|
||||
bzip2
|
||||
|
||||
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
|
||||
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
FROM ubuntu:18.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
file \
|
||||
make \
|
||||
ca-certificates
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Builds documentation for a fixed list of target triples. Each `dox` call at
|
||||
# the bottom of this script builds `core_arch` and `std_detect` for one triple
|
||||
# and writes their rustdoc output to `target/doc/<arch>`.
|
||||
|
||||
set -ex
|
||||
|
||||
rm -rf target/doc
|
||||
mkdir -p target/doc
|
||||
|
||||
# Build `core_arch` and `std_detect` for one target triple and render their
# rustdoc output into `target/doc/<arch>`.
#
# $1 - name of the output directory created under target/doc
# $2 - target triple handed to cargo and rustdoc
dox() {
  local doc_arch=$1
  local triple=$2
  local out_dir="target/doc/${doc_arch}"
  local deps_dir="target/${triple}/debug/deps"

  echo "documenting ${doc_arch}"

  # When $CI is set, try to install the target through rustup; a failure of
  # that step is deliberately ignored.
  if [ -n "$CI" ]; then
    rustup target add "${triple}" || true
  fi

  # Start from an empty output directory for this architecture.
  rm -rf "${out_dir}"
  mkdir "${out_dir}"

  export RUSTFLAGS="--cfg core_arch_docs"
  export RUSTDOCFLAGS="--cfg core_arch_docs"

  # Build both crates first so that rustdoc can find their dependencies in
  # the deps directory passed via --library-path below.
  cargo build --verbose --target "${triple}" --manifest-path crates/core_arch/Cargo.toml
  cargo build --verbose --target "${triple}" --manifest-path crates/std_detect/Cargo.toml

  rustdoc --verbose --target "${triple}" \
          -o "${out_dir}" crates/core_arch/src/lib.rs \
          --edition=2018 \
          --crate-name core_arch \
          --library-path "${deps_dir}" \
          --cfg core_arch_docs
  rustdoc --verbose --target "${triple}" \
          -o "${out_dir}" crates/std_detect/src/lib.rs \
          --edition=2018 \
          --crate-name std_detect \
          --library-path "${deps_dir}" \
          --extern cfg_if="$(ls "${deps_dir}"/libcfg_if-*.rlib)" \
          --extern libc="$(ls "${deps_dir}"/liblibc-*.rlib)" \
          --cfg core_arch_docs
}
|
||||
|
||||
dox i686 i686-unknown-linux-gnu
|
||||
dox x86_64 x86_64-unknown-linux-gnu
|
||||
dox arm armv7-unknown-linux-gnueabihf
|
||||
dox aarch64 aarch64-unknown-linux-gnu
|
||||
dox powerpc powerpc-unknown-linux-gnu
|
||||
dox powerpc64le powerpc64le-unknown-linux-gnu
|
||||
dox mips mips-unknown-linux-gnu
|
||||
dox mips64 mips64-unknown-linux-gnuabi64
|
||||
dox wasm32 wasm32-unknown-unknown
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"abi-blacklist": [
|
||||
"stdcall",
|
||||
"fastcall",
|
||||
"vectorcall",
|
||||
"thiscall",
|
||||
"win64",
|
||||
"sysv64"
|
||||
],
|
||||
"arch": "arm",
|
||||
"atomic-cas": false,
|
||||
"cpu": "arm7tdmi",
|
||||
"data-layout": "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64",
|
||||
"emit-debug-gdb-scripts": false,
|
||||
"env": "agb",
|
||||
"executables": true,
|
||||
"features": "+soft-float,+strict-align",
|
||||
"linker": "arm-none-eabi-ld",
|
||||
"linker-flavor": "ld",
|
||||
"linker-is-gnu": true,
|
||||
"llvm-target": "thumbv4-none-agb",
|
||||
"os": "none",
|
||||
"panic-strategy": "abort",
|
||||
"pre-link-args": {
|
||||
"ld": [
|
||||
"-Tlinker.ld"
|
||||
]
|
||||
},
|
||||
"relocation-model": "static",
|
||||
"target-c-int-width": "32",
|
||||
"target-endian": "little",
|
||||
"target-pointer-width": "32",
|
||||
"vendor": "nintendo"
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
# Small script to run tests for a target (or all targets) inside all the
|
||||
# respective docker images.
|
||||
|
||||
set -ex
|
||||
|
||||
# Build the docker image for one CI target and run `ci/run.sh` inside it.
#
# $1 - name of a directory under ci/docker; a `-emulated` part of the name is
#      removed before the value is exported to the container as TARGET.
run() {
    target=$(echo "$1" | sed 's/-emulated//')
    echo "Building docker container for TARGET=$1"
    docker build -t stdarch -f "ci/docker/$1/Dockerfile" ci/
    mkdir -p target
    echo "Running docker"
    # The checkout is mounted read-only; only target/ is writable. The `--env`
    # flags without a value forward the caller's setting of that variable.
    # shellcheck disable=SC2016
    docker run \
        --rm \
        --user "$(id -u):$(id -g)" \
        --env CARGO_HOME=/cargo \
        --env CARGO_TARGET_DIR=/checkout/target \
        --env TARGET="${target}" \
        --env STDARCH_TEST_EVERYTHING \
        --env STDARCH_ASSERT_INSTR_IGNORE \
        --env STDARCH_DISABLE_ASSERT_INSTR \
        --env NOSTD \
        --env NORUN \
        --env RUSTFLAGS \
        --env STDARCH_TEST_NORUN \
        --volume "$(dirname "$(dirname "$(command -v cargo)")"):/cargo" \
        --volume "$(rustc --print sysroot):/rust:ro" \
        --volume "$(pwd):/checkout:ro" \
        --volume "$(pwd)/target:/checkout/target" \
        --init \
        --workdir /checkout \
        --privileged \
        stdarch \
        sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/run.sh $1"
}
|
||||
|
||||
# With no argument, run every target that has a directory under ci/docker;
# otherwise run only the target named by the first argument.
if [ -z "$1" ]; then
    for d in ci/docker/*; do
        # `run` expects a bare target name and builds the
        # `ci/docker/<name>/Dockerfile` path itself, so strip the directory
        # prefix from the glob match instead of passing the full path.
        run "$(basename "${d}")"
    done
else
    run "${1}"
fi
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
set -ex
|
||||
|
||||
: "${TARGET?The TARGET environment variable must be set.}"
|
||||
|
||||
# Tests are all super fast anyway, and they fault often enough on travis that
|
||||
# having only one thread increases debuggability to be worth it.
|
||||
#export RUST_BACKTRACE=full
|
||||
#export RUST_TEST_NOCAPTURE=1
|
||||
#export RUST_TEST_THREADS=1
|
||||
|
||||
RUSTFLAGS="$RUSTFLAGS -D warnings "
|
||||
|
||||
case ${TARGET} in
|
||||
# On 32-bit use a static relocation model which avoids some extra
|
||||
# instructions when dealing with static data, notably allowing some
|
||||
# instruction assertion checks to pass below the 20 instruction limit. If
|
||||
# this is the default, dynamic, then too many instructions are generated
|
||||
# when we assert the instruction for a function and it causes tests to fail.
|
||||
#
|
||||
# It's not clear why `-Z plt=yes` is required here. Probably a bug in LLVM.
|
||||
# If you can remove it and CI passes, please feel free to do so!
|
||||
i686-* | i586-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static -Z plt=yes"
|
||||
;;
|
||||
#Unoptimized build uses fast-isel which breaks with msa
|
||||
mips-* | mipsel-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
echo "FEATURES=${FEATURES}"
|
||||
echo "OBJDUMP=${OBJDUMP}"
|
||||
echo "STDARCH_DISABLE_ASSERT_INSTR=${STDARCH_DISABLE_ASSERT_INSTR}"
|
||||
echo "STDARCH_TEST_EVERYTHING=${STDARCH_TEST_EVERYTHING}"
|
||||
|
||||
# Run `cargo test` for $TARGET, or `cargo build` when NORUN=1.
#
# $1 - extra cargo arguments placed before the `--` separator
# $2 - extra arguments placed after the `--` separator
cargo_test() {
    subcmd="test"
    if [ "$NORUN" = "1" ]; then
        export subcmd="build"
    fi
    cmd="cargo ${subcmd} --target=$TARGET $1 -- $2"

    # wasm targets can't catch panics, so if a test fails make sure the test
    # harness isn't trying to capture output; otherwise we won't get any
    # useful output.
    case ${TARGET} in
        wasm32*) cmd="$cmd --nocapture" ;;
    esac

    # Intentionally unquoted: the assembled string is split into arguments.
    $cmd
}
|
||||
|
||||
CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
|
||||
STD_DETECT="--manifest-path=crates/std_detect/Cargo.toml"
|
||||
STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
|
||||
cargo_test "${CORE_ARCH} --release"
|
||||
|
||||
if [ "$NOSTD" != "1" ]; then
|
||||
cargo_test "${STD_DETECT}"
|
||||
cargo_test "${STD_DETECT} --release"
|
||||
|
||||
cargo_test "${STD_DETECT} --no-default-features"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_file_io"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval"
|
||||
cargo_test "${STD_DETECT} --no-default-features --features=std_detect_dlsym_getauxval,std_detect_file_io"
|
||||
|
||||
cargo_test "${STDARCH_EXAMPLES}"
|
||||
cargo_test "${STDARCH_EXAMPLES} --release"
|
||||
fi
|
||||
|
||||
# Test targets compiled with extra features.
|
||||
case ${TARGET} in
|
||||
x86*)
|
||||
export STDARCH_DISABLE_ASSERT_INSTR=1
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
|
||||
cargo_test "--release"
|
||||
;;
|
||||
wasm32*)
|
||||
prev="$RUSTFLAGS"
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128,+unimplemented-simd128"
|
||||
cargo_test "--release"
|
||||
export RUSTFLAGS="$prev"
|
||||
;;
|
||||
# FIXME: don't build anymore
|
||||
#mips-*gnu* | mipsel-*gnu*)
|
||||
# export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
|
||||
# cargo_test "--release"
|
||||
# ;;
|
||||
mips64*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
|
||||
cargo_test "--release"
|
||||
;;
|
||||
powerpc64*)
|
||||
# We don't build the ppc 32-bit targets with these - these targets
|
||||
# are mostly unsupported for now.
|
||||
OLD_RUSTFLAGS="${RUSTFLAGS}"
|
||||
export RUSTFLAGS="${OLD_RUSTFLAGS} -C target-feature=+altivec"
|
||||
cargo_test "--release"
|
||||
|
||||
export RUSTFLAGS="${OLD_RUSTFLAGS} -C target-feature=+vsx"
|
||||
cargo_test "--release"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
|
||||
# Test examples
|
||||
(
|
||||
cd examples
|
||||
cargo test --target "$TARGET"
|
||||
echo test | cargo run --release hex
|
||||
)
|
||||
fi
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
use std::env;
|
||||
use std::process::Command;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Runs a single test binary on an attached Android device through `adb`.
///
/// Exactly one argument (the path of the test binary) is expected after any
/// `--quiet` flags have been dropped. The binary is pushed to
/// `/data/local/tmp`, executed with `adb shell`, and its captured output is
/// echoed to stdout.
///
/// # Panics
///
/// Panics if the argument count is wrong, if any `adb` invocation cannot be
/// started or exits unsuccessfully, or if the output does not contain at
/// least one `test result` line with every such line reporting success.
fn main() {
    let args = env::args_os()
        .skip(1)
        .filter(|arg| arg != "--quiet")
        .collect::<Vec<_>>();
    assert_eq!(args.len(), 1);
    let test = PathBuf::from(&args[0]);
    let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());

    let status = Command::new("adb")
        .arg("wait-for-device")
        .status()
        .expect("failed to run: adb wait-for-device");
    assert!(status.success());

    let status = Command::new("adb")
        .arg("push")
        .arg(&test)
        .arg(&dst)
        .status()
        .expect("failed to run: adb push");
    assert!(status.success());

    let output = Command::new("adb")
        .arg("shell")
        .arg(&dst)
        .output()
        .expect("failed to run: adb shell");

    // Echo everything before judging the run so that diagnostics are never
    // lost when one of the checks below panics.
    let stdout = String::from_utf8_lossy(&output.stdout);
    println!(
        "status: {}\nstdout ---\n{}\nstderr ---\n{}",
        output.status,
        stdout,
        String::from_utf8_lossy(&output.stderr)
    );

    // Check the status of the `adb shell` invocation itself; the previous
    // `status` binding belongs to `adb push` and was already asserted above.
    assert!(output.status.success());

    // `Iterator::all` is vacuously true for an empty iterator, so require at
    // least one `test result` line: a binary that dies before printing its
    // summary must not be reported as a successful run.
    let results: Vec<&str> = stdout
        .lines()
        .filter(|l| l.starts_with("test result"))
        .collect();
    let all_ok = results
        .iter()
        .all(|l| l.contains("test result: ok") && l.contains("0 failed"));
    if results.is_empty() || !all_ok {
        panic!("failed to find successful test run");
    }
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
set -ex
|
||||
|
||||
if rustup component add rustfmt-preview ; then
|
||||
command -v rustfmt
|
||||
rustfmt -V
|
||||
cargo fmt --all -- --check
|
||||
fi
|
||||
|
||||
# if rustup component add clippy-preview ; then
|
||||
# cargo clippy -V
|
||||
# cargo clippy --all -- -D clippy::pedantic
|
||||
# fi
|
||||
|
||||
if shellcheck --version ; then
|
||||
shellcheck -e SC2103 ci/*.sh
|
||||
else
|
||||
echo "shellcheck not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "assert-instr-macro"
|
||||
version = "0.1.0"
|
||||
authors = ["Alex Crichton <alex@alexcrichton.com>"]
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
||||
test = false
|
||||
|
||||
[dependencies]
|
||||
proc-macro2 = "1.0"
|
||||
quote = "1.0"
|
||||
syn = { version = "1.0", features = ["full"] }
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
use std::env;
|
||||
|
||||
/// Build script: emits `cargo:rustc-cfg=optimized` when `PROFILE` is
/// `release` or `OPT_LEVEL` parses to a value of at least 2.
///
/// A missing or unparsable `OPT_LEVEL` counts as 0; a missing `PROFILE`
/// counts as "not release".
fn main() {
    println!("cargo:rerun-if-changed=build.rs");

    let opt_level: i32 = match env::var("OPT_LEVEL") {
        Ok(raw) => raw.parse().unwrap_or(0),
        Err(_) => 0,
    };
    let is_release = env::var("PROFILE")
        .map(|profile| profile == "release")
        .unwrap_or(false);

    if is_release || opt_level >= 2 {
        println!("cargo:rustc-cfg=optimized");
    }
}
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
//! Implementation of the `#[assert_instr]` macro
|
||||
//!
|
||||
//! This macro is used when testing the `stdarch` crate and is used to generate
|
||||
//! test cases to assert that functions do indeed contain the instructions that
|
||||
//! we're expecting them to contain.
|
||||
//!
|
||||
//! The procedural macro here is relatively simple, it simply appends a
|
||||
//! `#[test]` function to the original token stream which asserts that the
|
||||
//! function itself contains the relevant instruction.
|
||||
|
||||
extern crate proc_macro;
|
||||
extern crate proc_macro2;
|
||||
#[macro_use]
|
||||
extern crate quote;
|
||||
extern crate syn;
|
||||
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::ToTokens;
|
||||
|
||||
#[proc_macro_attribute]
|
||||
pub fn assert_instr(
|
||||
attr: proc_macro::TokenStream,
|
||||
item: proc_macro::TokenStream,
|
||||
) -> proc_macro::TokenStream {
|
||||
let invoc = match syn::parse::<Invoc>(attr) {
|
||||
Ok(s) => s,
|
||||
Err(e) => return e.to_compile_error().into(),
|
||||
};
|
||||
let item = match syn::parse::<syn::Item>(item) {
|
||||
Ok(s) => s,
|
||||
Err(e) => return e.to_compile_error().into(),
|
||||
};
|
||||
let func = match item {
|
||||
syn::Item::Fn(ref f) => f,
|
||||
_ => panic!("must be attached to a function"),
|
||||
};
|
||||
|
||||
let instr = &invoc.instr;
|
||||
let name = &func.sig.ident;
|
||||
|
||||
// Disable assert_instr for x86 targets compiled with avx enabled, which
|
||||
// causes LLVM to generate different intrinsics that the ones we are
|
||||
// testing for.
|
||||
let disable_assert_instr = std::env::var("STDARCH_DISABLE_ASSERT_INSTR").is_ok();
|
||||
|
||||
// If instruction tests are disabled avoid emitting this shim at all, just
|
||||
// return the original item without our attribute.
|
||||
if !cfg!(optimized) || disable_assert_instr {
|
||||
return (quote! { #item }).into();
|
||||
}
|
||||
|
||||
let instr_str = instr
|
||||
.replace('.', "_")
|
||||
.replace('/', "_")
|
||||
.replace(':', "_")
|
||||
.replace(char::is_whitespace, "");
|
||||
let assert_name = syn::Ident::new(&format!("assert_{}_{}", name, instr_str), name.span());
|
||||
// These name has to be unique enough for us to find it in the disassembly later on:
|
||||
let shim_name = syn::Ident::new(
|
||||
&format!("stdarch_test_shim_{}_{}", name, instr_str),
|
||||
name.span(),
|
||||
);
|
||||
let mut inputs = Vec::new();
|
||||
let mut input_vals = Vec::new();
|
||||
let ret = &func.sig.output;
|
||||
for arg in func.sig.inputs.iter() {
|
||||
let capture = match *arg {
|
||||
syn::FnArg::Typed(ref c) => c,
|
||||
ref v => panic!(
|
||||
"arguments must not have patterns: `{:?}`",
|
||||
v.clone().into_token_stream()
|
||||
),
|
||||
};
|
||||
let ident = match *capture.pat {
|
||||
syn::Pat::Ident(ref i) => &i.ident,
|
||||
_ => panic!("must have bare arguments"),
|
||||
};
|
||||
if let Some(&(_, ref tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
|
||||
input_vals.push(quote! { #tokens });
|
||||
} else {
|
||||
inputs.push(capture);
|
||||
input_vals.push(quote! { #ident });
|
||||
}
|
||||
}
|
||||
|
||||
let attrs = func
|
||||
.attrs
|
||||
.iter()
|
||||
.filter(|attr| {
|
||||
attr.path
|
||||
.segments
|
||||
.first()
|
||||
.expect("attr.path.segments.first() failed")
|
||||
.ident
|
||||
.to_string()
|
||||
.starts_with("target")
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let attrs = Append(&attrs);
|
||||
|
||||
// Use an ABI on Windows that passes SIMD values in registers, like what
|
||||
// happens on Unix (I think?) by default.
|
||||
let abi = if cfg!(windows) {
|
||||
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
|
||||
} else {
|
||||
syn::LitStr::new("C", proc_macro2::Span::call_site())
|
||||
};
|
||||
let shim_name_str = format!("{}{}", shim_name, assert_name);
|
||||
let to_test = quote! {
|
||||
#attrs
|
||||
#[no_mangle]
|
||||
#[inline(never)]
|
||||
pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
|
||||
// The compiler in optimized mode by default runs a pass called
|
||||
// "mergefunc" where it'll merge functions that look identical.
|
||||
// Turns out some intrinsics produce identical code and they're
|
||||
// folded together, meaning that one just jumps to another. This
|
||||
// messes up our inspection of the disassembly of this function and
|
||||
// we're not a huge fan of that.
|
||||
//
|
||||
// To thwart this pass and prevent functions from being merged we
|
||||
// generate some code that's hopefully very tight in terms of
|
||||
// codegen but is otherwise unique to prevent code from being
|
||||
// folded.
|
||||
//
|
||||
// This is avoided on Wasm32 right now since these functions aren't
|
||||
// inlined which breaks our tests since each intrinsic looks like it
|
||||
// calls functions. Turns out functions aren't similar enough to get
|
||||
// merged on wasm32 anyway. This bug is tracked at
|
||||
// rust-lang/rust#74320.
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
::stdarch_test::_DONT_DEDUP.store(
|
||||
std::mem::transmute(#shim_name_str.as_bytes().as_ptr()),
|
||||
std::sync::atomic::Ordering::Relaxed,
|
||||
);
|
||||
#name(#(#input_vals),*)
|
||||
}
|
||||
};
|
||||
|
||||
let tokens: TokenStream = quote! {
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn #assert_name() {
|
||||
#to_test
|
||||
|
||||
// Make sure that the shim is not removed by leaking it to unknown
|
||||
// code:
|
||||
unsafe { llvm_asm!("" : : "r"(#shim_name as usize) : "memory" : "volatile") };
|
||||
|
||||
::stdarch_test::assert(#shim_name as usize,
|
||||
stringify!(#shim_name),
|
||||
#instr);
|
||||
}
|
||||
};
|
||||
|
||||
let tokens: TokenStream = quote! {
|
||||
#item
|
||||
#tokens
|
||||
};
|
||||
tokens.into()
|
||||
}
|
||||
|
||||
struct Invoc {
|
||||
instr: String,
|
||||
args: Vec<(syn::Ident, syn::Expr)>,
|
||||
}
|
||||
|
||||
impl syn::parse::Parse for Invoc {
|
||||
fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
|
||||
use syn::{ext::IdentExt, Token};
|
||||
|
||||
let mut instr = String::new();
|
||||
while !input.is_empty() {
|
||||
if input.parse::<Token![,]>().is_ok() {
|
||||
break;
|
||||
}
|
||||
if let Ok(ident) = syn::Ident::parse_any(input) {
|
||||
instr.push_str(&ident.to_string());
|
||||
continue;
|
||||
}
|
||||
if input.parse::<Token![.]>().is_ok() {
|
||||
instr.push_str(".");
|
||||
continue;
|
||||
}
|
||||
if let Ok(s) = input.parse::<syn::LitStr>() {
|
||||
instr.push_str(&s.value());
|
||||
continue;
|
||||
}
|
||||
println!("{:?}", input.cursor().token_stream());
|
||||
return Err(input.error("expected an instruction"));
|
||||
}
|
||||
if instr.is_empty() {
|
||||
return Err(input.error("expected an instruction before comma"));
|
||||
}
|
||||
let mut args = Vec::new();
|
||||
while !input.is_empty() {
|
||||
let name = input.parse::<syn::Ident>()?;
|
||||
input.parse::<Token![=]>()?;
|
||||
let expr = input.parse::<syn::Expr>()?;
|
||||
args.push((name, expr));
|
||||
|
||||
if input.parse::<Token![,]>().is_err() {
|
||||
if !input.is_empty() {
|
||||
return Err(input.error("extra tokens at end"));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(Self { instr, args })
|
||||
}
|
||||
}
|
||||
|
||||
struct Append<T>(T);
|
||||
|
||||
impl<T> quote::ToTokens for Append<T>
|
||||
where
|
||||
T: Clone + IntoIterator,
|
||||
T::Item: quote::ToTokens,
|
||||
{
|
||||
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
|
||||
for item in self.0.clone() {
|
||||
item.to_tokens(tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
[package]
|
||||
name = "core_arch"
|
||||
version = "0.1.5"
|
||||
authors = [
|
||||
"Alex Crichton <alex@alexcrichton.com>",
|
||||
"Andrew Gallant <jamslam@gmail.com>",
|
||||
"Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>",
|
||||
]
|
||||
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
|
||||
documentation = "https://docs.rs/core_arch"
|
||||
homepage = "https://github.com/rust-lang/stdarch"
|
||||
repository = "https://github.com/rust-lang/stdarch"
|
||||
readme = "README.md"
|
||||
keywords = ["core", "simd", "arch", "intrinsics"]
|
||||
categories = ["hardware-support", "no-std"]
|
||||
license = "MIT/Apache-2.0"
|
||||
build = "build.rs"
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" }
|
||||
is-it-maintained-open-issues = { repository = "rust-lang/stdarch" }
|
||||
maintenance = { status = "experimental" }
|
||||
|
||||
[dev-dependencies]
|
||||
stdarch-test = { version = "0.*", path = "../stdarch-test" }
|
||||
std_detect = { version = "0.*", path = "../std_detect" }
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2017 The Rust Project Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
`core::arch` - Rust's core library architecture-specific intrinsics
|
||||
=======
|
||||
|
||||
[![core_arch_crate_badge]][core_arch_crate_link] [![core_arch_docs_badge]][core_arch_docs_link]
|
||||
|
||||
|
||||
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
|
||||
|
||||
# Usage
|
||||
|
||||
`core::arch` is available as part of `libcore` and it is re-exported by
|
||||
`libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate.
|
||||
Unstable features are often available in nightly Rust via the
|
||||
`feature(stdsimd)` attribute.
|
||||
|
||||
Using `core::arch` via this crate requires nightly Rust, and it can (and does)
|
||||
break often. The only cases in which you should consider using it via this crate
|
||||
are:
|
||||
|
||||
* if you need to re-compile `core::arch` yourself, e.g., with particular
|
||||
target-features enabled that are not enabled for `libcore`/`libstd`. Note: if
|
||||
you need to re-compile it for a non-standard target, please prefer using
|
||||
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
|
||||
this crate.
|
||||
|
||||
* using some features that might not be available even behind unstable Rust
|
||||
features. We try to keep these to a minimum. If you need to use some of these
|
||||
features, please open an issue so that we can expose them in nightly Rust and
|
||||
you can use them from there.
|
||||
|
||||
# Documentation
|
||||
|
||||
* [Documentation - i686][i686]
|
||||
* [Documentation - x86\_64][x86_64]
|
||||
* [Documentation - arm][arm]
|
||||
* [Documentation - aarch64][aarch64]
|
||||
* [Documentation - powerpc][powerpc]
|
||||
* [Documentation - powerpc64][powerpc64]
|
||||
* [How to get started][contrib]
|
||||
* [How to help implement intrinsics][help-implement]
|
||||
|
||||
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
|
||||
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
|
||||
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
|
||||
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
|
||||
[arm]: https://rust-lang.github.io/stdarch/arm/core_arch/
|
||||
[aarch64]: https://rust-lang.github.io/stdarch/aarch64/core_arch/
|
||||
[powerpc]: https://rust-lang.github.io/stdarch/powerpc/core_arch/
|
||||
[powerpc64]: https://rust-lang.github.io/stdarch/powerpc64/core_arch/
|
||||
|
||||
# License
|
||||
|
||||
`core_arch` is primarily distributed under the terms of both the MIT license and
|
||||
the Apache License (Version 2.0), with portions covered by various BSD-like
|
||||
licenses.
|
||||
|
||||
See LICENSE-APACHE and LICENSE-MIT for details.
|
||||
|
||||
# Contribution
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted
|
||||
for inclusion in `core_arch` by you, as defined in the Apache-2.0 license,
|
||||
shall be dual licensed as above, without any additional terms or conditions.
|
||||
|
||||
[core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg
|
||||
[core_arch_crate_link]: https://crates.io/crates/core_arch
|
||||
[core_arch_docs_badge]: https://docs.rs/core_arch/badge.svg
|
||||
[core_arch_docs_link]: https://docs.rs/core_arch/
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,17 @@
|
|||
use std::env;
|
||||
|
||||
/// Build-script entry point: prints the `cfg` directives for this crate.
///
/// `core_arch_docs` is always enabled. `all_simd` is enabled only when the
/// compiler flags visible to the build script mention
/// `unimplemented-simd128`.
fn main() {
    println!("cargo:rustc-cfg=core_arch_docs");

    // Used to tell our `#[assert_instr]` annotations that all simd intrinsics
    // are available to test their codegen, since some are gated behind an extra
    // `-Ctarget-feature=+unimplemented-simd128` that doesn't have any
    // equivalent in `#[target_feature]` right now.
    //
    // Older Cargo releases export the flags to build scripts as `RUSTFLAGS`;
    // newer ones drop that variable and provide `CARGO_ENCODED_RUSTFLAGS`
    // instead, so both are consulted. A substring search works for either
    // encoding.
    const FLAG_VARS: [&str; 2] = ["RUSTFLAGS", "CARGO_ENCODED_RUSTFLAGS"];
    for var in FLAG_VARS.iter() {
        println!("cargo:rerun-if-env-changed={}", var);
    }
    let all_simd = FLAG_VARS.iter().any(|var| {
        env::var(var)
            .unwrap_or_default()
            .contains("unimplemented-simd128")
    });
    if all_simd {
        println!("cargo:rustc-cfg=all_simd");
    }
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
ignore = [
|
||||
"src/simd.rs",
|
||||
]
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
extern "C" {
|
||||
#[link_name = "llvm.aarch64.crc32x"]
|
||||
fn crc32x_(crc: u32, data: u64) -> u32;
|
||||
|
||||
#[link_name = "llvm.aarch64.crc32cx"]
|
||||
fn crc32cx_(crc: u32, data: u64) -> u32;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// CRC32 single round checksum for quad words (64 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(test, assert_instr(crc32x))]
|
||||
pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
|
||||
crc32x_(crc, data)
|
||||
}
|
||||
|
||||
/// CRC32-C single round checksum for quad words (64 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(test, assert_instr(crc32cx))]
|
||||
pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
|
||||
crc32cx_(crc, data)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::{aarch64::*, simd::*};
    use std::mem;
    use stdarch_test::simd_test;

    // `__crc32d` with a zero seed: all-zero data and all-ones data.
    #[simd_test(enable = "crc")]
    unsafe fn test_crc32d() {
        let zero_data = __crc32d(0, 0);
        assert_eq!(zero_data, 0);

        let all_ones_data = __crc32d(0, 18_446_744_073_709_551_615);
        assert_eq!(all_ones_data, 1_147_535_477);
    }

    // `__crc32cd` with a zero seed: all-zero data and all-ones data.
    #[simd_test(enable = "crc")]
    unsafe fn test_crc32cd() {
        let zero_data = __crc32cd(0, 0);
        assert_eq!(zero_data, 0);

        let all_ones_data = __crc32cd(0, 18_446_744_073_709_551_615);
        assert_eq!(all_ones_data, 3_293_575_501);
    }
}
|
||||
|
|
@ -0,0 +1,331 @@
|
|||
use crate::core_arch::arm::{uint32x4_t, uint8x16_t};
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.aarch64.crypto.aese"]
|
||||
fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
|
||||
#[link_name = "llvm.aarch64.crypto.aesd"]
|
||||
fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
|
||||
#[link_name = "llvm.aarch64.crypto.aesmc"]
|
||||
fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t;
|
||||
#[link_name = "llvm.aarch64.crypto.aesimc"]
|
||||
fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t;
|
||||
|
||||
#[link_name = "llvm.aarch64.crypto.sha1h"]
|
||||
fn vsha1h_u32_(hash_e: u32) -> u32;
|
||||
#[link_name = "llvm.aarch64.crypto.sha1su0"]
|
||||
fn vsha1su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha1su1"]
|
||||
fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha1c"]
|
||||
fn vsha1cq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha1p"]
|
||||
fn vsha1pq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha1m"]
|
||||
fn vsha1mq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
|
||||
|
||||
#[link_name = "llvm.aarch64.crypto.sha256h"]
|
||||
fn vsha256hq_u32_(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha256h2"]
|
||||
fn vsha256h2q_u32_(hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha256su0"]
|
||||
fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
|
||||
#[link_name = "llvm.aarch64.crypto.sha256su1"]
|
||||
fn vsha256su1q_u32_(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// AES single round encryption.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(aese))]
|
||||
pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
|
||||
vaeseq_u8_(data, key)
|
||||
}
|
||||
|
||||
/// AES single round decryption.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(aesd))]
|
||||
pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
|
||||
vaesdq_u8_(data, key)
|
||||
}
|
||||
|
||||
/// AES mix columns.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(aesmc))]
|
||||
pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
|
||||
vaesmcq_u8_(data)
|
||||
}
|
||||
|
||||
/// AES inverse mix columns.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(aesimc))]
|
||||
pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
|
||||
vaesimcq_u8_(data)
|
||||
}
|
||||
|
||||
/// SHA1 fixed rotate.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1h))]
|
||||
pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
|
||||
vsha1h_u32_(hash_e)
|
||||
}
|
||||
|
||||
/// SHA1 hash update accelerator, choose.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1c))]
|
||||
pub unsafe fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
|
||||
vsha1cq_u32_(hash_abcd, hash_e, wk)
|
||||
}
|
||||
|
||||
/// SHA1 hash update accelerator, majority.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1m))]
|
||||
pub unsafe fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
|
||||
vsha1mq_u32_(hash_abcd, hash_e, wk)
|
||||
}
|
||||
|
||||
/// SHA1 hash update accelerator, parity.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1p))]
|
||||
pub unsafe fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
|
||||
vsha1pq_u32_(hash_abcd, hash_e, wk)
|
||||
}
|
||||
|
||||
/// SHA1 schedule update accelerator, first part.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1su0))]
|
||||
pub unsafe fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
|
||||
vsha1su0q_u32_(w0_3, w4_7, w8_11)
|
||||
}
|
||||
|
||||
/// SHA1 schedule update accelerator, second part.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha1su1))]
|
||||
pub unsafe fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
|
||||
vsha1su1q_u32_(tw0_3, w12_15)
|
||||
}
|
||||
|
||||
/// SHA256 hash update accelerator.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha256h))]
|
||||
pub unsafe fn vsha256hq_u32(
|
||||
hash_abcd: uint32x4_t,
|
||||
hash_efgh: uint32x4_t,
|
||||
wk: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
vsha256hq_u32_(hash_abcd, hash_efgh, wk)
|
||||
}
|
||||
|
||||
/// SHA256 hash update accelerator, upper part.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha256h2))]
|
||||
pub unsafe fn vsha256h2q_u32(
|
||||
hash_efgh: uint32x4_t,
|
||||
hash_abcd: uint32x4_t,
|
||||
wk: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
vsha256h2q_u32_(hash_efgh, hash_abcd, wk)
|
||||
}
|
||||
|
||||
/// SHA256 schedule update accelerator, first part.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha256su0))]
|
||||
pub unsafe fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
|
||||
vsha256su0q_u32_(w0_3, w4_7)
|
||||
}
|
||||
|
||||
/// SHA256 schedule update accelerator, second part.
|
||||
#[inline]
|
||||
#[target_feature(enable = "crypto")]
|
||||
#[cfg_attr(test, assert_instr(sha256su1))]
|
||||
pub unsafe fn vsha256su1q_u32(
|
||||
tw0_3: uint32x4_t,
|
||||
w8_11: uint32x4_t,
|
||||
w12_15: uint32x4_t,
|
||||
) -> uint32x4_t {
|
||||
vsha256su1q_u32_(tw0_3, w8_11, w12_15)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::{aarch64::*, simd::*};
    use std::mem;
    use stdarch_test::simd_test;

    // Data block fed to every AES test.
    fn aes_block() -> u8x16 {
        u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)
    }

    // Round key fed to the AES encrypt/decrypt tests.
    fn aes_key() -> u8x16 {
        u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)
    }

    // Four-word vector used for every vector operand of the SHA tests.
    fn sha_words() -> u32x4 {
        u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vaeseq_u8() {
        let got: u8x16 = mem::transmute(vaeseq_u8(
            mem::transmute(aes_block()),
            mem::transmute(aes_key()),
        ));
        let want = u8x16::new(
            124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118, 124, 123, 124, 197,
        );
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vaesdq_u8() {
        let got: u8x16 = mem::transmute(vaesdq_u8(
            mem::transmute(aes_block()),
            mem::transmute(aes_key()),
        ));
        let want = u8x16::new(9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vaesmcq_u8() {
        let got: u8x16 = mem::transmute(vaesmcq_u8(mem::transmute(aes_block())));
        let want = u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vaesimcq_u8() {
        let got: u8x16 = mem::transmute(vaesimcq_u8(mem::transmute(aes_block())));
        let want = u8x16::new(
            43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80, 125, 70,
        );
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1h_u32() {
        let first = vsha1h_u32(0x1234);
        assert_eq!(first, 0x048d);
        let second = vsha1h_u32(0x5678);
        assert_eq!(second, 0x159e);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1su0q_u32() {
        let got: u32x4 = mem::transmute(vsha1su0q_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1su1q_u32() {
        let got: u32x4 = mem::transmute(vsha1su1q_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1cq_u32() {
        let got: u32x4 = mem::transmute(vsha1cq_u32(
            mem::transmute(sha_words()),
            0x1234,
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1pq_u32() {
        let got: u32x4 = mem::transmute(vsha1pq_u32(
            mem::transmute(sha_words()),
            0x1234,
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha1mq_u32() {
        let got: u32x4 = mem::transmute(vsha1mq_u32(
            mem::transmute(sha_words()),
            0x1234,
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha256hq_u32() {
        let got: u32x4 = mem::transmute(vsha256hq_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha256h2q_u32() {
        let got: u32x4 = mem::transmute(vsha256h2q_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha256su0q_u32() {
        let got: u32x4 = mem::transmute(vsha256su0q_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "crypto")]
    unsafe fn test_vsha256su1q_u32() {
        let got: u32x4 = mem::transmute(vsha256su1q_u32(
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
            mem::transmute(sha_words()),
        ));
        let want = u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f);
        assert_eq!(got, want);
    }
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
//! AArch64 intrinsics.
|
||||
//!
|
||||
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref].
|
||||
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
|
||||
//!
|
||||
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
|
||||
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
|
||||
|
||||
mod v8;
|
||||
pub use self::v8::*;
|
||||
|
||||
mod neon;
|
||||
pub use self::neon::*;
|
||||
|
||||
mod crypto;
|
||||
pub use self::crypto::*;
|
||||
|
||||
mod tme;
|
||||
pub use self::tme::*;
|
||||
|
||||
mod crc;
|
||||
pub use self::crc::*;
|
||||
|
||||
mod prefetch;
|
||||
pub use self::prefetch::*;
|
||||
|
||||
pub use super::acle::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `BRK 1`
|
||||
#[cfg_attr(test, assert_instr(brk))]
|
||||
#[inline]
|
||||
pub unsafe fn brk() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_support;
|
||||
|
|
@ -0,0 +1,666 @@
|
|||
// This code is automatically generated. DO NOT MODIFY.
|
||||
//
|
||||
// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
|
||||
//
|
||||
// ```
|
||||
// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
|
||||
// ```
|
||||
use super::*;
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare bitwise Equal (vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmeq))]
|
||||
pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmeq))]
|
||||
pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmeq))]
|
||||
pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
simd_eq(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmgt))]
|
||||
pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmgt))]
|
||||
pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned highe
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhi))]
|
||||
pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned highe
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhi))]
|
||||
pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmgt))]
|
||||
pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare greater than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmgt))]
|
||||
pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
simd_gt(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmgt))]
|
||||
pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmgt))]
|
||||
pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhi))]
|
||||
pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhi))]
|
||||
pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmgt))]
|
||||
pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare less than
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmgt))]
|
||||
pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
simd_lt(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmge))]
|
||||
pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmge))]
|
||||
pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhs))]
|
||||
pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhs))]
|
||||
pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmge))]
|
||||
pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare less than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmge))]
|
||||
pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
simd_le(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmge))]
|
||||
pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Compare signed greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmge))]
|
||||
pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhs))]
|
||||
pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Compare unsigned greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(cmhs))]
|
||||
pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmge))]
|
||||
pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point compare greater than or equal
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcmge))]
|
||||
pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
|
||||
simd_ge(a, b)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul))]
|
||||
pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul))]
|
||||
pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fsub))]
|
||||
pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
simd_sub(a, b)
|
||||
}
|
||||
|
||||
/// Subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fsub))]
|
||||
pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
simd_sub(a, b)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::core_arch::simd::*;
|
||||
use std::mem::transmute;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceq_u64() {
|
||||
let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceqq_u64() {
|
||||
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceq_s64() {
|
||||
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceqq_s64() {
|
||||
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceq_p64() {
|
||||
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceqq_p64() {
|
||||
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceq_f64() {
|
||||
let a: f64 = 1.2;
|
||||
let b: f64 = 1.2;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vceqq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.2, 3.4);
|
||||
let b: f64x2 = f64x2::new(1.2, 3.4);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgt_s64() {
|
||||
let a: i64x1 = i64x1::new(1);
|
||||
let b: i64x1 = i64x1::new(0);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgtq_s64() {
|
||||
let a: i64x2 = i64x2::new(1, 2);
|
||||
let b: i64x2 = i64x2::new(0, 1);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgt_u64() {
|
||||
let a: u64x1 = u64x1::new(1);
|
||||
let b: u64x1 = u64x1::new(0);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgtq_u64() {
|
||||
let a: u64x2 = u64x2::new(1, 2);
|
||||
let b: u64x2 = u64x2::new(0, 1);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgt_f64() {
|
||||
let a: f64 = 1.2;
|
||||
let b: f64 = 0.1;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgtq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.2, 2.3);
|
||||
let b: f64x2 = f64x2::new(0.1, 1.2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclt_s64() {
|
||||
let a: i64x1 = i64x1::new(0);
|
||||
let b: i64x1 = i64x1::new(1);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcltq_s64() {
|
||||
let a: i64x2 = i64x2::new(0, 1);
|
||||
let b: i64x2 = i64x2::new(1, 2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclt_u64() {
|
||||
let a: u64x1 = u64x1::new(0);
|
||||
let b: u64x1 = u64x1::new(1);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcltq_u64() {
|
||||
let a: u64x2 = u64x2::new(0, 1);
|
||||
let b: u64x2 = u64x2::new(1, 2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vclt_f64() {
|
||||
let a: f64 = 0.1;
|
||||
let b: f64 = 1.2;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcltq_f64() {
|
||||
let a: f64x2 = f64x2::new(0.1, 1.2);
|
||||
let b: f64x2 = f64x2::new(1.2, 2.3);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcle_s64() {
|
||||
let a: i64x1 = i64x1::new(0);
|
||||
let b: i64x1 = i64x1::new(1);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcleq_s64() {
|
||||
let a: i64x2 = i64x2::new(0, 1);
|
||||
let b: i64x2 = i64x2::new(1, 2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcle_u64() {
|
||||
let a: u64x1 = u64x1::new(0);
|
||||
let b: u64x1 = u64x1::new(1);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcleq_u64() {
|
||||
let a: u64x2 = u64x2::new(0, 1);
|
||||
let b: u64x2 = u64x2::new(1, 2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcle_f64() {
|
||||
let a: f64 = 0.1;
|
||||
let b: f64 = 1.2;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcleq_f64() {
|
||||
let a: f64x2 = f64x2::new(0.1, 1.2);
|
||||
let b: f64x2 = f64x2::new(1.2, 2.3);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcge_s64() {
|
||||
let a: i64x1 = i64x1::new(1);
|
||||
let b: i64x1 = i64x1::new(0);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgeq_s64() {
|
||||
let a: i64x2 = i64x2::new(1, 2);
|
||||
let b: i64x2 = i64x2::new(0, 1);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcge_u64() {
|
||||
let a: u64x1 = u64x1::new(1);
|
||||
let b: u64x1 = u64x1::new(0);
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgeq_u64() {
|
||||
let a: u64x2 = u64x2::new(1, 2);
|
||||
let b: u64x2 = u64x2::new(0, 1);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcge_f64() {
|
||||
let a: f64 = 1.2;
|
||||
let b: f64 = 0.1;
|
||||
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vcgeq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.2, 2.3);
|
||||
let b: f64x2 = f64x2::new(0.1, 1.2);
|
||||
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f64() {
|
||||
let a: f64 = 1.0;
|
||||
let b: f64 = 2.0;
|
||||
let e: f64 = 2.0;
|
||||
let r: f64 = transmute(vmul_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.0, 2.0);
|
||||
let b: f64x2 = f64x2::new(2.0, 3.0);
|
||||
let e: f64x2 = f64x2::new(2.0, 6.0);
|
||||
let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsub_f64() {
|
||||
let a: f64 = 1.0;
|
||||
let b: f64 = 1.0;
|
||||
let e: f64 = 0.0;
|
||||
let r: f64 = transmute(vsub_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.0, 4.0);
|
||||
let b: f64x2 = f64x2::new(1.0, 2.0);
|
||||
let e: f64x2 = f64x2::new(0.0, 2.0);
|
||||
let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,89 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.prefetch"]
|
||||
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
|
||||
}
|
||||
|
||||
/// `rw` argument for [`_prefetch`](fn._prefetch.html): the prefetch is preparing for a read.
pub const _PREFETCH_READ: i32 = 0;

/// `rw` argument for [`_prefetch`](fn._prefetch.html): the prefetch is preparing for a write.
pub const _PREFETCH_WRITE: i32 = 1;

/// `locality` argument for [`_prefetch`](fn._prefetch.html): streaming or non-temporal
/// prefetch, for data that is used only once.
pub const _PREFETCH_LOCALITY0: i32 = 0;

/// `locality` argument for [`_prefetch`](fn._prefetch.html): fetch into level 3 cache.
pub const _PREFETCH_LOCALITY1: i32 = 1;

/// `locality` argument for [`_prefetch`](fn._prefetch.html): fetch into level 2 cache.
pub const _PREFETCH_LOCALITY2: i32 = 2;

/// `locality` argument for [`_prefetch`](fn._prefetch.html): fetch into level 1 cache.
pub const _PREFETCH_LOCALITY3: i32 = 3;
|
||||
|
||||
/// Fetch the cache line that contains address `p` using the given `rw` and `locality`.
|
||||
///
|
||||
/// The `rw` must be one of:
|
||||
///
|
||||
/// * [`_PREFETCH_READ`](constant._PREFETCH_READ.html): the prefetch is preparing
|
||||
/// for a read.
|
||||
///
|
||||
/// * [`_PREFETCH_WRITE`](constant._PREFETCH_WRITE.html): the prefetch is preparing
|
||||
/// for a write.
|
||||
///
|
||||
/// The `locality` must be one of:
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY0`](constant._PREFETCH_LOCALITY0.html): Streaming or
|
||||
/// non-temporal prefetch, for data that is used only once.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY1`](constant._PREFETCH_LOCALITY1.html): Fetch into level 3 cache.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY2`](constant._PREFETCH_LOCALITY2.html): Fetch into level 2 cache.
|
||||
///
|
||||
/// * [`_PREFETCH_LOCALITY3`](constant._PREFETCH_LOCALITY3.html): Fetch into level 1 cache.
|
||||
///
|
||||
/// The prefetch memory instructions signal to the memory system that memory accesses
|
||||
/// from a specified address are likely to occur in the near future. The memory system
|
||||
/// can respond by taking actions that are expected to speed up the memory access when
|
||||
/// they do occur, such as preloading the specified address into one or more caches.
|
||||
/// Because these signals are only hints, it is valid for a particular CPU to treat
|
||||
/// any or all prefetch instructions as a NOP.
|
||||
///
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/memory-access-instructions/prefetching-memory?lang=en)
|
||||
#[inline(always)]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl1strm", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY0))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl3keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY1))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl2keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY2))]
|
||||
#[cfg_attr(test, assert_instr("prfm pldl1keep", rw = _PREFETCH_READ, locality = _PREFETCH_LOCALITY3))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl1strm", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY0))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl3keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY1))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl2keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY2))]
|
||||
#[cfg_attr(test, assert_instr("prfm pstl1keep", rw = _PREFETCH_WRITE, locality = _PREFETCH_LOCALITY3))]
|
||||
#[rustc_args_required_const(1, 2)]
|
||||
pub unsafe fn _prefetch(p: *const i8, rw: i32, locality: i32) {
|
||||
// We use the `llvm.prefetch` instrinsic with `cache type` = 1 (data cache).
|
||||
// `rw` and `strategy` are based on the function parameters.
|
||||
macro_rules! pref {
|
||||
($rdwr:expr, $local:expr) => {
|
||||
match ($rdwr, $local) {
|
||||
(0, 0) => prefetch(p, 0, 0, 1),
|
||||
(0, 1) => prefetch(p, 0, 1, 1),
|
||||
(0, 2) => prefetch(p, 0, 2, 1),
|
||||
(0, 3) => prefetch(p, 0, 3, 1),
|
||||
(1, 0) => prefetch(p, 1, 0, 1),
|
||||
(1, 1) => prefetch(p, 1, 1, 1),
|
||||
(1, 2) => prefetch(p, 1, 2, 1),
|
||||
(1, 3) => prefetch(p, 1, 3, 1),
|
||||
(_, _) => panic!(
|
||||
"Illegal (rw, locality) pair in prefetch, value ({}, {}).",
|
||||
$rdwr, $local
|
||||
),
|
||||
}
|
||||
};
|
||||
}
|
||||
pref!(rw, locality);
|
||||
}
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
use crate::core_arch::{aarch64::neon::*, arm::*, simd::*};
|
||||
use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
|
||||
|
||||
/// Expands to a `Vec<u64>` holding the fixed set of 64-bit patterns used as
/// inputs for the `u64`-element test drivers.
macro_rules! V_u64 {
    () => {
        vec![
            0x0000_0000_0000_0000_u64,
            0x0101_0101_0101_0101_u64,
            0x0202_0202_0202_0202_u64,
            0x0F0F_0F0F_0F0F_0F0F_u64,
            0x8080_8080_8080_8080_u64,
            0xF0F0_F0F0_F0F0_F0F0_u64,
            0xFFFF_FFFF_FFFF_FFFF_u64,
        ]
    };
}
|
||||
|
||||
/// Expands to a `Vec<f64>` holding the fixed set of values used as inputs for
/// the `f64`-element test drivers: a few ordinary numbers, the finite extremes,
/// both infinities and NaN.
macro_rules! V_f64 {
    () => {
        vec![
            0.0_f64,
            1.0_f64,
            -1.0_f64,
            1.2_f64,
            2.4_f64,
            std::f64::MAX,
            std::f64::MIN,
            std::f64::INFINITY,
            std::f64::NEG_INFINITY,
            std::f64::NAN,
        ]
    };
}
|
||||
|
||||
/// Expands to a closure that reinterprets a value of type `$src` as a `u64`.
///
/// The closure body calls `transmute`, so the macro has to be expanded inside
/// an `unsafe` context.
macro_rules! to64 {
    ($src:ident) => {
        |value: $src| -> u64 { transmute(value) }
    };
}
|
||||
|
||||
/// Expands to a closure that reinterprets a value of type `$src` as a `u128`.
///
/// The closure body calls `transmute`, so the macro has to be expanded inside
/// an `unsafe` context.
macro_rules! to128 {
    ($src:ident) => {
        |value: $src| -> u128 { transmute(value) }
    };
}
|
||||
|
||||
/// Checks `test_fun` against the scalar reference `verify_fun` for every entry of `vals`.
///
/// Each value is paired with itself (the list is zipped with itself), widened into an
/// argument of type `V` with `fill1`, and passed to `test_fun`. The reference result of
/// `verify_fun` is widened with `fill2`. Both results go through `cast` before they are
/// compared.
///
/// NOTE(review): only `(v, v)` pairs are exercised, never `(v, w)` with `v != w` — confirm
/// that a full cross product of `vals` was not intended.
///
/// # Panics
///
/// Panics through `assert_eq!` on the first value whose cast results differ.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + std::fmt::Debug,
    U: Copy + std::fmt::Debug + std::cmp::PartialEq,
    V: Copy + std::fmt::Debug,
    W: Copy + std::fmt::Debug,
    X: Copy + std::fmt::Debug + std::cmp::PartialEq,
{
    for (&lhs, &rhs) in vals.iter().zip(vals.iter()) {
        let lhs_arg: V = fill1(lhs);
        let rhs_arg: V = fill1(rhs);

        let got_raw: W = test_fun(lhs_arg, rhs_arg);
        let want_raw: W = fill2(verify_fun(lhs, rhs));

        let got: X = cast(got_raw);
        let want: X = cast(want_raw);

        assert_eq!(
            got, want,
            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
            lhs, rhs, &lhs_arg, &rhs_arg, got_raw, &lhs_arg, &rhs_arg, want_raw
        );
    }
}
|
||||
|
||||
/// Generates a `pub(crate)` driver named `$name` that forwards to `test` with
/// the given element type, reference result type, argument and result vector
/// types, comparison type, input values, fill functions and cast closure.
macro_rules! gen_test_fn {
    (
        $name:ident, $elem:ident, $verdict:ident, $arg_vec:ident, $ret_vec:ident, $bits:ident,
        $vals:expr, $fill1:expr, $fill2:expr, $cast:expr
    ) => {
        pub(crate) fn $name(
            test_fun: fn($arg_vec, $arg_vec) -> $ret_vec,
            verify_fun: fn($elem, $elem) -> $verdict,
        ) {
            // The `unsafe` block is needed because `$cast` is expanded here;
            // the `to64!`/`to128!` closures passed by the callers call `transmute`.
            unsafe {
                test::<$elem, $verdict, $arg_vec, $ret_vec, $bits>(
                    $vals, $fill1, $fill2, $cast, test_fun, verify_fun,
                )
            };
        }
    };
}
|
||||
|
||||
/// Generates a `pub(crate)` function `$id` that replicates `val` into
/// `$num_els` elements and reinterprets the resulting array as `$out_t`,
/// going through the integer type `$cmp_t` on the way.
///
/// `$el_width` is accepted but not used by the expansion.
macro_rules! gen_fill_fn {
    ($id:ident, $el_width:expr, $num_els:expr, $in_t:ident, $out_t:ident, $cmp_t:ident) => {
        pub(crate) fn $id(val: $in_t) -> $out_t {
            let lanes: [$in_t; $num_els] = [val; $num_els];
            unsafe {
                let bits: $cmp_t = transmute(lanes);
                transmute(bits)
            }
        }
    };
}
|
||||
|
||||
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
|
||||
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
|
||||
gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64);
|
||||
gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128);
|
||||
gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64);
|
||||
gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_f64,
|
||||
f64,
|
||||
f64,
|
||||
float64x1_t,
|
||||
float64x1_t,
|
||||
u64,
|
||||
V_f64!(),
|
||||
fill_f64,
|
||||
fill_f64,
|
||||
to64!(float64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_f64,
|
||||
f64,
|
||||
u64,
|
||||
float64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_f64!(),
|
||||
fill_f64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_f64,
|
||||
f64,
|
||||
f64,
|
||||
float64x2_t,
|
||||
float64x2_t,
|
||||
u128,
|
||||
V_f64!(),
|
||||
fillq_f64,
|
||||
fillq_f64,
|
||||
to128!(float64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_f64,
|
||||
f64,
|
||||
u64,
|
||||
float64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_f64!(),
|
||||
fillq_f64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_cmp_p64,
|
||||
u64,
|
||||
u64,
|
||||
poly64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_p64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_p64,
|
||||
u64,
|
||||
u64,
|
||||
poly64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_p64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
//! ARM's Transactional Memory Extensions (TME).
|
||||
//!
|
||||
//! This CPU feature is available on AArch64 (A architecture profile).
//! This feature is in the non-NEON feature set. TME-specific vendor documentation can
//! be found in the [TME Intrinsics Introduction][tme_intrinsics_intro].
|
||||
//!
|
||||
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
|
||||
//!
|
||||
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
|
||||
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
|
||||
//!
|
||||
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
|
||||
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
|
||||
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
|
||||
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.aarch64.tstart"]
|
||||
fn aarch64_tstart() -> u64;
|
||||
#[link_name = "llvm.aarch64.tcommit"]
|
||||
fn aarch64_tcommit() -> ();
|
||||
#[link_name = "llvm.aarch64.tcancel"]
|
||||
fn aarch64_tcancel(imm0: u64) -> ();
|
||||
#[link_name = "llvm.aarch64.ttest"]
|
||||
fn aarch64_ttest() -> u64;
|
||||
}
|
||||
|
||||
/// Transaction successfully started.
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;

/// Extraction mask for the failure reason.
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;

/// Transaction retry is possible.
pub const _TMFAILURE_RTRY: u64 = 1 << 15;

/// Transaction executed a TCANCEL instruction.
pub const _TMFAILURE_CNCL: u64 = 1 << 16;

/// Transaction aborted because a conflict occurred.
pub const _TMFAILURE_MEM: u64 = 1 << 17;

/// Fallback error type for any other reason.
pub const _TMFAILURE_IMP: u64 = 1 << 18;

/// Transaction aborted because a non-permissible operation was attempted.
pub const _TMFAILURE_ERR: u64 = 1 << 19;

/// Transaction aborted because the read or write set limit was exceeded.
pub const _TMFAILURE_SIZE: u64 = 1 << 20;

/// Transaction aborted because the transactional nesting level was exceeded.
pub const _TMFAILURE_NEST: u64 = 1 << 21;

/// Transaction aborted due to a debug trap.
pub const _TMFAILURE_DBG: u64 = 1 << 22;

/// Transaction failed because of an interrupt.
pub const _TMFAILURE_INT: u64 = 1 << 23;

/// Indicates a TRIVIAL version of TM is available.
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
|
||||
|
||||
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
|
||||
/// If the transaction fails, all state modifications are discarded and a cause of the failure
|
||||
/// is encoded in the return value.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(test, assert_instr(tstart))]
|
||||
pub unsafe fn __tstart() -> u64 {
|
||||
aarch64_tstart()
|
||||
}
|
||||
|
||||
/// Commits the current transaction. For a nested transaction, the only effect is that the
|
||||
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
|
||||
/// performed transactionally are committed to the architectural state.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(test, assert_instr(tcommit))]
|
||||
pub unsafe fn __tcommit() {
|
||||
aarch64_tcommit()
|
||||
}
|
||||
|
||||
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(test, assert_instr(tcancel, imm0 = 0x0))]
|
||||
#[rustc_args_required_const(0)]
|
||||
pub unsafe fn __tcancel(imm0: u64) {
|
||||
macro_rules! call {
|
||||
($imm0:expr) => {
|
||||
aarch64_tcancel($imm0)
|
||||
};
|
||||
}
|
||||
constify_imm8!(imm0, call)
|
||||
}
|
||||
|
||||
/// Tests if executing inside a transaction. If no transaction is currently executing,
|
||||
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(test, assert_instr(ttest))]
|
||||
pub unsafe fn __ttest() -> u64 {
|
||||
aarch64_ttest()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::aarch64::*;

    // Cancellation reason handed to `__tcancel`, restricted to the bits
    // covered by `_TMFAILURE_REASON`.
    const CANCEL_CODE: u64 = 0x123 & _TMFAILURE_REASON;

    // Tries up to ten times to start a transaction and stops at the first success.
    #[simd_test(enable = "tme")]
    unsafe fn test_tstart() {
        let mut started = 0;
        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                started += 1;
                assert_eq!(started, attempt + 1);
                break;
            }
            assert_eq!(started, 0);
        }
    }

    // Starts and commits a transaction on each of ten iterations; the counter
    // is expected to advance on every iteration.
    #[simd_test(enable = "tme")]
    unsafe fn test_tcommit() {
        let mut committed = 0;
        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                committed += 1;
                assert_eq!(committed, attempt + 1);
                tme::__tcommit();
            }
            assert_eq!(committed, attempt + 1);
        }
    }

    // Cancels the first transaction that starts; the increment made inside it
    // is expected to be discarded.
    #[simd_test(enable = "tme")]
    unsafe fn test_tcancel() {
        let mut counter = 0;

        for attempt in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS {
                counter += 1;
                assert_eq!(counter, attempt + 1);
                tme::__tcancel(CANCEL_CODE);
                break;
            }
        }

        assert_eq!(counter, 0);
    }

    // Cancels once `__ttest` reports a depth of 2.
    #[simd_test(enable = "tme")]
    unsafe fn test_ttest() {
        for _ in 0..10 {
            if tme::__tstart() == _TMSTART_SUCCESS && tme::__ttest() == 2 {
                tme::__tcancel(CANCEL_CODE);
                break;
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
//! ARMv8 intrinsics.
|
||||
//!
|
||||
//! The reference is [ARMv8-A Reference Manual][armv8].
|
||||
//!
|
||||
//! [armv8]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.
|
||||
//! ddi0487a.k_10775/index.html
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Reverse the order of the bytes.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rev))]
|
||||
pub unsafe fn _rev_u64(x: u64) -> u64 {
    // `swap_bytes` on a `u64` already yields a `u64`; no cast is needed.
    x.swap_bytes()
}
|
||||
|
||||
/// Count Leading Zeros.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(clz))]
|
||||
pub unsafe fn _clz_u64(x: u64) -> u64 {
    // `leading_zeros` returns a `u32`; widen it losslessly to the `u64` result.
    u64::from(x.leading_zeros())
}
|
||||
|
||||
/// Reverse the bit order.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rbit))]
|
||||
pub unsafe fn _rbit_u64(x: u64) -> u64 {
    // Use the stable integer method rather than the crate-internal
    // `intrinsics::bitreverse` path, matching `_rev_u64` and `_clz_u64`.
    x.reverse_bits()
}
|
||||
|
||||
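/// Counts the leading sign bits: the number of consecutive bits directly below
/// the most significant bit that are equal to it.
///
/// The most significant bit itself is not counted, so when all bits of the
/// operand are equal (all set or all clear) it returns the size of the operand
/// in bits minus one.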
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(cls))]
|
||||
pub unsafe fn _cls_u32(x: u32) -> u32 {
    // All ones when the top bit of `x` is set, all zeros otherwise.
    let sign_fill = ((x as i32) >> 31) as u32;
    // After the XOR a bit is set exactly where `x` differs from its top bit.
    // Shifting left by one drops the top bit itself, and `| 1` keeps the
    // count of leading zeros at 31 or below.
    let differing = ((x ^ sign_fill) << 1) | 1;
    differing.leading_zeros()
}
|
||||
|
||||
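/// Counts the leading sign bits: the number of consecutive bits directly below
/// the most significant bit that are equal to it.
///
/// The most significant bit itself is not counted, so when all bits of the
/// operand are equal (all set or all clear) it returns the size of the operand
/// in bits minus one.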
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(cls))]
|
||||
pub unsafe fn _cls_u64(x: u64) -> u64 {
    // All ones when the top bit of `x` is set, all zeros otherwise.
    let sign_fill = ((x as i64) >> 63) as u64;
    // After the XOR a bit is set exactly where `x` differs from its top bit.
    // Shifting left by one drops the top bit itself, and `| 1` keeps the
    // count of leading zeros at 63 or below.
    let differing = ((x ^ sign_fill) << 1) | 1;
    u64::from(differing.leading_zeros())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::aarch64::v8;

    // Byte reversal moves the low four bytes to the top in reverse order.
    #[test]
    fn _rev_u64() {
        let swapped = unsafe { v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64) };
        assert_eq!(
            swapped,
            0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
        );
    }

    // 0b1010 occupies four bits, leaving 60 leading zeros.
    #[test]
    fn _clz_u64() {
        let zeros = unsafe { v8::_clz_u64(0b0000_1010u64) };
        assert_eq!(zeros, 60u64);
    }

    // Bit reversal mirrors the whole 64-bit pattern.
    #[test]
    fn _rbit_u64() {
        let mirrored = unsafe { v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64) };
        assert_eq!(
            mirrored,
            0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64
        );
    }

    // Sixteen leading ones give a result of 15.
    #[test]
    fn _cls_u32() {
        let count = unsafe { v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32) };
        assert_eq!(count, 15_u32);
    }

    // Sixteen leading ones give a result of 15.
    #[test]
    fn _cls_u64() {
        let count = unsafe {
            v8::_cls_u64(
                0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64,
            )
        };
        assert_eq!(count, 15_u64);
    }
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
//! Access types available on all architectures
|
||||
|
||||
/// Full system is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
pub struct SY;
|
||||
|
||||
dmb_dsb!(SY);
|
||||
|
||||
impl super::super::sealed::Isb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __isb(&self) {
|
||||
super::isb(super::arg::SY)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary
|
||||
// of CP15 instructions"
|
||||
|
||||
/// Full system is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
pub struct SY;
|
||||
|
||||
impl super::super::sealed::Dmb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __dmb(&self) {
|
||||
llvm_asm!("mcr p15, 0, r0, c7, c10, 5" : : : "memory" : "volatile")
|
||||
}
|
||||
}
|
||||
|
||||
impl super::super::sealed::Dsb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __dsb(&self) {
|
||||
llvm_asm!("mcr p15, 0, r0, c7, c10, 4" : : : "memory" : "volatile")
|
||||
}
|
||||
}
|
||||
|
||||
impl super::super::sealed::Isb for SY {
|
||||
#[inline(always)]
|
||||
unsafe fn __isb(&self) {
|
||||
llvm_asm!("mcr p15, 0, r0, c7, c5, 4" : : : "memory" : "volatile")
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
// Reference: Section 7.4 "Hints" of ACLE
|
||||
|
||||
// CP15 instruction
|
||||
#[cfg(not(any(
|
||||
// v8
|
||||
target_arch = "aarch64",
|
||||
// v7
|
||||
target_feature = "v7",
|
||||
// v6-M
|
||||
target_feature = "mclass"
|
||||
)))]
|
||||
mod cp15;
|
||||
|
||||
#[cfg(not(any(
|
||||
target_arch = "aarch64",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
)))]
|
||||
pub use self::cp15::*;
|
||||
|
||||
// Dedicated instructions
|
||||
// Only defined where dedicated barrier instructions exist.
#[cfg(any(
    target_arch = "aarch64",
    target_feature = "v7",
    target_feature = "mclass"
))]
/// Implements the sealed `Dmb` and `Dsb` traits for the marker type
/// `$scope`.
///
/// Each method forwards the constant of the same name from the `arg` module
/// to the matching `dmb` / `dsb` intrinsic. The paths are relative to the
/// sub-module in which the macro is invoked.
macro_rules! dmb_dsb {
    ($scope:ident) => {
        impl super::super::sealed::Dmb for $scope {
            #[inline(always)]
            unsafe fn __dmb(&self) {
                super::dmb(super::arg::$scope)
            }
        }

        impl super::super::sealed::Dsb for $scope {
            #[inline(always)]
            unsafe fn __dsb(&self) {
                super::dsb(super::arg::$scope)
            }
        }
    };
}
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
))]
|
||||
mod common;
|
||||
|
||||
#[cfg(any(
|
||||
target_arch = "aarch64",
|
||||
target_feature = "v7",
|
||||
target_feature = "mclass"
|
||||
))]
|
||||
pub use self::common::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7",))]
|
||||
mod not_mclass;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7",))]
|
||||
pub use self::not_mclass::*;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod v8;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub use self::v8::*;
|
||||
|
||||
/// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction.
|
||||
///
|
||||
/// DMB ensures the observed ordering of memory accesses. Memory accesses of the specified type
|
||||
/// issued before the DMB are guaranteed to be observed (in the specified scope) before memory
|
||||
/// accesses issued after the DMB.
|
||||
///
|
||||
/// For example, DMB should be used between storing data, and updating a flag variable that makes
|
||||
/// that data available to another core.
|
||||
///
|
||||
/// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type.
|
||||
#[inline(always)]
|
||||
pub unsafe fn __dmb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Dmb,
|
||||
{
|
||||
arg.__dmb()
|
||||
}
|
||||
|
||||
/// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction.
|
||||
///
|
||||
/// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has
|
||||
/// additional properties. After a DSB instruction completes, all memory accesses of the specified
|
||||
/// type issued before the DSB are guaranteed to have completed.
|
||||
///
|
||||
/// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type.
|
||||
#[inline(always)]
|
||||
pub unsafe fn __dsb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Dsb,
|
||||
{
|
||||
arg.__dsb()
|
||||
}
|
||||
|
||||
/// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15
|
||||
/// instruction.
|
||||
///
|
||||
/// This instruction flushes the processor pipeline fetch buffers, so that following instructions
|
||||
/// are fetched from cache or memory.
|
||||
///
|
||||
/// An ISB is needed after some system maintenance operations. An ISB is also needed before
|
||||
/// transferring control to code that has been loaded or modified in memory, for example by an
|
||||
/// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are
|
||||
/// separate, privileged cache maintenance operations would be needed in order to unify the caches.)
|
||||
///
|
||||
/// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full
|
||||
/// system) scope of the ISB instruction.
|
||||
#[inline(always)]
|
||||
pub unsafe fn __isb<A>(arg: A)
|
||||
where
|
||||
A: super::sealed::Isb,
|
||||
{
|
||||
arg.__isb()
|
||||
}
|
||||
|
||||
// LLVM barrier intrinsics. The link name depends on the target architecture;
// the `i32` argument is one of the values from the `arg` module.
extern "C" {
    // Data memory barrier.
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")]
    fn dmb(_: i32);

    // Data synchronization barrier.
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dsb")]
    fn dsb(_: i32);

    // Instruction synchronization barrier.
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.isb")]
    fn isb(_: i32);
}
|
||||
|
||||
// These constants live in their own module so that they do not collide with
// the identically named marker types that are glob re-exported.
mod arg {
    // Values from Section 7.3 "Memory barriers" of ACLE, written in binary.

    // Full system.
    pub const SY: i32 = 0b1111;
    pub const ST: i32 = 0b1110;
    pub const LD: i32 = 0b1101;

    // Inner Shareable.
    pub const ISH: i32 = 0b1011;
    pub const ISHST: i32 = 0b1010;
    pub const ISHLD: i32 = 0b1001;

    // Non-shareable.
    pub const NSH: i32 = 0b0111;
    pub const NSHST: i32 = 0b0110;
    pub const NSHLD: i32 = 0b0101;

    // Outer Shareable.
    pub const OSH: i32 = 0b0011;
    pub const OSHST: i32 = 0b0010;
    pub const OSHLD: i32 = 0b0001;
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
//! Access types available on v7 and v8 but not on v7(E)-M or v8-M
|
||||
|
||||
/// Full system is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
pub struct ST;
|
||||
|
||||
dmb_dsb!(ST);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, reads and writes are
|
||||
/// the required access types
|
||||
pub struct ISH;
|
||||
|
||||
dmb_dsb!(ISH);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
pub struct ISHST;
|
||||
|
||||
dmb_dsb!(ISHST);
|
||||
|
||||
/// Non-shareable is the required shareability domain, reads and writes are the
|
||||
/// required access types
|
||||
pub struct NSH;
|
||||
|
||||
dmb_dsb!(NSH);
|
||||
|
||||
/// Non-shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
pub struct NSHST;
|
||||
|
||||
dmb_dsb!(NSHST);
|
||||
|
||||
/// Outer Shareable is the required shareability domain, reads and writes are
|
||||
/// the required access types
|
||||
pub struct OSH;
|
||||
|
||||
dmb_dsb!(OSH);
|
||||
|
||||
/// Outer Shareable is the required shareability domain, writes are the required
|
||||
/// access type
|
||||
pub struct OSHST;
|
||||
|
||||
dmb_dsb!(OSHST);
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
/// Full system is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
pub struct LD;
|
||||
|
||||
dmb_dsb!(LD);
|
||||
|
||||
/// Inner Shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
pub struct ISHLD;
|
||||
|
||||
dmb_dsb!(ISHLD);
|
||||
|
||||
/// Non-shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
pub struct NSHLD;
|
||||
|
||||
dmb_dsb!(NSHLD);
|
||||
|
||||
/// Outher Shareable is the required shareability domain, reads are the required
|
||||
/// access type
|
||||
pub struct OSHLD;
|
||||
|
||||
dmb_dsb!(OSHLD);
|
||||
|
|
@ -0,0 +1,384 @@
|
|||
//! # References:
|
||||
//!
|
||||
//! - Section 8.3 "16-bit multiplications"
|
||||
//!
|
||||
//! Intrinsics that could live here:
|
||||
//!
|
||||
//! - \[x\] __smulbb
|
||||
//! - \[x\] __smulbt
|
||||
//! - \[x\] __smultb
|
||||
//! - \[x\] __smultt
|
||||
//! - \[x\] __smulwb
|
||||
//! - \[x\] __smulwt
|
||||
//! - \[x\] __qadd
|
||||
//! - \[x\] __qsub
|
||||
//! - \[x\] __qdbl
|
||||
//! - \[x\] __smlabb
|
||||
//! - \[x\] __smlabt
|
||||
//! - \[x\] __smlatb
|
||||
//! - \[x\] __smlatt
|
||||
//! - \[x\] __smlawb
|
||||
//! - \[x\] __smlawt
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::mem::transmute;
|
||||
|
||||
types! {
    /// ARM-specific vector type, 32 bits wide, holding two packed `i16` lanes.
    pub struct int16x2_t(i16, i16);
    /// ARM-specific vector type, 32 bits wide, holding two packed `u16` lanes.
    pub struct uint16x2_t(u16, u16);
}
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.arm.smulbb"]
|
||||
fn arm_smulbb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulbt"]
|
||||
fn arm_smulbt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smultb"]
|
||||
fn arm_smultb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smultt"]
|
||||
fn arm_smultt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulwb"]
|
||||
fn arm_smulwb(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smulwt"]
|
||||
fn arm_smulwt(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qadd"]
|
||||
fn arm_qadd(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub"]
|
||||
fn arm_qsub(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlabb"]
|
||||
fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlabt"]
|
||||
fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlatb"]
|
||||
fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlatt"]
|
||||
fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlawb"]
|
||||
fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlawt"]
|
||||
fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
|
||||
}
|
||||
|
||||
/// Insert a SMULBB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[0\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulbb))]
|
||||
pub unsafe fn __smulbb(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smulbb(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Insert a SMULTB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[1\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smultb))]
|
||||
pub unsafe fn __smultb(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smultb(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Insert a SMULTB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[0\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulbt))]
|
||||
pub unsafe fn __smulbt(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smulbt(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Insert a SMULTT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[1\]
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smultt))]
|
||||
pub unsafe fn __smultt(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smultt(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Insert a SMULWB instruction
|
||||
///
|
||||
/// Multiplies the 32-bit signed first operand with the low halfword
|
||||
/// (as a 16-bit signed integer) of the second operand.
|
||||
/// Return the top 32 bits of the 48-bit product
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulwb))]
|
||||
pub unsafe fn __smulwb(a: int16x2_t, b: i32) -> i32 {
|
||||
arm_smulwb(transmute(a), b)
|
||||
}
|
||||
|
||||
/// Insert a SMULWT instruction
|
||||
///
|
||||
/// Multiplies the 32-bit signed first operand with the high halfword
|
||||
/// (as a 16-bit signed integer) of the second operand.
|
||||
/// Return the top 32 bits of the 48-bit product
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smulwt))]
|
||||
pub unsafe fn __smulwt(a: int16x2_t, b: i32) -> i32 {
|
||||
arm_smulwt(transmute(a), b)
|
||||
}
|
||||
|
||||
/// Signed saturating addition
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a + b.
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd))]
|
||||
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
|
||||
arm_qadd(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating subtraction
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a - b.
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub))]
|
||||
pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
|
||||
arm_qsub(a, b)
|
||||
}
|
||||
|
||||
/// Insert a QADD instruction
|
||||
///
|
||||
/// Returns the 32-bit saturating signed equivalent of a + a
|
||||
/// Sets the Q flag if saturation occurs.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd))]
|
||||
pub unsafe fn __qdbl(a: i32) -> i32 {
|
||||
arm_qadd(a, a)
|
||||
}
|
||||
|
||||
/// Insert a SMLABB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[0\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlabb))]
|
||||
pub unsafe fn __smlabb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlabb(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Insert a SMLABT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[0\] * b\[1\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlabt))]
|
||||
pub unsafe fn __smlabt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlabt(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Insert a SMLATB instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[0\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlatb))]
|
||||
pub unsafe fn __smlatb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlatb(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Insert a SMLATT instruction
|
||||
///
|
||||
/// Returns the equivalent of a\[1\] * b\[1\] + c
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlatt))]
|
||||
pub unsafe fn __smlatt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlatt(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Insert a SMLAWB instruction
|
||||
///
|
||||
/// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlawb))]
|
||||
pub unsafe fn __smlawb(a: i32, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlawb(a, transmute(b), c)
|
||||
}
|
||||
|
||||
/// Insert a SMLAWT instruction
|
||||
///
|
||||
/// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16
|
||||
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
|
||||
/// Sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlawt))]
|
||||
pub unsafe fn __smlawt(a: i32, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlawt(a, transmute(b), c)
|
||||
}
|
||||
|
||||
// Value tests for the DSP intrinsics. `i16x2::new(lo, hi)` is transmuted so
// that the first lane is element \[0\] (low halfword) and the second lane is
// element \[1\] (high halfword).
#[cfg(test)]
mod tests {
    use crate::core_arch::{
        arm::*,
        simd::{i16x2, i8x4, u8x4},
    };
    use std::mem::transmute;
    use stdarch_test::simd_test;

    #[test]
    fn smulbb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
        }
    }

    #[test]
    fn smulbt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
        }
    }

    #[test]
    fn smultb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
        }
    }

    #[test]
    fn smultt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
        }
    }

    #[test]
    fn smulwb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
        }
    }

    #[test]
    fn smulwt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
        }
    }

    #[test]
    fn qadd() {
        unsafe {
            assert_eq!(super::__qadd(-10, 60), 50);
            assert_eq!(super::__qadd(i32::MAX, 10), i32::MAX);
            assert_eq!(super::__qadd(i32::MIN, -10), i32::MIN);
        }
    }

    #[test]
    fn qsub() {
        unsafe {
            assert_eq!(super::__qsub(10, 60), -50);
            assert_eq!(super::__qsub(i32::MAX, -10), i32::MAX);
            assert_eq!(super::__qsub(i32::MIN, 10), i32::MIN);
        }
    }

    // The functions below previously lacked `#[test]` and were never run.

    #[test]
    fn qdbl() {
        unsafe {
            assert_eq!(super::__qdbl(10), 20);
            assert_eq!(super::__qdbl(i32::MAX), i32::MAX);
        }
    }

    #[test]
    fn smlabb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 30) + c;
            assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlabt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 40) + c;
            assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 30) + c;
            // Exercise `__smlatb` itself; this used to call `__smlabt`.
            assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 40) + c;
            assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlawb() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 30) + (c << 16)) >> 16;
            assert_eq!(super::__smlawb(a, transmute(b), c), r);
        }
    }

    #[test]
    fn smlawt() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 40) + (c << 16)) >> 16;
            assert_eq!(super::__smlawt(a, transmute(b), c), r);
        }
    }
}
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
// Reference: Section 5.4.4 "LDREX / STREX" of ACLE
|
||||
|
||||
/// Clears the exclusive lock that a preceding LDREX created.
// Supported: v6, v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6-M
// NOTE: plain v6 (anything before v6K) has no dedicated CLREX instruction; on those targets the
// exclusive monitor has to be cleared by performing a dummy STREX operation.
#[cfg(any(
    all(target_feature = "v6k", not(target_feature = "mclass")), // excludes v6-M
    all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __clrex() {
    extern "C" {
        #[link_name = "llvm.arm.clrex"]
        fn llvm_clrex();
    }

    llvm_clrex()
}
|
||||
|
||||
/// Executes an exclusive LDR instruction for an 8-bit value.
// Supported: v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6, v6-M
#[cfg(
    target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __ldrexb(p: *const u8) -> u8 {
    extern "C" {
        #[link_name = "llvm.arm.ldrex.p0i8"]
        fn llvm_ldrex_u8(p: *const u8) -> u32;
    }

    // The intrinsic yields a 32-bit word; keep only the low byte.
    let word = llvm_ldrex_u8(p);
    word as u8
}
|
||||
|
||||
/// Executes an exclusive LDR instruction for a 16-bit value.
// Supported: v6K, v7-M, v7-A, v7-R, v8
// Not supported: v5, v6, v6-M
#[cfg(
    target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __ldrexh(p: *const u16) -> u16 {
    extern "C" {
        #[link_name = "llvm.arm.ldrex.p0i16"]
        fn llvm_ldrex_u16(p: *const u16) -> u32;
    }

    // The intrinsic yields a 32-bit word; keep only the low halfword.
    let word = llvm_ldrex_u16(p);
    word as u16
}
|
||||
|
||||
/// Executes an exclusive LDR instruction for a 32-bit value.
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8
// Not supported: v5, v6-M
#[cfg(any(
    all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M
    all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __ldrex(p: *const u32) -> u32 {
    extern "C" {
        #[link_name = "llvm.arm.ldrex.p0i32"]
        fn llvm_ldrex_u32(p: *const u32) -> u32;
    }

    llvm_ldrex_u32(p)
}
|
||||
|
||||
/// Executes an exclusive STR instruction for 8-bit values.
///
/// Returns `0` if the store succeeded, or `1` if it failed.
// Supported: v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6, v6-M
// NOTE(review): `value` is a `u32` here whereas `__strexh` takes a `u16`; the
// signature is left untouched for callers.
#[cfg(
    target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
    extern "C" {
        #[link_name = "llvm.arm.strex.p0i8"]
        fn llvm_strex_u8(value: u32, addr: *mut u8) -> u32;
    }

    llvm_strex_u8(value, addr)
}
|
||||
|
||||
/// Executes an exclusive STR instruction for 16-bit values.
///
/// Returns `0` if the store succeeded, or `1` if it failed.
// Supported: v6K, v7-M, v7-A, v7-R, v8
// Not supported: v5, v6, v6-M
#[cfg(
    target_feature = "v6k", // includes v7-M but excludes v6-M
)]
pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
    extern "C" {
        #[link_name = "llvm.arm.strex.p0i16"]
        fn llvm_strex_u16(value: u32, addr: *mut u16) -> u32;
    }

    // The intrinsic takes the value as a 32-bit word; zero-extend it.
    let widened = u32::from(value);
    llvm_strex_u16(widened, addr)
}
|
||||
|
||||
/// Executes an exclusive STR instruction for 32-bit values.
///
/// Returns `0` if the store succeeded, or `1` if it failed.
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8
// Not supported: v5, v6-M
#[cfg(any(
    all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M
    all(target_feature = "v7", target_feature = "mclass"), // v7-M
))]
pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 {
    extern "C" {
        #[link_name = "llvm.arm.strex.p0i32"]
        fn llvm_strex_u32(value: u32, addr: *mut u32) -> u32;
    }

    llvm_strex_u32(value, addr)
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
// # References
|
||||
//
|
||||
// - Section 7.4 "Hints" of ACLE
|
||||
// - Section 7.7 "NOP" of ACLE
|
||||
|
||||
/// Emits a WFI (wait for interrupt) hint instruction, or nothing.
///
/// WFI permits, but does not oblige, the processor to enter a low-power
/// state until one of a number of asynchronous events occurs.
// ACLE Section 10.1 lists the supported architectures as: 8, 6K, 6-M
// LLVM reports "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __wfi() {
    hint(HINT_WFI)
}
|
||||
|
||||
/// Emits a WFE (wait for event) hint instruction, or nothing.
///
/// WFE permits, but does not oblige, the processor to enter a low-power
/// state until some event occurs, such as a SEV issued by another processor.
// ACLE Section 10.1 lists the supported architectures as: 8, 6K, 6-M
// LLVM reports "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __wfe() {
    hint(HINT_WFE)
}
|
||||
|
||||
/// Emits a SEV (send a global event) hint instruction.
///
/// An event is signaled to every processor in a multiprocessor system. On a
/// uniprocessor system the instruction is a NOP.
// ACLE Section 10.1 lists the supported architectures as: 8, 6K, 6-M, 7-M
// LLVM reports "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __sev() {
    hint(HINT_SEV)
}
|
||||
|
||||
/// Emits a SEVL (send a local event) hint instruction.
///
/// An event is signaled only to the processor executing the instruction. In
/// a multiprocessor system it is not required to affect the other
/// processors.
// LLVM reports "instruction requires: armv8"
#[cfg(any(
    target_feature = "v8", // 32-bit ARMv8
    target_arch = "aarch64", // AArch64
))]
#[inline(always)]
pub unsafe fn __sevl() {
    hint(HINT_SEVL)
}
|
||||
|
||||
/// Emits a YIELD hint instruction.
///
/// Multithreading software uses it to tell the hardware that it is
/// performing a task (a spin-lock, for example) that could be swapped out to
/// improve overall system performance.
// ACLE Section 10.1 lists the supported architectures as: 8, 6K, 6-M
// LLVM reports "instruction requires: armv6k"
#[cfg(any(target_feature = "v6", target_arch = "aarch64"))]
#[inline(always)]
pub unsafe fn __yield() {
    hint(HINT_YIELD)
}
|
||||
|
||||
/// Emits a DBG instruction.
///
/// DBG is a hint to debugging and related systems. The argument must be a
/// constant integer in the range 0 to 15 inclusive. The effect (if any) of
/// the instruction and the meaning of the argument are described in the
/// implementation documentation. Available only when compiling for AArch32.
// ACLE Section 10.1 lists the supported architectures as: 7, 7-M
// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and
// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A
// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2
// "Thumb instruction set support"
#[cfg(target_feature = "v7")]
#[inline(always)]
#[rustc_args_required_const(0)]
pub unsafe fn __dbg(imm4: u32) {
    // The option is spelled out in the assembly text, so each permitted
    // value needs its own `llvm_asm!` expansion.
    macro_rules! emit_dbg {
        ($option:expr) => {
            llvm_asm!(concat!("DBG ", stringify!($option)) : : : : "volatile")
        }
    }

    // Only the low four bits of the argument select the option; the
    // catch-all arm therefore corresponds to 15.
    match imm4 & 0b1111 {
        0 => emit_dbg!(0),
        1 => emit_dbg!(1),
        2 => emit_dbg!(2),
        3 => emit_dbg!(3),
        4 => emit_dbg!(4),
        5 => emit_dbg!(5),
        6 => emit_dbg!(6),
        7 => emit_dbg!(7),
        8 => emit_dbg!(8),
        9 => emit_dbg!(9),
        10 => emit_dbg!(10),
        11 => emit_dbg!(11),
        12 => emit_dbg!(12),
        13 => emit_dbg!(13),
        14 => emit_dbg!(14),
        _ => emit_dbg!(15),
    }
}
|
||||
|
||||
/// Generates an unspecified no-op instruction.
|
||||
///
|
||||
/// Note that not all architectures provide a distinguished NOP instruction. On
|
||||
/// those that do, it is unspecified whether this intrinsic generates it or
|
||||
/// another instruction. It is not guaranteed that inserting this instruction
|
||||
/// will increase execution time.
|
||||
#[inline(always)]
|
||||
pub unsafe fn __nop() {
|
||||
llvm_asm!("NOP" : : : : "volatile")
|
||||
}
|
||||
|
||||
// LLVM hint intrinsic; the link name depends on the target architecture and
// the argument is one of the `HINT_*` constants.
extern "C" {
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")]
    fn hint(_: i32);
}
|
||||
|
||||
// Operand values for the `hint` intrinsic, taken from LLVM 7.0.1's
// lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td
const HINT_NOP: i32 = 0x0;
const HINT_YIELD: i32 = 0x1;
const HINT_WFE: i32 = 0x2;
const HINT_WFI: i32 = 0x3;
const HINT_SEV: i32 = 0x4;
const HINT_SEVL: i32 = 0x5;
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
//! ARM C Language Extensions (ACLE)
|
||||
//!
|
||||
//! # Developer notes
|
||||
//!
|
||||
//! Below is a list of built-in targets that are representative of the different ARM
|
||||
//! architectures; the list includes the `target_feature`s they possess.
|
||||
//!
|
||||
//! - `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t`
|
||||
//! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te`
|
||||
//! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6`
|
||||
//! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass`
|
||||
//! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass`
|
||||
//! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass`
|
||||
//! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
|
||||
//! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass`
|
||||
//! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass`
|
||||
//! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass`
|
||||
//! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon`
|
||||
//!
|
||||
//! Section 10.1 of ACLE says:
|
||||
//!
|
||||
//! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes
|
||||
//! its predecessor instruction set."
|
||||
//!
|
||||
//! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes
|
||||
//! its predecessor instruction set."
|
||||
//!
|
||||
//! From that info and from looking at how LLVM features work (using custom targets) we can identify
|
||||
//! features that are subsets of others:
|
||||
//!
|
||||
//! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is
|
||||
//! enabled as well.
|
||||
//!
|
||||
//! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8`
|
||||
//! - `v6 < v8m < v6t2`
|
||||
//! - `v7 < v8m.main`
|
||||
//!
|
||||
//! *NOTE*: Section 5.4.7 of ACLE says:
|
||||
//!
|
||||
//! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the
|
||||
//! intrinsics defined in Saturating intrinsics are available."
|
||||
//!
|
||||
//! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te
|
||||
//! targets so we have to work around this difference.
|
||||
//!
|
||||
//! # References
|
||||
//!
|
||||
//! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest)
|
||||
|
||||
// 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported
|
||||
// via CP15 instructions. See Section 10.1 of ACLE
|
||||
mod barrier;
|
||||
|
||||
pub use self::barrier::*;
|
||||
|
||||
mod hints;
|
||||
|
||||
pub use self::hints::*;
|
||||
|
||||
mod registers;
|
||||
|
||||
pub use self::registers::*;
|
||||
|
||||
mod ex;
|
||||
|
||||
pub use self::ex::*;
|
||||
|
||||
// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD)
|
||||
// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see
|
||||
// section 5.4.7)
|
||||
// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on
|
||||
// '+v5te' rather than on '+dsp'
|
||||
#[cfg(all(
|
||||
not(target_arch = "aarch64"),
|
||||
any(
|
||||
// >= v5TE but excludes v7-M
|
||||
all(target_feature = "v5te", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
)
|
||||
))]
|
||||
mod dsp;
|
||||
|
||||
#[cfg(all(
|
||||
not(target_arch = "aarch64"),
|
||||
any(
|
||||
all(target_feature = "v5te", not(target_feature = "mclass")),
|
||||
all(target_feature = "mclass", target_feature = "dsp"),
|
||||
)
|
||||
))]
|
||||
pub use self::dsp::*;
|
||||
|
||||
// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT)
|
||||
#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))]
|
||||
mod sat;
|
||||
|
||||
#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))]
|
||||
pub use self::sat::*;
|
||||
|
||||
// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says
|
||||
// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated
|
||||
#[cfg(all(
|
||||
not(target_arch = "aarch64"),
|
||||
any(
|
||||
// v7-A, v7-R
|
||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||
// v7E-M
|
||||
all(target_feature = "mclass", target_feature = "dsp")
|
||||
)
|
||||
))]
|
||||
mod simd32;
|
||||
|
||||
#[cfg(all(
|
||||
not(target_arch = "aarch64"),
|
||||
any(
|
||||
all(target_feature = "v6", not(target_feature = "mclass")),
|
||||
all(target_feature = "mclass", target_feature = "dsp")
|
||||
)
|
||||
))]
|
||||
pub use self::simd32::*;
|
||||
|
||||
/// Marker traits that back the barrier and system-register intrinsics.
///
/// The module is declared without `pub`, so these traits are only nameable
/// through this crate's own paths (e.g. `super::sealed::Rsr`).
mod sealed {
    /// Argument types usable with a `__dmb` operation.
    pub trait Dmb {
        unsafe fn __dmb(&self);
    }

    /// Argument types usable with a `__dsb` operation.
    pub trait Dsb {
        unsafe fn __dsb(&self);
    }

    /// Argument types usable with an `__isb` operation.
    pub trait Isb {
        unsafe fn __isb(&self);
    }

    /// Registers that can be read as a 32-bit value.
    pub trait Rsr {
        unsafe fn __rsr(&self) -> u32;
    }

    /// Registers that can be read as a 64-bit value.
    pub trait Rsr64 {
        unsafe fn __rsr64(&self) -> u64;
    }

    /// Registers that can be read as an address.
    pub trait Rsrp {
        unsafe fn __rsrp(&self) -> *const u8;
    }

    /// Registers that can be written with a 32-bit value.
    pub trait Wsr {
        unsafe fn __wsr(&self, value: u32);
    }

    /// Registers that can be written with a 64-bit value.
    pub trait Wsr64 {
        unsafe fn __wsr64(&self, value: u64);
    }

    /// Registers that can be written with an address.
    pub trait Wsrp {
        unsafe fn __wsrp(&self, value: *const u8);
    }
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
/// Application Program Status Register
///
/// No read accessor is generated for this register at the moment: the
/// `rsr!(APSR)` invocation below is intentionally commented out.
pub struct APSR;

// Note (@Lokathor): Because this breaks the use of Rust on the Game Boy
// Advance, this change must be reverted until Rust learns to handle cpu state
// properly. See also: https://github.com/rust-lang/stdarch/issues/702

//#[cfg(any(not(target_feature = "thumb-state"), target_feature = "v6t2"))]
//rsr!(APSR);
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/// Implements the sealed `Rsr` trait for the register marker type `$R`.
///
/// The generated `__rsr` emits `mrs <out>, <R>` where `<R>` is the type's
/// identifier spelled verbatim, and returns the 32-bit value that was read.
// The modules that invoke this macro are `cfg`-gated, so on some targets it
// has no users at all.
#[allow(unused_macros)]
macro_rules! rsr {
    ($R:ident) => {
        impl super::super::sealed::Rsr for $R {
            unsafe fn __rsr(&self) -> u32 {
                let r: u32;
                llvm_asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile");
                r
            }
        }
    };
}
|
||||
|
||||
/// Implements the sealed `Rsrp` trait for the register marker type `$R`.
///
/// The generated `__rsrp` emits `mrs <out>, <R>` where `<R>` is the type's
/// identifier spelled verbatim, and returns the value read as a `*const u8`.
// The modules that invoke this macro are `cfg`-gated, so on some targets it
// has no users at all.
#[allow(unused_macros)]
macro_rules! rsrp {
    ($R:ident) => {
        impl super::super::sealed::Rsrp for $R {
            unsafe fn __rsrp(&self) -> *const u8 {
                let r: *const u8;
                llvm_asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile");
                r
            }
        }
    };
}
|
||||
|
||||
/// Implements the sealed `Wsr` trait for the register marker type `$R`.
///
/// The generated `__wsr` emits `msr <R>, <value>` where `<R>` is the type's
/// identifier spelled verbatim and `<value>` is the 32-bit argument.
// The modules that invoke this macro are `cfg`-gated, so on some targets it
// has no users at all.
#[allow(unused_macros)]
macro_rules! wsr {
    ($R:ident) => {
        impl super::super::sealed::Wsr for $R {
            unsafe fn __wsr(&self, value: u32) {
                llvm_asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile");
            }
        }
    };
}
|
||||
|
||||
/// Implements the sealed `Wsrp` trait for the register marker type `$R`.
///
/// The generated `__wsrp` emits `msr <R>, <value>` where `<R>` is the type's
/// identifier spelled verbatim and `<value>` is the address argument.
// The modules that invoke this macro are `cfg`-gated, so on some targets it
// has no users at all.
#[allow(unused_macros)]
macro_rules! wsrp {
    ($R:ident) => {
        impl super::super::sealed::Wsrp for $R {
            unsafe fn __wsrp(&self, value: *const u8) {
                llvm_asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile");
            }
        }
    };
}
|
||||
|
||||
#[cfg(target_feature = "mclass")]
|
||||
mod v6m;
|
||||
|
||||
#[cfg(target_feature = "mclass")]
|
||||
pub use self::v6m::*;
|
||||
|
||||
#[cfg(all(target_feature = "v7", target_feature = "mclass"))]
|
||||
mod v7m;
|
||||
|
||||
#[cfg(all(target_feature = "v7", target_feature = "mclass"))]
|
||||
pub use self::v7m::*;
|
||||
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
mod aarch32;
|
||||
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
pub use self::aarch32::*;
|
||||
|
||||
/// Reads a 32-bit system register
|
||||
#[inline(always)]
|
||||
pub unsafe fn __rsr<R>(reg: R) -> u32
|
||||
where
|
||||
R: super::sealed::Rsr,
|
||||
{
|
||||
reg.__rsr()
|
||||
}
|
||||
|
||||
/// Reads a 64-bit system register
///
/// Only compiled for `target_arch = "aarch64"`. `reg` is a marker value whose
/// type (implementing the sealed `Rsr64` trait) selects the register; the
/// call forwards to that type's `Rsr64::__rsr64` implementation.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub unsafe fn __rsr64<R>(reg: R) -> u64
where
    R: super::sealed::Rsr64,
{
    reg.__rsr64()
}
|
||||
|
||||
/// Reads a system register containing an address
|
||||
#[inline(always)]
|
||||
pub unsafe fn __rsrp<R>(reg: R) -> *const u8
|
||||
where
|
||||
R: super::sealed::Rsrp,
|
||||
{
|
||||
reg.__rsrp()
|
||||
}
|
||||
|
||||
/// Writes a 32-bit system register
|
||||
#[inline(always)]
|
||||
pub unsafe fn __wsr<R>(reg: R, value: u32)
|
||||
where
|
||||
R: super::sealed::Wsr,
|
||||
{
|
||||
reg.__wsr(value)
|
||||
}
|
||||
|
||||
/// Writes a 64-bit system register
///
/// Only compiled for `target_arch = "aarch64"`. `reg` is a marker value whose
/// type (implementing the sealed `Wsr64` trait) selects the register; `value`
/// is handed unchanged to that type's `Wsr64::__wsr64` implementation.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
pub unsafe fn __wsr64<R>(reg: R, value: u64)
where
    R: super::sealed::Wsr64,
{
    reg.__wsr64(value)
}
|
||||
|
||||
/// Writes a system register containing an address
|
||||
#[inline(always)]
|
||||
pub unsafe fn __wsrp<R>(reg: R, value: *const u8)
|
||||
where
|
||||
R: super::sealed::Wsrp,
|
||||
{
|
||||
reg.__wsrp(value)
|
||||
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/// CONTROL register
|
||||
pub struct CONTROL;
|
||||
|
||||
rsr!(CONTROL);
|
||||
wsr!(CONTROL);
|
||||
|
||||
/// Execution Program Status Register
|
||||
pub struct EPSR;
|
||||
|
||||
rsr!(EPSR);
|
||||
|
||||
/// Interrupt Program Status Register
|
||||
pub struct IPSR;
|
||||
|
||||
rsr!(IPSR);
|
||||
|
||||
/// Main Stack Pointer
|
||||
pub struct MSP;
|
||||
|
||||
rsrp!(MSP);
|
||||
wsrp!(MSP);
|
||||
|
||||
/// Priority Mask Register
|
||||
pub struct PRIMASK;
|
||||
|
||||
rsr!(PRIMASK);
|
||||
wsr!(PRIMASK);
|
||||
|
||||
/// Process Stack Pointer
|
||||
pub struct PSP;
|
||||
|
||||
rsrp!(PSP);
|
||||
wsrp!(PSP);
|
||||
|
||||
/// Program Status Register
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct xPSR;
|
||||
|
||||
rsr!(xPSR);
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
/// Base Priority Mask Register
|
||||
pub struct BASEPRI;
|
||||
|
||||
rsr!(BASEPRI);
|
||||
wsr!(BASEPRI);
|
||||
|
||||
/// Base Priority Mask Register (conditional write)
|
||||
#[allow(non_camel_case_types)]
|
||||
pub struct BASEPRI_MAX;
|
||||
|
||||
wsr!(BASEPRI_MAX);
|
||||
|
||||
/// Fault Mask Register
|
||||
pub struct FAULTMASK;
|
||||
|
||||
rsr!(FAULTMASK);
|
||||
wsr!(FAULTMASK);
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
//! # References:
|
||||
//!
|
||||
//! - Section 8.4 "Saturating intrinsics"
|
||||
//!
|
||||
//! Intrinsics that could live here:
|
||||
//!
|
||||
//! - __ssat
|
||||
//! - __usat
|
||||
|
|
@ -0,0 +1,728 @@
|
|||
//! # References
|
||||
//!
|
||||
//! - Section 8.5 "32-bit SIMD intrinsics" of ACLE
|
||||
//!
|
||||
//! Intrinsics that could live here
|
||||
//!
|
||||
//! - \[x\] __sel
|
||||
//! - \[ \] __ssat16
|
||||
//! - \[ \] __usat16
|
||||
//! - \[ \] __sxtab16
|
||||
//! - \[ \] __sxtb16
|
||||
//! - \[ \] __uxtab16
|
||||
//! - \[ \] __uxtb16
|
||||
//! - \[x\] __qadd8
|
||||
//! - \[x\] __qsub8
|
||||
//! - \[x\] __sadd8
|
||||
//! - \[x\] __shadd8
|
||||
//! - \[x\] __shsub8
|
||||
//! - \[x\] __ssub8
|
||||
//! - \[ \] __uadd8
|
||||
//! - \[ \] __uhadd8
|
||||
//! - \[ \] __uhsub8
|
||||
//! - \[ \] __uqadd8
|
||||
//! - \[ \] __uqsub8
|
||||
//! - \[x\] __usub8
|
||||
//! - \[x\] __usad8
|
||||
//! - \[x\] __usada8
|
||||
//! - \[x\] __qadd16
|
||||
//! - \[x\] __qasx
|
||||
//! - \[x\] __qsax
|
||||
//! - \[x\] __qsub16
|
||||
//! - \[x\] __sadd16
|
||||
//! - \[x\] __sasx
|
||||
//! - \[x\] __shadd16
|
||||
//! - \[ \] __shasx
|
||||
//! - \[ \] __shsax
|
||||
//! - \[x\] __shsub16
|
||||
//! - \[ \] __ssax
|
||||
//! - \[ \] __ssub16
|
||||
//! - \[ \] __uadd16
|
||||
//! - \[ \] __uasx
|
||||
//! - \[ \] __uhadd16
|
||||
//! - \[ \] __uhasx
|
||||
//! - \[ \] __uhsax
|
||||
//! - \[ \] __uhsub16
|
||||
//! - \[ \] __uqadd16
|
||||
//! - \[ \] __uqasx
|
||||
//! - \[ \] __uqsax
|
||||
//! - \[ \] __uqsub16
|
||||
//! - \[ \] __usax
|
||||
//! - \[ \] __usub16
|
||||
//! - \[x\] __smlad
|
||||
//! - \[ \] __smladx
|
||||
//! - \[ \] __smlald
|
||||
//! - \[ \] __smlaldx
|
||||
//! - \[x\] __smlsd
|
||||
//! - \[ \] __smlsdx
|
||||
//! - \[ \] __smlsld
|
||||
//! - \[ \] __smlsldx
|
||||
//! - \[x\] __smuad
|
||||
//! - \[x\] __smuadx
|
||||
//! - \[x\] __smusd
|
||||
//! - \[x\] __smusdx
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute};
|
||||
|
||||
// Both vector types are four one-byte lanes, i.e. 32 bits in total, which is
// what allows them to be transmuted to and from the `i32` operands of the
// LLVM intrinsics declared in this file.
types! {
    /// ARM-specific 32-bit wide vector of four packed `i8`.
    pub struct int8x4_t(i8, i8, i8, i8);
    /// ARM-specific 32-bit wide vector of four packed `u8`.
    pub struct uint8x4_t(u8, u8, u8, u8);
}
|
||||
|
||||
/// Calls the two-operand function `$name`, transmuting both arguments to the
/// callee's parameter types and transmuting the result to whatever type the
/// call site expects.
///
/// Expands to bare `transmute` calls, so it must be used inside an `unsafe`
/// context with `transmute` in scope.
macro_rules! dsp_call {
    ($name:expr, $a:expr, $b:expr) => {
        transmute($name(transmute($a), transmute($b)))
    };
}
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.arm.qadd8"]
|
||||
fn arm_qadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub8"]
|
||||
fn arm_qsub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsub16"]
|
||||
fn arm_qsub16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qadd16"]
|
||||
fn arm_qadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qasx"]
|
||||
fn arm_qasx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.qsax"]
|
||||
fn arm_qsax(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sadd16"]
|
||||
fn arm_sadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sadd8"]
|
||||
fn arm_sadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlad"]
|
||||
fn arm_smlad(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smlsd"]
|
||||
fn arm_smlsd(a: i32, b: i32, c: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sasx"]
|
||||
fn arm_sasx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.sel"]
|
||||
fn arm_sel(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shadd8"]
|
||||
fn arm_shadd8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shadd16"]
|
||||
fn arm_shadd16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shsub8"]
|
||||
fn arm_shsub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.ssub8"]
|
||||
fn arm_ssub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.usub8"]
|
||||
fn arm_usub8(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.shsub16"]
|
||||
fn arm_shsub16(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smuad"]
|
||||
fn arm_smuad(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smuadx"]
|
||||
fn arm_smuadx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smusd"]
|
||||
fn arm_smusd(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.smusdx"]
|
||||
fn arm_smusdx(a: i32, b: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.arm.usad8"]
|
||||
fn arm_usad8(a: i32, b: i32) -> u32;
|
||||
}
|
||||
|
||||
/// Saturating four 8-bit integer additions
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
/// res\[2\] = a\[2\] + b\[2\]
|
||||
/// res\[3\] = a\[3\] + b\[3\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd8))]
|
||||
pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_qadd8, a, b)
|
||||
}
|
||||
|
||||
/// Saturating two 8-bit integer subtraction
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
/// res\[2\] = a\[2\] - b\[2\]
|
||||
/// res\[3\] = a\[3\] - b\[3\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub8))]
|
||||
pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_qsub8, a, b)
|
||||
}
|
||||
|
||||
/// Saturating two 16-bit integer subtraction
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[0\]
|
||||
/// res\[1\] = a\[1\] - b\[1\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsub16))]
|
||||
pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qsub16, a, b)
|
||||
}
|
||||
|
||||
/// Saturating two 16-bit integer additions
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[0\]
|
||||
/// res\[1\] = a\[1\] + b\[1\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qadd16))]
|
||||
pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qadd16, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qasx))]
|
||||
pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qasx, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[1\]
|
||||
/// res\[1\] = a\[1\] - b\[0\]
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(qsax))]
|
||||
pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_qsax, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sadd16))]
|
||||
pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_sadd16, a, b)
|
||||
}
|
||||
|
||||
/// Returns the 8-bit signed saturated equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] + b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
/// res\[2\] = a\[2\] + b\[2\]
|
||||
/// res\[3\] = a\[3\] + b\[3\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sadd8))]
|
||||
pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_sadd8, a, b)
|
||||
}
|
||||
|
||||
/// Dual 16-bit Signed Multiply with Addition of products
|
||||
/// and 32-bit accumulation.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlad))]
|
||||
pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlad(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Dual 16-bit Signed Multiply with Subtraction of products
|
||||
/// and 32-bit accumulation and overflow detection.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smlsd))]
|
||||
pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
|
||||
arm_smlsd(transmute(a), transmute(b), c)
|
||||
}
|
||||
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - b\[1\]
|
||||
/// res\[1\] = a\[1\] + b\[0\]
|
||||
///
|
||||
/// and the GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sasx))]
|
||||
pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_sasx, a, b)
|
||||
}
|
||||
|
||||
/// Select bytes from each operand according to APSR GE flags
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res\[0\] = GE\[0\] ? a\[0\] : b\[0\]
|
||||
/// res\[1\] = GE\[1\] ? a\[1\] : b\[1\]
|
||||
/// res\[2\] = GE\[2\] ? a\[2\] : b\[2\]
|
||||
/// res\[3\] = GE\[3\] ? a\[3\] : b\[3\]
|
||||
///
|
||||
/// where GE are bits of APSR
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(sel))]
|
||||
pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_sel, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel byte-wise addition.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] + b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] + b\[1\]) / 2
|
||||
/// res\[2\] = (a\[2\] + b\[2\]) / 2
|
||||
/// res\[3\] = (a\[3\] + b\[3\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shadd8))]
|
||||
pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_shadd8, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel halfword-wise addition.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] + b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] + b\[1\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shadd16))]
|
||||
pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_shadd16, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel byte-wise subtraction.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] - b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] - b\[1\]) / 2
|
||||
/// res\[2\] = (a\[2\] - b\[2\]) / 2
|
||||
/// res\[3\] = (a\[3\] - b\[3\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shsub8))]
|
||||
pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_shsub8, a, b)
|
||||
}
|
||||
|
||||
/// Inserts a `USUB8` instruction.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - a\[0\]
|
||||
/// res\[1\] = a\[1\] - a\[1\]
|
||||
/// res\[2\] = a\[2\] - a\[2\]
|
||||
/// res\[3\] = a\[3\] - a\[3\]
|
||||
///
|
||||
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
|
||||
/// The GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usub8))]
|
||||
pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
|
||||
dsp_call!(arm_usub8, a, b)
|
||||
}
|
||||
|
||||
/// Inserts a `SSUB8` instruction.
|
||||
///
|
||||
/// Returns the 8-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = a\[0\] - a\[0\]
|
||||
/// res\[1\] = a\[1\] - a\[1\]
|
||||
/// res\[2\] = a\[2\] - a\[2\]
|
||||
/// res\[3\] = a\[3\] - a\[3\]
|
||||
///
|
||||
/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
|
||||
/// The GE bits of the APSR are set.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(ssub8))]
|
||||
pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
|
||||
dsp_call!(arm_ssub8, a, b)
|
||||
}
|
||||
|
||||
/// Signed halving parallel halfword-wise subtraction.
|
||||
///
|
||||
/// Returns the 16-bit signed equivalent of
|
||||
///
|
||||
/// res\[0\] = (a\[0\] - b\[0\]) / 2
|
||||
/// res\[1\] = (a\[1\] - b\[1\]) / 2
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(shsub16))]
|
||||
pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
|
||||
dsp_call!(arm_shsub16, a, b)
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Add.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smuad))]
|
||||
pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smuad(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Add Reversed.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smuadx))]
|
||||
pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smuadx(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Subtract.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smusd))]
|
||||
pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smusd(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Signed Dual Multiply Subtract Reversed.
|
||||
///
|
||||
/// Returns the equivalent of
|
||||
///
|
||||
/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
|
||||
///
|
||||
/// and sets the Q flag if overflow occurs on the addition.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(smusdx))]
|
||||
pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
|
||||
arm_smusdx(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Sum of 8-bit absolute differences.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
|
||||
/// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\])
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usad8))]
|
||||
pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 {
|
||||
arm_usad8(transmute(a), transmute(b))
|
||||
}
|
||||
|
||||
/// Sum of 8-bit absolute differences and constant.
|
||||
///
|
||||
/// Returns the 8-bit unsigned equivalent of
|
||||
///
|
||||
/// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\
|
||||
/// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(usad8))]
|
||||
pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
|
||||
__usad8(a, b) + c
|
||||
}
|
||||
|
||||
// Each test feeds small hand-computed inputs to one intrinsic and compares
// the result lane by lane. The portable `i8x4` / `u8x4` / `i16x2` vectors are
// transmuted to and from the intrinsic's own argument types.
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{i16x2, i8x4, u8x4};
    use std::mem::transmute;
    use stdarch_test::simd_test;

    #[test]
    fn qadd8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, i8::MAX);
            let b = i8x4::new(2, -1, 0, 1);
            // The last lane saturates at `i8::MAX`.
            let expected = i8x4::new(3, 1, 3, i8::MAX);
            let actual: i8x4 = dsp_call!(super::__qadd8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsub8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, i8::MIN);
            let b = i8x4::new(2, -1, 0, 1);
            // The last lane saturates at `i8::MIN`.
            let expected = i8x4::new(-1, 3, 3, i8::MIN);
            let actual: i8x4 = dsp_call!(super::__qsub8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qadd16() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(2, -1);
            let expected = i16x2::new(3, 1);
            let actual: i16x2 = dsp_call!(super::__qadd16, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsub16() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(20, -10);
            let expected = i16x2::new(-10, 30);
            let actual: i16x2 = dsp_call!(super::__qsub16, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qasx() {
        unsafe {
            let a = i16x2::new(1, i16::MAX);
            let b = i16x2::new(2, 2);
            // The upper lane saturates at `i16::MAX`.
            let expected = i16x2::new(-1, i16::MAX);
            let actual: i16x2 = dsp_call!(super::__qasx, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn qsax() {
        unsafe {
            let a = i16x2::new(1, i16::MAX);
            let b = i16x2::new(2, 2);
            let expected = i16x2::new(3, i16::MAX - 2);
            let actual: i16x2 = dsp_call!(super::__qsax, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sadd16() {
        unsafe {
            let a = i16x2::new(1, i16::MAX);
            let b = i16x2::new(2, 2);
            // No saturation: the upper lane wraps around.
            let expected = i16x2::new(3, -i16::MAX);
            let actual: i16x2 = dsp_call!(super::__sadd16, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sadd8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, i8::MAX);
            let b = i8x4::new(4, 3, 2, 2);
            // No saturation: the last lane wraps around.
            let expected = i8x4::new(5, 5, 5, -i8::MAX);
            let actual: i8x4 = dsp_call!(super::__sadd8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn sasx() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(2, 1);
            let expected = i16x2::new(0, 4);
            let actual: i16x2 = dsp_call!(super::__sasx, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn smlad() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(3, 4);
            let actual = super::__smlad(transmute(a), transmute(b), 10);
            assert_eq!(actual, (1 * 3) + (2 * 4) + 10);
        }
    }

    #[test]
    fn smlsd() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(3, 4);
            let actual = super::__smlsd(transmute(a), transmute(b), 10);
            assert_eq!(actual, ((1 * 3) - (2 * 4)) + 10);
        }
    }

    #[test]
    fn sel() {
        unsafe {
            let a = i8x4::new(1, 2, 3, i8::MAX);
            let b = i8x4::new(4, 3, 2, 2);
            // call sadd8() to set GE bits
            super::__sadd8(transmute(a), transmute(b));
            let expected = i8x4::new(1, 2, 3, i8::MAX);
            let actual: i8x4 = dsp_call!(super::__sel, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shadd8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, 4);
            let b = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(3, 3, 3, 3);
            let actual: i8x4 = dsp_call!(super::__shadd8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shadd16() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let expected = i16x2::new(3, 3);
            let actual: i16x2 = dsp_call!(super::__shadd16, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shsub8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, 4);
            let b = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(-2, -1, 0, 1);
            let actual: i8x4 = dsp_call!(super::__shsub8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn ssub8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, 4);
            let b = i8x4::new(5, 4, 3, 2);
            let expected = i8x4::new(-4, -2, 0, 2);
            let actual: i8x4 = dsp_call!(super::__ssub8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn usub8() {
        unsafe {
            let a = u8x4::new(1, 2, 3, 4);
            let b = u8x4::new(5, 4, 3, 2);
            // Unsigned lanes wrap: 1 - 5 == 252, 2 - 4 == 254.
            let expected = u8x4::new(252, 254, 0, 2);
            let actual: u8x4 = dsp_call!(super::__usub8, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn shsub16() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let expected = i16x2::new(-2, -1);
            let actual: i16x2 = dsp_call!(super::__shsub16, a, b);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn smuad() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let actual = super::__smuad(transmute(a), transmute(b));
            assert_eq!(actual, 13);
        }
    }

    #[test]
    fn smuadx() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let actual = super::__smuadx(transmute(a), transmute(b));
            assert_eq!(actual, 14);
        }
    }

    #[test]
    fn smusd() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let actual = super::__smusd(transmute(a), transmute(b));
            assert_eq!(actual, -3);
        }
    }

    #[test]
    fn smusdx() {
        unsafe {
            let a = i16x2::new(1, 2);
            let b = i16x2::new(5, 4);
            let actual = super::__smusdx(transmute(a), transmute(b));
            assert_eq!(actual, -6);
        }
    }

    #[test]
    fn usad8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, 4);
            let b = i8x4::new(4, 3, 2, 1);
            // 3 + 1 + 1 + 3
            let actual = super::__usad8(transmute(a), transmute(b));
            assert_eq!(actual, 8);
        }
    }

    #[test]
    fn usada8() {
        unsafe {
            let a = i8x4::new(1, 2, 3, 4);
            let b = i8x4::new(4, 3, 2, 1);
            let acc = 10;
            let actual = super::__usada8(transmute(a), transmute(b), acc);
            assert_eq!(actual, 8 + acc);
        }
    }
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
//! ARM compiler specific intrinsics
|
||||
//!
|
||||
//! # References
|
||||
//!
|
||||
//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref]
|
||||
//!
|
||||
//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Inserts a breakpoint instruction.
|
||||
///
|
||||
/// `val` is a compile-time constant integer in range `[0, 255]`.
|
||||
///
|
||||
/// The breakpoint instruction inserted is:
|
||||
///
|
||||
/// * `BKPT` when compiling as T32,
|
||||
/// * `BRK` when compiling as A32 or A64.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// If `val` is out-of-range the behavior is **undefined**.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// [ARM's documentation][arm_docs] defines that `__breakpoint` accepts the
|
||||
/// following values for `val`:
|
||||
///
|
||||
/// - `0...65535` when compiling as A32 or A64,
|
||||
/// - `0...255` when compiling as T32.
|
||||
///
|
||||
/// The current implementation only accepts values in range `[0, 255]` - if the
|
||||
/// value is out-of-range the behavior is **undefined**.
|
||||
///
|
||||
/// [arm_docs]: https://developer.arm.com/docs/100067/latest/compiler-specific-intrinsics/__breakpoint-intrinsic
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(bkpt, val = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(brk, val = 0))]
|
||||
#[inline(always)]
|
||||
#[rustc_args_required_const(0)]
|
||||
pub unsafe fn __breakpoint(val: i32) {
|
||||
// Ensure that this compiles correctly on non-arm architectures, so libstd
|
||||
// doc builds work. The proper macro will shadow this definition below.
|
||||
#[allow(unused_macros)]
|
||||
macro_rules! call {
|
||||
($e:expr) => {
|
||||
()
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
llvm_asm!(concat!("BKPT ", stringify!($imm8)) : : : : "volatile")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
llvm_asm!(concat!("BRK ", stringify!($imm8)) : : : : "volatile")
|
||||
}
|
||||
}
|
||||
|
||||
// We can't `panic!` inside this intrinsic, so we can't really validate the
|
||||
// arguments here. If `val` is out-of-range this macro uses `val == 255`:
|
||||
constify_imm8!(val, call);
|
||||
}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
// Bindings to the LLVM CRC32 intrinsics. Each declaration is linked to the
// `llvm.aarch64.*` name on AArch64 and to the `llvm.arm.*` name on 32-bit
// Arm. Every variant takes its data operand as a `u32`, whatever the width
// of the datum being checksummed.
extern "C" {
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32b")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")]
    fn crc32b_(crc: u32, data: u32) -> u32;
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32h")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32h")]
    fn crc32h_(crc: u32, data: u32) -> u32;
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32w")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")]
    fn crc32w_(crc: u32, data: u32) -> u32;

    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cb")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cb")]
    fn crc32cb_(crc: u32, data: u32) -> u32;
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32ch")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32ch")]
    fn crc32ch_(crc: u32, data: u32) -> u32;
    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32cw")]
    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")]
    fn crc32cw_(crc: u32, data: u32) -> u32;
}
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// CRC32 single round checksum for bytes (8 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32b))]
|
||||
pub unsafe fn __crc32b(crc: u32, data: u8) -> u32 {
|
||||
crc32b_(crc, data as u32)
|
||||
}
|
||||
|
||||
/// CRC32 single round checksum for half words (16 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32h))]
|
||||
pub unsafe fn __crc32h(crc: u32, data: u16) -> u32 {
|
||||
crc32h_(crc, data as u32)
|
||||
}
|
||||
|
||||
/// CRC32 single round checksum for words (32 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32w))]
|
||||
pub unsafe fn __crc32w(crc: u32, data: u32) -> u32 {
|
||||
crc32w_(crc, data)
|
||||
}
|
||||
|
||||
/// CRC32-C single round checksum for bytes (8 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32cb))]
|
||||
pub unsafe fn __crc32cb(crc: u32, data: u8) -> u32 {
|
||||
crc32cb_(crc, data as u32)
|
||||
}
|
||||
|
||||
/// CRC32-C single round checksum for half words (16 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32ch))]
|
||||
pub unsafe fn __crc32ch(crc: u32, data: u16) -> u32 {
|
||||
crc32ch_(crc, data as u32)
|
||||
}
|
||||
|
||||
/// CRC32-C single round checksum for words (32 bits).
|
||||
#[inline]
|
||||
#[target_feature(enable = "crc")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
|
||||
#[cfg_attr(test, assert_instr(crc32cw))]
|
||||
pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 {
|
||||
crc32cw_(crc, data)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::{arm::*, simd::*};
    use std::mem;
    use stdarch_test::simd_test;

    // Every test starts from a zero checksum and checks two inputs: zero
    // (which must leave the checksum at zero) and one non-zero value.

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32b() {
        assert_eq!(__crc32b(0, 0), 0);
        assert_eq!(__crc32b(0, u8::MAX), 755_167_117);
    }

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32h() {
        assert_eq!(__crc32h(0, 0), 0);
        assert_eq!(__crc32h(0, 0x4000), 1_994_146_192);
    }

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32w() {
        assert_eq!(__crc32w(0, 0), 0);
        assert_eq!(__crc32w(0, u32::MAX), 3_736_805_603);
    }

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32cb() {
        assert_eq!(__crc32cb(0, 0), 0);
        assert_eq!(__crc32cb(0, u8::MAX), 2_910_671_697);
    }

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32ch() {
        assert_eq!(__crc32ch(0, 0), 0);
        assert_eq!(__crc32ch(0, 0x4000), 1_098_587_580);
    }

    #[simd_test(enable = "crc")]
    unsafe fn test_crc32cw() {
        assert_eq!(__crc32cw(0, 0), 0);
        assert_eq!(__crc32cw(0, u32::MAX), 3_080_238_136);
    }
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
//! ARM intrinsics.
|
||||
//!
|
||||
//! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The
|
||||
//! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful.
|
||||
//!
|
||||
//! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
|
||||
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
mod armclang;
|
||||
|
||||
pub use self::armclang::*;
|
||||
|
||||
mod v6;
|
||||
pub use self::v6::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
mod v7;
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
pub use self::v7::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))]
|
||||
mod neon;
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))]
|
||||
pub use self::neon::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
mod crc;
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
pub use self::crc::*;
|
||||
|
||||
pub use crate::core_arch::acle::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `UDF`
///
/// Only compiled for `target_arch = "arm"`. The function never returns: its
/// body is a single call to `crate::intrinsics::abort`.
#[cfg(target_arch = "arm")]
#[cfg_attr(test, assert_instr(udf))]
#[inline]
pub unsafe fn udf() -> ! {
    crate::intrinsics::abort()
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
pub(crate) mod test_support;
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,830 @@
|
|||
use crate::core_arch::{arm::*, simd::*};
|
||||
use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
|
||||
|
||||
// Sample `u8` inputs: zero, small values, a low-nibble mask, the top bit,
// a high-nibble mask and all ones. The element type is fixed by the suffix
// on the first literal.
macro_rules! V_u8 {
    () => {
        vec![0x00u8, 0x01, 0x02, 0x0F, 0x80, 0xF0, 0xFF]
    };
}
|
||||
// Sample `u16` inputs: zero, repeated-byte patterns, the top bit and all
// ones. The element type is fixed by the suffix on the first literal.
macro_rules! V_u16 {
    () => {
        vec![0x0000u16, 0x0101, 0x0202, 0x0F0F, 0x8000, 0xF0F0, 0xFFFF]
    };
}
|
||||
// Sample `u32` inputs: zero, repeated-byte patterns, the top bit and all
// ones. The element type is fixed by the suffix on the first literal.
macro_rules! V_u32 {
    () => {
        vec![
            0x0000_0000u32,
            0x0101_0101,
            0x0202_0202,
            0x0F0F_0F0F,
            0x8000_0000,
            0xF0F0_F0F0,
            0xFFFF_FFFF,
        ]
    };
}
|
||||
// Sample `u64` inputs: zero, repeated-byte patterns (including 0x80 in every
// byte) and all ones. The element type is fixed by the suffix on the first
// literal.
macro_rules! V_u64 {
    () => {
        vec![
            0x0000_0000_0000_0000u64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            0x8080_8080_8080_8080,
            0xF0F0_F0F0_F0F0_F0F0,
            0xFFFF_FFFF_FFFF_FFFF,
        ]
    };
}
|
||||
|
||||
// Sample `i8` inputs. The negative entries are written in decimal; the
// trailing comments give their two's-complement bit patterns.
macro_rules! V_i8 {
    () => {
        vec![
            0i8, 1, 2, 15,
            -128, // 0x80
            -16,  // 0xF0
            -1,   // 0xFF
        ]
    };
}
|
||||
// Sample `i16` inputs. The negative entries are written in decimal; the
// trailing comments give their two's-complement bit patterns.
macro_rules! V_i16 {
    () => {
        vec![
            0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */
            -3856i16, /* 0xF0F0 */
            -1i16,    /* 0xFFFF */
        ]
    };
}
|
||||
// Sample `i32` inputs. The negative entries are written in decimal; the
// trailing comments give their two's-complement bit patterns. Note that the
// fifth entry is 0x80 repeated in every byte, not just the sign bit.
macro_rules! V_i32 {
    () => {
        vec![
            0x00000000i32,
            0x01010101i32,
            0x02020202i32,
            0x0F0F0F0Fi32,
            -2139062144i32, /* 0x80808080 */
            -252645136i32,  /* 0xF0F0F0F0 */
            -1i32,          /* 0xFFFFFFFF */
        ]
    };
}
|
||||
|
||||
// Sample `i64` inputs. The negative entries are written in decimal; the
// trailing comments give their two's-complement bit patterns.
macro_rules! V_i64 {
    () => {
        vec![
            0x0000_0000_0000_0000i64,
            0x0101_0101_0101_0101,
            0x0202_0202_0202_0202,
            0x0F0F_0F0F_0F0F_0F0F,
            -9_223_372_036_854_775_808, // 0x8000000000000000
            -1_152_921_504_606_846_976, // 0xF000000000000000
            -1,                         // 0xFFFFFFFFFFFFFFFF
        ]
    };
}
|
||||
|
||||
// Sample `f32` inputs: a few ordinary values followed by the extreme finite
// values, both infinities and NaN. The element type is fixed by the suffix
// on the first literal.
macro_rules! V_f32 {
    () => {
        vec![
            0.0f32,
            1.0,
            -1.0,
            1.2,
            2.4,
            f32::MAX,
            f32::MIN,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::NAN,
        ]
    };
}
|
||||
|
||||
// Expands to a closure that reinterprets a value of type `$t` as a `u64`.
// The closure body calls `transmute`, so the macro has to be expanded inside
// an `unsafe` context, and `$t` must have the same size as `u64`.
macro_rules! to64 {
    ($t:ident) => {
        |v: $t| -> u64 { transmute::<$t, u64>(v) }
    };
}
|
||||
|
||||
// Expands to a closure that reinterprets a value of type `$t` as a `u128`.
// The closure body calls `transmute`, so the macro has to be expanded inside
// an `unsafe` context, and `$t` must have the same size as `u128`.
macro_rules! to128 {
    ($t:ident) => {
        |v: $t| -> u128 { transmute::<$t, u128>(v) }
    };
}
|
||||
|
||||
/// Checks a two-operand function against a reference implementation.
///
/// Every sample in `vals` is paired with itself (samples are never combined
/// with a different sample). For each such pair:
///
/// 1. both operands are converted with `fill1` and handed to `test_fun`;
/// 2. the reference result `verify_fun(sample, sample)` is converted with
///    `fill2`;
/// 3. both results are passed through `cast` and compared.
///
/// # Panics
///
/// Panics (via `assert_eq!`) on the first sample for which the two cast
/// results differ; the message shows the sample, the converted operands and
/// both un-cast results.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + core::fmt::Debug + std::cmp::PartialEq,
    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
    V: Copy + core::fmt::Debug,
    W: Copy + core::fmt::Debug,
    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
    for &sample in &vals {
        // Left and right operand are the same sample.
        let (lhs, rhs) = (sample, sample);

        let a: V = fill1(lhs);
        let b: V = fill1(rhs);

        let actual_pre: W = test_fun(a, b);
        let expected_pre: W = fill2(verify_fun(lhs, rhs));

        // `W` is not required to be comparable; compare the cast form.
        let actual: X = cast(actual_pre);
        let expected: X = cast(expected_pre);

        assert_eq!(
            actual, expected,
            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
            lhs, rhs, &a, &b, actual_pre, &a, &b, expected_pre
        );
    }
}
|
||||
|
||||
// Generates a `pub(crate)` test driver named `$n`.
//
// The generated function takes the function under test (`fn($v, $v) -> $w`)
// and a reference function (`fn($t, $t) -> $u`) and forwards them to `test`
// together with the sample values `$vals`, the conversion functions `$fill1`
// and `$fill2`, and the comparison cast `$cast`; `$x` is the type the results
// are compared in.
macro_rules! gen_test_fn {
    (
        $n:ident, $t:ident, $u:ident, $v:ident, $w:ident, $x:ident,
        $vals:expr, $fill1:expr, $fill2:expr, $cast:expr
    ) => {
        pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
            // The argument expressions are expanded inside this `unsafe`
            // block, which lets `$cast` be a closure that calls `transmute`.
            unsafe {
                test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
            };
        }
    };
}
|
||||
|
||||
// Generates a `pub(crate)` function `$id` that builds a value of type
// `$out_t` whose `$num_els` elements all equal the `$in_t` argument.
//
// The array `[val; $num_els]` is reinterpreted as the integer type `$cmp_t`
// and that integer is then reinterpreted as `$out_t`, so all three must have
// the same size. `$el_width` is accepted but not used by the expansion.
macro_rules! gen_fill_fn {
    ($id:ident, $el_width:expr, $num_els:expr, $in_t:ident, $out_t:ident, $cmp_t:ident) => {
        pub(crate) fn $id(val: $in_t) -> $out_t {
            let lanes = [val; $num_els];
            unsafe {
                let bits = transmute::<[$in_t; $num_els], $cmp_t>(lanes);
                transmute::<$cmp_t, $out_t>(bits)
            }
        }
    };
}
|
||||
|
||||
gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
|
||||
gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
|
||||
gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
|
||||
gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
|
||||
gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
|
||||
gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
|
||||
gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
|
||||
gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
|
||||
gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
|
||||
gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
|
||||
gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
|
||||
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
|
||||
gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
|
||||
|
||||
gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
|
||||
gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_u8!(),
|
||||
fill_u8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u8,
|
||||
u8,
|
||||
u8,
|
||||
uint8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_u8!(),
|
||||
fillq_u8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x8_t,
|
||||
int8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_s8,
|
||||
to64!(int8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x8_t,
|
||||
int8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_s8,
|
||||
to64!(int8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s8,
|
||||
i8,
|
||||
u8,
|
||||
int8x8_t,
|
||||
uint8x8_t,
|
||||
u64,
|
||||
V_i8!(),
|
||||
fill_s8,
|
||||
fill_u8,
|
||||
to64!(uint8x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x16_t,
|
||||
int8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_s8,
|
||||
to128!(int8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s8,
|
||||
i8,
|
||||
i8,
|
||||
int8x16_t,
|
||||
int8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_s8,
|
||||
to128!(int8x16_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s8,
|
||||
i8,
|
||||
u8,
|
||||
int8x16_t,
|
||||
uint8x16_t,
|
||||
u128,
|
||||
V_i8!(),
|
||||
fillq_s8,
|
||||
fillq_u8,
|
||||
to128!(uint8x16_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_u16!(),
|
||||
fill_u16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u16,
|
||||
u16,
|
||||
u16,
|
||||
uint16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_u16!(),
|
||||
fillq_u16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x4_t,
|
||||
int16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_s16,
|
||||
to64!(int16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x4_t,
|
||||
int16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_s16,
|
||||
to64!(int16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s16,
|
||||
i16,
|
||||
u16,
|
||||
int16x4_t,
|
||||
uint16x4_t,
|
||||
u64,
|
||||
V_i16!(),
|
||||
fill_s16,
|
||||
fill_u16,
|
||||
to64!(uint16x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x8_t,
|
||||
int16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_s16,
|
||||
to128!(int16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s16,
|
||||
i16,
|
||||
i16,
|
||||
int16x8_t,
|
||||
int16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_s16,
|
||||
to128!(int16x8_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s16,
|
||||
i16,
|
||||
u16,
|
||||
int16x8_t,
|
||||
uint16x8_t,
|
||||
u128,
|
||||
V_i16!(),
|
||||
fillq_s16,
|
||||
fillq_u16,
|
||||
to128!(uint16x8_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_u32!(),
|
||||
fill_u32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u32,
|
||||
u32,
|
||||
u32,
|
||||
uint32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_u32!(),
|
||||
fillq_u32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x2_t,
|
||||
int32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_s32,
|
||||
to64!(int32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x2_t,
|
||||
int32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_s32,
|
||||
to64!(int32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s32,
|
||||
i32,
|
||||
u32,
|
||||
int32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_i32!(),
|
||||
fill_s32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x4_t,
|
||||
int32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_s32,
|
||||
to128!(int32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s32,
|
||||
i32,
|
||||
i32,
|
||||
int32x4_t,
|
||||
int32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_s32,
|
||||
to128!(int32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s32,
|
||||
i32,
|
||||
u32,
|
||||
int32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_i32!(),
|
||||
fillq_s32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_u64!(),
|
||||
fill_u64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_u64,
|
||||
u64,
|
||||
u64,
|
||||
uint64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_u64!(),
|
||||
fillq_u64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x1_t,
|
||||
int64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_s64,
|
||||
to64!(int64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_bit_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x1_t,
|
||||
int64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_s64,
|
||||
to64!(int64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_s64,
|
||||
i64,
|
||||
u64,
|
||||
int64x1_t,
|
||||
uint64x1_t,
|
||||
u64,
|
||||
V_i64!(),
|
||||
fill_s64,
|
||||
fill_u64,
|
||||
to64!(uint64x1_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x2_t,
|
||||
int64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_s64,
|
||||
to128!(int64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_bit_s64,
|
||||
i64,
|
||||
i64,
|
||||
int64x2_t,
|
||||
int64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_s64,
|
||||
to128!(int64x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_s64,
|
||||
i64,
|
||||
u64,
|
||||
int64x2_t,
|
||||
uint64x2_t,
|
||||
u128,
|
||||
V_i64!(),
|
||||
fillq_s64,
|
||||
fillq_u64,
|
||||
to128!(uint64x2_t)
|
||||
);
|
||||
|
||||
gen_test_fn!(
|
||||
test_ari_f32,
|
||||
f32,
|
||||
f32,
|
||||
float32x2_t,
|
||||
float32x2_t,
|
||||
u64,
|
||||
V_f32!(),
|
||||
fill_f32,
|
||||
fill_f32,
|
||||
to64!(float32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
test_cmp_f32,
|
||||
f32,
|
||||
u32,
|
||||
float32x2_t,
|
||||
uint32x2_t,
|
||||
u64,
|
||||
V_f32!(),
|
||||
fill_f32,
|
||||
fill_u32,
|
||||
to64!(uint32x2_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_ari_f32,
|
||||
f32,
|
||||
f32,
|
||||
float32x4_t,
|
||||
float32x4_t,
|
||||
u128,
|
||||
V_f32!(),
|
||||
fillq_f32,
|
||||
fillq_f32,
|
||||
to128!(float32x4_t)
|
||||
);
|
||||
gen_test_fn!(
|
||||
testq_cmp_f32,
|
||||
f32,
|
||||
u32,
|
||||
float32x4_t,
|
||||
uint32x4_t,
|
||||
u128,
|
||||
V_f32!(),
|
||||
fillq_f32,
|
||||
fillq_u32,
|
||||
to128!(uint32x4_t)
|
||||
);
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
//! ARMv6 intrinsics.
|
||||
//!
|
||||
//! The reference is [ARMv6-M Architecture Reference Manual][armv6m].
|
||||
//!
|
||||
//! [armv6m]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Reverse the order of the bytes.
///
/// Returns `x` with its two bytes exchanged, e.g. `0x00FF` becomes `0xFF00`.
#[inline]
#[cfg_attr(test, assert_instr(rev))]
pub unsafe fn _rev_u16(x: u16) -> u16 {
    // `swap_bytes` already returns `u16`, so no cast is needed.
    x.swap_bytes()
}
|
||||
|
||||
/// Reverse the order of the bytes.
///
/// Returns `x` with its four bytes in reverse order, e.g. `0x00FF_00FF`
/// becomes `0xFF00_FF00`.
#[inline]
#[cfg_attr(test, assert_instr(rev))]
pub unsafe fn _rev_u32(x: u32) -> u32 {
    // `swap_bytes` already returns `u32`, so no cast is needed.
    x.swap_bytes()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::arm::v6;

    // The low byte must move to the high byte.
    #[test]
    fn _rev_u16() {
        let swapped = unsafe { v6::_rev_u16(0x00FF_u16) };
        assert_eq!(swapped, 0xFF00_u16);
    }

    // Each byte must move to the mirrored position.
    #[test]
    fn _rev_u32() {
        let swapped = unsafe { v6::_rev_u32(0x00FF_00FF_u32) };
        assert_eq!(swapped, 0xFF00_FF00_u32);
    }
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
//! ARMv7 intrinsics.
|
||||
//!
|
||||
//! The reference is [ARMv7-M Architecture Reference Manual (Issue
|
||||
//! E.b)][armv7m].
|
||||
//!
|
||||
//! [armv7m]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html
|
||||
|
||||
pub use super::v6::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Count Leading Zeros.
///
/// Returns the number of zero bits above the most significant set bit of
/// `x`; the result is 8 when `x` is zero.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u8(x: u8) -> u8 {
    // `leading_zeros` returns a `u32` that is at most 8 here, so narrowing
    // it to `u8` cannot lose information.
    let zeros: u32 = x.leading_zeros();
    zeros as u8
}
|
||||
|
||||
/// Count Leading Zeros.
///
/// Returns the number of zero bits above the most significant set bit of
/// `x`; the result is 16 when `x` is zero.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u16(x: u16) -> u16 {
    // `leading_zeros` returns a `u32` that is at most 16 here, so narrowing
    // it to `u16` cannot lose information.
    let zeros: u32 = x.leading_zeros();
    zeros as u16
}
|
||||
|
||||
/// Count Leading Zeros.
///
/// Returns the number of zero bits above the most significant set bit of
/// `x`; the result is 32 when `x` is zero.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
// FIXME: https://github.com/rust-lang/stdarch/issues/382
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
pub unsafe fn _clz_u32(x: u32) -> u32 {
    // `leading_zeros` already returns `u32`, so no cast is needed.
    x.leading_zeros()
}
|
||||
|
||||
/// Reverse the bit order.
///
/// Returns `x` with bit 0 exchanged with bit 31, bit 1 with bit 30, and so
/// on; e.g. `0x0000_000A` becomes `0x5000_0000`.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn _rbit_u32(x: u32) -> u32 {
    // Use the integer method, like the other helpers in this module, rather
    // than calling the compiler intrinsic directly.
    x.reverse_bits()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::core_arch::arm::v7;

    // 0x0A has its highest set bit at position 3, so the expected count is
    // the type width minus 4 in each of the three `clz` tests.

    #[test]
    fn _clz_u8() {
        let zeros = unsafe { v7::_clz_u8(0x0A_u8) };
        assert_eq!(zeros, 4u8);
    }

    #[test]
    fn _clz_u16() {
        let zeros = unsafe { v7::_clz_u16(0x0A_u16) };
        assert_eq!(zeros, 12u16);
    }

    #[test]
    fn _clz_u32() {
        let zeros = unsafe { v7::_clz_u32(0x0A_u32) };
        assert_eq!(zeros, 28u32);
    }

    #[test]
    #[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
    fn _rbit_u32() {
        let reversed = unsafe { v7::_rbit_u32(0x0A_u32) };
        assert_eq!(reversed, 0x5000_0000_u32);
    }
}
|
||||
|
|
@ -0,0 +1,340 @@
|
|||
SIMD and vendor intrinsics module.
|
||||
|
||||
This module is intended to be the gateway to architecture-specific
|
||||
intrinsic functions, typically related to SIMD (but not always!). Each
|
||||
architecture that Rust compiles to may contain a submodule here, which
|
||||
means that this is not a portable module! If you're writing a portable
|
||||
library take care when using these APIs!
|
||||
|
||||
Under this module you'll find an architecture-named module, such as
|
||||
`x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a
|
||||
module entry here, only present on that particular target. For example the
|
||||
`i686-pc-windows-msvc` target will have an `x86` module here, whereas
|
||||
`x86_64-pc-windows-msvc` has `x86_64`.
|
||||
|
||||
[rfc]: https://github.com/rust-lang/rfcs/pull/2325
|
||||
[tracked]: https://github.com/rust-lang/rust/issues/48556
|
||||
|
||||
# Overview
|
||||
|
||||
This module exposes vendor-specific intrinsics that typically correspond to
|
||||
a single machine instruction. These intrinsics are not portable: their
|
||||
availability is architecture-dependent, and not all machines of that
|
||||
architecture might provide the intrinsic.
|
||||
|
||||
The `arch` module is intended to be a low-level implementation detail for
|
||||
higher-level APIs. Using it correctly can be quite tricky as you need to
|
||||
ensure at least a few guarantees are upheld:
|
||||
|
||||
* The correct architecture's module is used. For example the `arm` module
|
||||
isn't available on the `x86_64-unknown-linux-gnu` target. This is
|
||||
typically done by ensuring that `#[cfg]` is used appropriately when using
|
||||
this module.
|
||||
* The CPU the program is currently running on supports the function being
|
||||
called. For example it is unsafe to call an AVX2 function on a CPU that
|
||||
doesn't actually support AVX2.
|
||||
|
||||
As a result of the latter of these guarantees all intrinsics in this module
|
||||
are `unsafe` and extra care needs to be taken when calling them!
|
||||
|
||||
# CPU Feature Detection
|
||||
|
||||
In order to call these APIs in a safe fashion there's a number of
|
||||
mechanisms available to ensure that the correct CPU feature is available
|
||||
to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64`
|
||||
intrinsic on the `x86` and `x86_64` architectures. This function requires
|
||||
the AVX2 feature as [documented by Intel][intel-dox] so to correctly call
|
||||
this function we need to (a) guarantee we only call it on `x86`/`x86_64`
|
||||
and (b) ensure that the CPU feature is available.
|
||||
|
||||
[intel-dox]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64&expand=100
|
||||
|
||||
## Static CPU Feature Detection
|
||||
|
||||
The first option available to us is to conditionally compile code via the
|
||||
`#[cfg]` attribute. CPU features correspond to the `target_feature` cfg
|
||||
available, and can be used like so:
|
||||
|
||||
```ignore
|
||||
#[cfg(
|
||||
all(
|
||||
any(target_arch = "x86", target_arch = "x86_64"),
|
||||
target_feature = "avx2"
|
||||
)
|
||||
)]
|
||||
fn foo() {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::_mm256_add_epi64;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::_mm256_add_epi64;
|
||||
|
||||
unsafe {
|
||||
_mm256_add_epi64(...);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile
|
||||
this function into our module. This means that if the `avx2` feature is
|
||||
*enabled statically* then we'll use the `_mm256_add_epi64` function at
|
||||
runtime. The `unsafe` block here can be justified through the usage of
|
||||
`#[cfg]` to only compile the code in situations where the safety guarantees
|
||||
are upheld.
|
||||
|
||||
Statically enabling a feature is typically done with the `-C
|
||||
target-feature` or `-C target-cpu` flags to the compiler. For example if
|
||||
your local CPU supports AVX2 then you can compile the above function with:
|
||||
|
||||
```sh
|
||||
$ RUSTFLAGS='-C target-cpu=native' cargo build
|
||||
```
|
||||
|
||||
Or otherwise you can specifically enable just the AVX2 feature:
|
||||
|
||||
```sh
|
||||
$ RUSTFLAGS='-C target-feature=+avx2' cargo build
|
||||
```
|
||||
|
||||
Note that when you compile a binary with a particular feature enabled it's
|
||||
important to ensure that you only run the binary on systems which satisfy
|
||||
the required feature set.
|
||||
|
||||
## Dynamic CPU Feature Detection
|
||||
|
||||
Sometimes statically dispatching isn't quite what you want. Instead you
|
||||
might want to build a portable binary that runs across a variety of CPUs,
|
||||
but at runtime it selects the most optimized implementation available. This
|
||||
allows you to build a "least common denominator" binary which has certain
|
||||
sections more optimized for different CPUs.
|
||||
|
||||
Taking our previous example, we're going to compile our binary
|
||||
*without* AVX2 support, but we'd like to enable it for just one function.
|
||||
We can do that in a manner like:
|
||||
|
||||
```ignore
|
||||
fn foo() {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return unsafe { foo_avx2() };
|
||||
}
|
||||
}
|
||||
|
||||
// fallback implementation without using AVX2
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn foo_avx2() {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::_mm256_add_epi64;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::_mm256_add_epi64;
|
||||
|
||||
_mm256_add_epi64(...);
|
||||
}
|
||||
```
|
||||
|
||||
There's a couple of components in play here, so let's go through them in
|
||||
detail!
|
||||
|
||||
* First up we notice the `is_x86_feature_detected!` macro. Provided by
|
||||
the standard library, this macro will perform necessary runtime detection
|
||||
to determine whether the CPU the program is running on supports the
|
||||
specified feature. In this case the macro will expand to a boolean
|
||||
expression evaluating to whether the local CPU has the AVX2 feature or
|
||||
not.
|
||||
|
||||
Note that this macro, like the `arch` module, is platform-specific. For
|
||||
example calling `is_x86_feature_detected!("avx2")` on ARM will be a
|
||||
compile time error. To ensure we don't hit this error a statement level
|
||||
`#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`.
|
||||
|
||||
* Next up we see our AVX2-enabled function, `foo_avx2`. This function is
|
||||
decorated with the `#[target_feature]` attribute which enables a CPU
|
||||
feature for just this one function. Using a compiler flag like `-C
|
||||
target-feature=+avx2` will enable AVX2 for the entire program, but using
|
||||
an attribute will only enable it for the one function. Usage of the
|
||||
`#[target_feature]` attribute currently requires the function to also be
|
||||
`unsafe`, as we see here. This is because the function can only be
|
||||
correctly called on systems which have the AVX2 feature (like the intrinsics
|
||||
themselves).
|
||||
|
||||
And with all that we should have a working program! This program will run
|
||||
across all machines and it'll use the optimized AVX2 implementation on
|
||||
machines where support is detected.
|
||||
|
||||
# Ergonomics
|
||||
|
||||
It's important to note that using the `arch` module is not the easiest
|
||||
thing in the world, so if you're curious to try it out you may want to
|
||||
brace yourself for some wordiness!
|
||||
|
||||
The primary purpose of this module is to enable stable crates on crates.io
|
||||
to build up much more ergonomic abstractions which end up using SIMD under
|
||||
the hood. Over time these abstractions may also move into the standard
|
||||
library itself, but for now this module is tasked with providing the bare
|
||||
minimum necessary to use vendor intrinsics on stable Rust.
|
||||
|
||||
# Other architectures
|
||||
|
||||
This documentation is only for one particular architecture; you can find
|
||||
others at:
|
||||
|
||||
* [`x86`]
|
||||
* [`x86_64`]
|
||||
* [`arm`]
|
||||
* [`aarch64`]
|
||||
* [`mips`]
|
||||
* [`mips64`]
|
||||
* [`powerpc`]
|
||||
* [`powerpc64`]
|
||||
* [`nvptx`]
|
||||
* [`wasm32`]
|
||||
|
||||
[`x86`]: x86/index.html
|
||||
[`x86_64`]: x86_64/index.html
|
||||
[`arm`]: arm/index.html
|
||||
[`aarch64`]: aarch64/index.html
|
||||
[`mips`]: mips/index.html
|
||||
[`mips64`]: mips64/index.html
|
||||
[`powerpc`]: powerpc/index.html
|
||||
[`powerpc64`]: powerpc64/index.html
|
||||
[`nvptx`]: nvptx/index.html
|
||||
[`wasm32`]: wasm32/index.html
|
||||
|
||||
# Examples
|
||||
|
||||
First let's take a look at not actually using any intrinsics but instead
|
||||
using LLVM's auto-vectorization to produce optimized vectorized code for
|
||||
AVX2 and also for the default platform.
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
let mut dst = [0];
|
||||
add_quickly(&[1], &[2], &mut dst);
|
||||
assert_eq!(dst[0], 3);
|
||||
}
|
||||
|
||||
fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
// Note that this `unsafe` block is safe because we're testing
|
||||
// that the `avx2` feature is indeed available on our CPU.
|
||||
if is_x86_feature_detected!("avx2") {
|
||||
return unsafe { add_quickly_avx2(a, b, c) };
|
||||
}
|
||||
}
|
||||
|
||||
add_quickly_fallback(a, b, c)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
add_quickly_fallback(a, b, c) // the function below is inlined here
|
||||
}
|
||||
|
||||
fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) {
|
||||
for ((a, b), c) in a.iter().zip(b).zip(c) {
|
||||
*c = *a + *b;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Next up let's take a look at an example of manually using intrinsics. Here
|
||||
we'll be using SSE4.1 features to implement hex encoding.
|
||||
|
||||
```
|
||||
fn main() {
|
||||
let mut dst = [0; 32];
|
||||
hex_encode(b"\x01\x02\x03", &mut dst);
|
||||
assert_eq!(&dst[..6], b"010203");
|
||||
|
||||
let mut src = [0; 16];
|
||||
for i in 0..16 {
|
||||
src[i] = (i + 1) as u8;
|
||||
}
|
||||
hex_encode(&src, &mut dst);
|
||||
assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10");
|
||||
}
|
||||
|
||||
pub fn hex_encode(src: &[u8], dst: &mut [u8]) {
|
||||
let len = src.len().checked_mul(2).unwrap();
|
||||
assert!(dst.len() >= len);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
if is_x86_feature_detected!("sse4.1") {
|
||||
return unsafe { hex_encode_sse41(src, dst) };
|
||||
}
|
||||
}
|
||||
|
||||
hex_encode_fallback(src, dst)
|
||||
}
|
||||
|
||||
// translated from
|
||||
// https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
let ascii_zero = _mm_set1_epi8(b'0' as i8);
|
||||
let nines = _mm_set1_epi8(9);
|
||||
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
|
||||
let and4bits = _mm_set1_epi8(0xf);
|
||||
|
||||
let mut i = 0_isize;
|
||||
while src.len() >= 16 {
|
||||
let invec = _mm_loadu_si128(src.as_ptr() as *const _);
|
||||
|
||||
let masked1 = _mm_and_si128(invec, and4bits);
|
||||
let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
|
||||
|
||||
// return 0xff corresponding to the elements > 9, or 0x00 otherwise
|
||||
let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
|
||||
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
|
||||
|
||||
// add '0' or the offset depending on the masks
|
||||
let masked1 = _mm_add_epi8(
|
||||
masked1,
|
||||
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
|
||||
);
|
||||
let masked2 = _mm_add_epi8(
|
||||
masked2,
|
||||
_mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
|
||||
);
|
||||
|
||||
// interleave masked1 and masked2 bytes
|
||||
let res1 = _mm_unpacklo_epi8(masked2, masked1);
|
||||
let res2 = _mm_unpackhi_epi8(masked2, masked1);
|
||||
|
||||
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
|
||||
_mm_storeu_si128(
|
||||
dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
|
||||
res2,
|
||||
);
|
||||
src = &src[16..];
|
||||
i += 16;
|
||||
}
|
||||
|
||||
let i = i as usize;
|
||||
hex_encode_fallback(src, &mut dst[i * 2..]);
|
||||
}
|
||||
|
||||
fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) {
|
||||
fn hex(byte: u8) -> u8 {
|
||||
static TABLE: &[u8] = b"0123456789abcdef";
|
||||
TABLE[byte as usize]
|
||||
}
|
||||
|
||||
for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
|
||||
slots[0] = hex((*byte >> 4) & 0xf);
|
||||
slots[1] = hex(*byte & 0xf);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
#![doc(include = "core_arch_docs.md")]
|
||||
#![allow(improper_ctypes_definitions)]
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_features)]
|
||||
#![allow(incomplete_features)]
|
||||
#![feature(
|
||||
const_fn,
|
||||
const_fn_union,
|
||||
const_fn_transmute,
|
||||
const_generics,
|
||||
custom_inner_attributes,
|
||||
link_llvm_intrinsics,
|
||||
platform_intrinsics,
|
||||
repr_simd,
|
||||
simd_ffi,
|
||||
llvm_asm,
|
||||
proc_macro_hygiene,
|
||||
stmt_expr_attributes,
|
||||
core_intrinsics,
|
||||
no_core,
|
||||
rustc_attrs,
|
||||
stdsimd,
|
||||
staged_api,
|
||||
doc_cfg,
|
||||
tbm_target_feature,
|
||||
sse4a_target_feature,
|
||||
arm_target_feature,
|
||||
aarch64_target_feature,
|
||||
cmpxchg16b_target_feature,
|
||||
avx512_target_feature,
|
||||
mips_target_feature,
|
||||
powerpc_target_feature,
|
||||
wasm_target_feature,
|
||||
abi_unadjusted,
|
||||
adx_target_feature,
|
||||
rtm_target_feature,
|
||||
f16c_target_feature,
|
||||
external_doc,
|
||||
allow_internal_unstable,
|
||||
decl_macro
|
||||
)]
|
||||
#![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))]
|
||||
#![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))]
|
||||
#![deny(clippy::missing_inline_in_public_items)]
|
||||
#![allow(
|
||||
clippy::inline_always,
|
||||
clippy::too_many_arguments,
|
||||
clippy::cast_sign_loss,
|
||||
clippy::cast_lossless,
|
||||
clippy::cast_possible_wrap,
|
||||
clippy::cast_possible_truncation,
|
||||
clippy::cast_precision_loss,
|
||||
clippy::shadow_reuse,
|
||||
clippy::cognitive_complexity,
|
||||
clippy::similar_names,
|
||||
clippy::many_single_char_names
|
||||
)]
|
||||
#![cfg_attr(test, allow(unused_imports))]
|
||||
#![no_std]
|
||||
#![unstable(feature = "stdsimd", issue = "27731")]
|
||||
#![doc(
|
||||
test(attr(deny(warnings))),
|
||||
test(attr(allow(dead_code, deprecated, unused_variables, unused_mut)))
|
||||
)]
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std;
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate std_detect;
|
||||
#[cfg(test)]
|
||||
extern crate stdarch_test;
|
||||
|
||||
#[path = "mod.rs"]
|
||||
mod core_arch;
|
||||
|
||||
pub use self::core_arch::arch;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use core::{ffi, hint, intrinsics, marker, mem, ops, ptr, sync};
|
||||
|
|
@ -0,0 +1,409 @@
|
|||
//! Utility macros.
|
||||
|
||||
// Immediate value: 0:255.
//
// Turns a runtime value into a literal immediate: `$imm` is masked down to its
// low eight bits and `$callback!` is invoked with the literal that equals the
// masked value. Because every arm passes a literal, the callback macro can
// forward it to something that requires a compile-time constant.
#[allow(unused)]
macro_rules! constify_imm8 {
    ($imm:expr, $callback:ident) => {
        // Narrow integer types cannot represent every literal below; the
        // lint is silenced so the table can be shared between them.
        #[allow(overflowing_literals)]
        match ($imm) & 0b1111_1111 {
            0 => $callback!(0),
            1 => $callback!(1),
            2 => $callback!(2),
            3 => $callback!(3),
            4 => $callback!(4),
            5 => $callback!(5),
            6 => $callback!(6),
            7 => $callback!(7),
            8 => $callback!(8),
            9 => $callback!(9),
            10 => $callback!(10),
            11 => $callback!(11),
            12 => $callback!(12),
            13 => $callback!(13),
            14 => $callback!(14),
            15 => $callback!(15),
            16 => $callback!(16),
            17 => $callback!(17),
            18 => $callback!(18),
            19 => $callback!(19),
            20 => $callback!(20),
            21 => $callback!(21),
            22 => $callback!(22),
            23 => $callback!(23),
            24 => $callback!(24),
            25 => $callback!(25),
            26 => $callback!(26),
            27 => $callback!(27),
            28 => $callback!(28),
            29 => $callback!(29),
            30 => $callback!(30),
            31 => $callback!(31),
            32 => $callback!(32),
            33 => $callback!(33),
            34 => $callback!(34),
            35 => $callback!(35),
            36 => $callback!(36),
            37 => $callback!(37),
            38 => $callback!(38),
            39 => $callback!(39),
            40 => $callback!(40),
            41 => $callback!(41),
            42 => $callback!(42),
            43 => $callback!(43),
            44 => $callback!(44),
            45 => $callback!(45),
            46 => $callback!(46),
            47 => $callback!(47),
            48 => $callback!(48),
            49 => $callback!(49),
            50 => $callback!(50),
            51 => $callback!(51),
            52 => $callback!(52),
            53 => $callback!(53),
            54 => $callback!(54),
            55 => $callback!(55),
            56 => $callback!(56),
            57 => $callback!(57),
            58 => $callback!(58),
            59 => $callback!(59),
            60 => $callback!(60),
            61 => $callback!(61),
            62 => $callback!(62),
            63 => $callback!(63),
            64 => $callback!(64),
            65 => $callback!(65),
            66 => $callback!(66),
            67 => $callback!(67),
            68 => $callback!(68),
            69 => $callback!(69),
            70 => $callback!(70),
            71 => $callback!(71),
            72 => $callback!(72),
            73 => $callback!(73),
            74 => $callback!(74),
            75 => $callback!(75),
            76 => $callback!(76),
            77 => $callback!(77),
            78 => $callback!(78),
            79 => $callback!(79),
            80 => $callback!(80),
            81 => $callback!(81),
            82 => $callback!(82),
            83 => $callback!(83),
            84 => $callback!(84),
            85 => $callback!(85),
            86 => $callback!(86),
            87 => $callback!(87),
            88 => $callback!(88),
            89 => $callback!(89),
            90 => $callback!(90),
            91 => $callback!(91),
            92 => $callback!(92),
            93 => $callback!(93),
            94 => $callback!(94),
            95 => $callback!(95),
            96 => $callback!(96),
            97 => $callback!(97),
            98 => $callback!(98),
            99 => $callback!(99),
            100 => $callback!(100),
            101 => $callback!(101),
            102 => $callback!(102),
            103 => $callback!(103),
            104 => $callback!(104),
            105 => $callback!(105),
            106 => $callback!(106),
            107 => $callback!(107),
            108 => $callback!(108),
            109 => $callback!(109),
            110 => $callback!(110),
            111 => $callback!(111),
            112 => $callback!(112),
            113 => $callback!(113),
            114 => $callback!(114),
            115 => $callback!(115),
            116 => $callback!(116),
            117 => $callback!(117),
            118 => $callback!(118),
            119 => $callback!(119),
            120 => $callback!(120),
            121 => $callback!(121),
            122 => $callback!(122),
            123 => $callback!(123),
            124 => $callback!(124),
            125 => $callback!(125),
            126 => $callback!(126),
            127 => $callback!(127),
            128 => $callback!(128),
            129 => $callback!(129),
            130 => $callback!(130),
            131 => $callback!(131),
            132 => $callback!(132),
            133 => $callback!(133),
            134 => $callback!(134),
            135 => $callback!(135),
            136 => $callback!(136),
            137 => $callback!(137),
            138 => $callback!(138),
            139 => $callback!(139),
            140 => $callback!(140),
            141 => $callback!(141),
            142 => $callback!(142),
            143 => $callback!(143),
            144 => $callback!(144),
            145 => $callback!(145),
            146 => $callback!(146),
            147 => $callback!(147),
            148 => $callback!(148),
            149 => $callback!(149),
            150 => $callback!(150),
            151 => $callback!(151),
            152 => $callback!(152),
            153 => $callback!(153),
            154 => $callback!(154),
            155 => $callback!(155),
            156 => $callback!(156),
            157 => $callback!(157),
            158 => $callback!(158),
            159 => $callback!(159),
            160 => $callback!(160),
            161 => $callback!(161),
            162 => $callback!(162),
            163 => $callback!(163),
            164 => $callback!(164),
            165 => $callback!(165),
            166 => $callback!(166),
            167 => $callback!(167),
            168 => $callback!(168),
            169 => $callback!(169),
            170 => $callback!(170),
            171 => $callback!(171),
            172 => $callback!(172),
            173 => $callback!(173),
            174 => $callback!(174),
            175 => $callback!(175),
            176 => $callback!(176),
            177 => $callback!(177),
            178 => $callback!(178),
            179 => $callback!(179),
            180 => $callback!(180),
            181 => $callback!(181),
            182 => $callback!(182),
            183 => $callback!(183),
            184 => $callback!(184),
            185 => $callback!(185),
            186 => $callback!(186),
            187 => $callback!(187),
            188 => $callback!(188),
            189 => $callback!(189),
            190 => $callback!(190),
            191 => $callback!(191),
            192 => $callback!(192),
            193 => $callback!(193),
            194 => $callback!(194),
            195 => $callback!(195),
            196 => $callback!(196),
            197 => $callback!(197),
            198 => $callback!(198),
            199 => $callback!(199),
            200 => $callback!(200),
            201 => $callback!(201),
            202 => $callback!(202),
            203 => $callback!(203),
            204 => $callback!(204),
            205 => $callback!(205),
            206 => $callback!(206),
            207 => $callback!(207),
            208 => $callback!(208),
            209 => $callback!(209),
            210 => $callback!(210),
            211 => $callback!(211),
            212 => $callback!(212),
            213 => $callback!(213),
            214 => $callback!(214),
            215 => $callback!(215),
            216 => $callback!(216),
            217 => $callback!(217),
            218 => $callback!(218),
            219 => $callback!(219),
            220 => $callback!(220),
            221 => $callback!(221),
            222 => $callback!(222),
            223 => $callback!(223),
            224 => $callback!(224),
            225 => $callback!(225),
            226 => $callback!(226),
            227 => $callback!(227),
            228 => $callback!(228),
            229 => $callback!(229),
            230 => $callback!(230),
            231 => $callback!(231),
            232 => $callback!(232),
            233 => $callback!(233),
            234 => $callback!(234),
            235 => $callback!(235),
            236 => $callback!(236),
            237 => $callback!(237),
            238 => $callback!(238),
            239 => $callback!(239),
            240 => $callback!(240),
            241 => $callback!(241),
            242 => $callback!(242),
            243 => $callback!(243),
            244 => $callback!(244),
            245 => $callback!(245),
            246 => $callback!(246),
            247 => $callback!(247),
            248 => $callback!(248),
            249 => $callback!(249),
            250 => $callback!(250),
            251 => $callback!(251),
            252 => $callback!(252),
            253 => $callback!(253),
            254 => $callback!(254),
            // Every other masked value has its own arm, so the wildcard
            // stands for 255.
            _ => $callback!(255),
        }
    };
}
|
||||
|
||||
// Immediate value: 0:31.
//
// Turns a runtime value into a literal immediate: `$imm8` is masked down to
// its low five bits and `$expand!` is invoked with the literal that equals the
// masked value.
//
// NOTE(review): a second, textually identical definition of this macro
// (labelled "immediate value: -16:15") has been folded into this one. It
// shadowed this definition and, having the same mask and arms, likewise
// produced 0:31 and never a negative literal. If a signed 5-bit variant is
// actually needed it requires its own name and its own arms.
#[allow(unused)]
macro_rules! constify_imm5 {
    ($imm8:expr, $expand:ident) => {
        // Narrow integer types may not represent every literal below; the
        // lint is silenced so the table can be shared between them.
        #[allow(overflowing_literals)]
        match ($imm8) & 0b1_1111 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            3 => $expand!(3),
            4 => $expand!(4),
            5 => $expand!(5),
            6 => $expand!(6),
            7 => $expand!(7),
            8 => $expand!(8),
            9 => $expand!(9),
            10 => $expand!(10),
            11 => $expand!(11),
            12 => $expand!(12),
            13 => $expand!(13),
            14 => $expand!(14),
            15 => $expand!(15),
            16 => $expand!(16),
            17 => $expand!(17),
            18 => $expand!(18),
            19 => $expand!(19),
            20 => $expand!(20),
            21 => $expand!(21),
            22 => $expand!(22),
            23 => $expand!(23),
            24 => $expand!(24),
            25 => $expand!(25),
            26 => $expand!(26),
            27 => $expand!(27),
            28 => $expand!(28),
            29 => $expand!(29),
            30 => $expand!(30),
            // Every other masked value has its own arm, so the wildcard
            // stands for 31.
            _ => $expand!(31),
        }
    };
}
|
||||
|
||||
// Immediate value: 0:15.
//
// Turns a runtime value into a literal immediate: `$imm8` is masked down to
// its low four bits and `$expand!` is invoked with the literal that equals the
// masked value.
#[allow(unused)]
macro_rules! constify_imm4 {
    ($imm8:expr, $expand:ident) => {
        #[allow(overflowing_literals)]
        match ($imm8) & 0b1111 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            3 => $expand!(3),
            4 => $expand!(4),
            5 => $expand!(5),
            6 => $expand!(6),
            7 => $expand!(7),
            8 => $expand!(8),
            9 => $expand!(9),
            10 => $expand!(10),
            11 => $expand!(11),
            12 => $expand!(12),
            13 => $expand!(13),
            14 => $expand!(14),
            // Every other masked value has its own arm, so the wildcard
            // stands for 15.
            _ => $expand!(15),
        }
    };
}
|
||||
|
||||
// Immediate value: 0:7.
//
// Turns a runtime value into a literal immediate: `$imm` is masked down to its
// low three bits and `$callback!` is invoked with the literal that equals the
// masked value.
#[allow(unused)]
macro_rules! constify_imm3 {
    ($imm:expr, $callback:ident) => {
        #[allow(overflowing_literals)]
        match ($imm) & 0b111 {
            0 => $callback!(0),
            1 => $callback!(1),
            2 => $callback!(2),
            3 => $callback!(3),
            4 => $callback!(4),
            5 => $callback!(5),
            6 => $callback!(6),
            // Every other masked value has its own arm, so the wildcard
            // stands for 7.
            _ => $callback!(7),
        }
    };
}
|
||||
|
||||
// Declares any number of public tuple structs as SIMD vector types.
//
// Each `pub struct Name(fields);` item passed in is re-emitted with its own
// attributes (doc comments included) followed by a fixed attribute set:
// `Copy`/`Clone`/`Debug` derives, `#[repr(simd)]`, and `allow`s for the
// `non_camel_case_types` and `clippy::missing_inline_in_public_items` lints.
#[allow(unused)]
macro_rules! types {
    ($(
        $(#[$attr:meta])*
        pub struct $ty:ident($($elems:tt)*);
    )*) => ($(
        $(#[$attr])*
        #[derive(Copy, Clone, Debug)]
        #[allow(non_camel_case_types)]
        #[repr(simd)]
        #[allow(clippy::missing_inline_in_public_items)]
        pub struct $ty($($elems)*);
    )*)
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
//! MIPS
|
||||
|
||||
// Building this module (even if unused) for non-fp64 targets fails with an LLVM
|
||||
// error.
|
||||
#[cfg(target_feature = "fp64")]
|
||||
mod msa;
|
||||
#[cfg(target_feature = "fp64")]
|
||||
pub use self::msa::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `BREAK`
///
/// The body is a single call to `crate::intrinsics::abort()`, and the `!`
/// return type records that control never comes back to the caller.
///
/// NOTE(review): in test builds the `assert_instr(break)` attribute (imported
/// from `stdarch_test`) presumably checks that this compiles to a `break`
/// instruction -- confirm against `stdarch_test`.
|
||||
#[cfg_attr(test, assert_instr(break))]
|
||||
#[inline]
|
||||
pub unsafe fn break_() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,257 @@
|
|||
//! `core_arch`
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
|
||||
mod acle;
|
||||
|
||||
mod simd;
|
||||
|
||||
#[doc(include = "core_arch_docs.md")]
|
||||
#[stable(feature = "simd_arch", since = "1.27.0")]
|
||||
pub mod arch {
|
||||
/// Platform-specific intrinsics for the `x86` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86", doc))]
|
||||
#[doc(cfg(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `x86_64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86_64 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86::*;
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use crate::core_arch::x86_64::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `arm` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "arm", doc))]
|
||||
#[doc(cfg(target_arch = "arm"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod arm {
|
||||
pub use crate::core_arch::arm::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `aarch64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "aarch64", doc))]
|
||||
#[doc(cfg(target_arch = "aarch64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod aarch64 {
|
||||
pub use crate::core_arch::{aarch64::*, arm::*};
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `wasm32` platform.
|
||||
///
|
||||
/// This module provides intrinsics specific to the WebAssembly
|
||||
/// architecture. Here you'll find intrinsics necessary for leveraging
|
||||
/// WebAssembly proposals such as [atomics] and [simd]. These proposals are
|
||||
/// evolving over time and as such the support here is unstable and requires
|
||||
/// the nightly channel. As WebAssembly proposals stabilize these functions
|
||||
/// will also become stable.
|
||||
///
|
||||
/// [atomics]: https://github.com/webassembly/threads
|
||||
/// [simd]: https://github.com/webassembly/simd
|
||||
///
|
||||
/// See the [module documentation](../index.html) for general information
|
||||
/// about the `arch` module and platform intrinsics.
|
||||
///
|
||||
/// ## Atomics
|
||||
///
|
||||
/// The [threads proposal][atomics] for WebAssembly adds a number of
|
||||
/// instructions for dealing with multithreaded programs. Atomic
|
||||
/// instructions can all be generated through `std::sync::atomic` types, but
|
||||
/// some instructions have no equivalent in Rust such as
|
||||
/// `memory.atomic.notify` so this module will provide these intrinsics.
|
||||
///
|
||||
/// At this time, however, these intrinsics are only available **when the
|
||||
/// standard library itself is compiled with atomics**. Compiling with
|
||||
/// atomics is not enabled by default and requires passing
|
||||
/// `-Ctarget-feature=+atomics` to rustc. The standard library shipped via
|
||||
/// `rustup` is not compiled with atomics. To get access to these intrinsics
|
||||
/// you'll need to compile the standard library from source with the
|
||||
/// requisite compiler flags.
|
||||
///
|
||||
/// ## SIMD
|
||||
///
|
||||
/// The [simd proposal][simd] for WebAssembly adds a new `v128` type for a
|
||||
/// 128-bit SIMD register. It also adds a large array of instructions to
|
||||
/// operate on the `v128` type to perform data processing. The SIMD proposal
|
||||
/// has been in progress for quite some time and many instructions have come
|
||||
/// and gone. This module attempts to keep up with the proposal, but if you
|
||||
/// notice anything awry please feel free to [open an
|
||||
/// issue](https://github.com/rust-lang/stdarch/issues/new).
|
||||
///
|
||||
/// It's important to be aware that the current state of development of SIMD
|
||||
/// in WebAssembly is still somewhat early days. There's lots of pieces to
|
||||
/// demo and prototype with, but discussions and support are still in
|
||||
/// progress. There's a number of pitfalls and gotchas in various places,
|
||||
/// which we will attempt to document here, but there may be others
|
||||
/// lurking!
|
||||
///
|
||||
/// Using SIMD is intended to be similar to how you would use it on `x86_64`, for
|
||||
/// example. You'd write a function such as:
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// #[cfg(target_arch = "wasm32")]
|
||||
/// #[target_feature(enable = "simd128")]
|
||||
/// unsafe fn uses_simd() {
|
||||
/// use std::arch::wasm32::*;
|
||||
/// // ...
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// Unlike `x86_64`, however, WebAssembly does not currently have dynamic
|
||||
/// detection at runtime as to whether SIMD is supported (this is one of the
|
||||
/// motivators for the [conditional sections proposal][condsections], but
|
||||
/// that is still pretty early days). This means that your binary will
|
||||
/// either have SIMD and can only run on engines which support SIMD, or it
|
||||
/// will not have SIMD at all. For compatibility the standard library itself
|
||||
/// does not use any SIMD internally. Determining how best to ship your
|
||||
/// WebAssembly binary with SIMD is largely left up to you as it can be
|
||||
/// pretty nuanced depending on your situation.
|
||||
///
|
||||
/// [condsections]: https://github.com/webassembly/conditional-sections
|
||||
///
|
||||
/// To enable SIMD support at compile time you need to do one of two things:
|
||||
///
|
||||
/// * First you can annotate functions with `#[target_feature(enable =
|
||||
/// "simd128")]`. This causes just that one function to have SIMD support
|
||||
/// available to it, and intrinsics will get inlined as usual in this
|
||||
/// situation.
|
||||
///
|
||||
/// * Second you can compile your program with `-Ctarget-feature=+simd128`.
|
||||
/// This compilation flag blanket enables SIMD support for your entire
|
||||
/// compilation. Note that this does not include the standard library
|
||||
/// unless you recompile the standard library.
|
||||
///
|
||||
/// If you enable SIMD via either of these routes then you'll have a
|
||||
/// WebAssembly binary that uses SIMD instructions, and you'll need to ship
|
||||
/// that accordingly. Also note that if you call SIMD intrinsics but don't
|
||||
/// enable SIMD via either of these mechanisms, you'll still have SIMD
|
||||
/// generated in your program. This means to generate a binary without SIMD
|
||||
/// you'll need to avoid both options above plus calling into any intrinsics
|
||||
/// in this module.
|
||||
///
|
||||
/// > **Note**: Due to
|
||||
/// > [rust-lang/rust#74320](https://github.com/rust-lang/rust/issues/74320)
|
||||
/// > it's recommended to compile your entire program with SIMD support
|
||||
/// > (using `RUSTFLAGS`) or otherwise functions may not be inlined
|
||||
/// > correctly.
|
||||
///
|
||||
/// > **Note**: LLVM's SIMD support is actually split into two features:
|
||||
/// > `simd128` and `unimplemented-simd128`. Rust code can enable `simd128`
|
||||
/// > with `#[target_feature]` (and test for it with `#[cfg(target_feature =
|
||||
/// > "simd128")]`), but it cannot enable `unimplemented-simd128`. The only
|
||||
/// > way to enable this feature is to compile with
|
||||
/// > `-Ctarget-feature=+simd128,+unimplemented-simd128`. This second
|
||||
/// > feature enables more recent instructions implemented in LLVM which
|
||||
/// > haven't always had enough time to make their way to runtimes.
|
||||
#[cfg(any(target_arch = "wasm32", doc))]
|
||||
#[doc(cfg(target_arch = "wasm32"))]
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub mod wasm32 {
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub use crate::core_arch::wasm32::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips", doc))]
|
||||
#[doc(cfg(target_arch = "mips"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod mips {
|
||||
pub use crate::core_arch::mips::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips64", doc))]
|
||||
#[doc(cfg(target_arch = "mips64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod mips64 {
|
||||
pub use crate::core_arch::mips::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `PowerPC` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "powerpc", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod powerpc {
|
||||
pub use crate::core_arch::powerpc::*;
|
||||
}
|
||||
|
||||
/// Platform-specific intrinsics for the `PowerPC64` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "27731")]
|
||||
pub mod powerpc64 {
|
||||
pub use crate::core_arch::powerpc64::*;
|
||||
}
|
||||
|
||||
/// Intrinsics that are specific to the `NVPTX` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", doc))]
#[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))]
#[unstable(feature = "stdsimd", issue = "27731")]
pub mod nvptx {
    // A single facade serves both the `nvptx` and `nvptx64` targets (see the
    // `cfg` above); it re-exports the `core_arch::nvptx` implementation module.
    pub use crate::core_arch::nvptx::*;
}
|
||||
}
|
||||
|
||||
mod simd_llvm;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
mod x86;
|
||||
#[cfg(any(target_arch = "x86_64", doc))]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
mod x86_64;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", doc))]
|
||||
#[doc(cfg(target_arch = "aarch64"))]
|
||||
mod aarch64;
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
|
||||
#[doc(cfg(any(target_arch = "arm", target_arch = "aarch64")))]
|
||||
mod arm;
|
||||
|
||||
#[cfg(any(target_arch = "wasm32", doc))]
|
||||
#[doc(cfg(target_arch = "wasm32"))]
|
||||
mod wasm32;
|
||||
|
||||
#[cfg(any(target_arch = "mips", target_arch = "mips64", doc))]
|
||||
#[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))]
|
||||
mod mips;
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
mod powerpc;
|
||||
|
||||
#[cfg(any(target_arch = "powerpc64", doc))]
|
||||
#[doc(cfg(target_arch = "powerpc64"))]
|
||||
mod powerpc64;
|
||||
|
||||
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", doc))]
|
||||
#[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))]
|
||||
mod nvptx;
|
||||
|
|
@ -0,0 +1,213 @@
|
|||
//! NVPTX intrinsics (experimental)
|
||||
//!
|
||||
//! These intrinsics form the foundation of the CUDA
|
||||
//! programming model.
|
||||
//!
|
||||
//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also
|
||||
//! the [LLVM NVPTX Backend documentation][llvm_docs].
|
||||
//!
|
||||
//! [cuda_c]:
|
||||
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
|
||||
//! [llvm_docs]:
|
||||
//! https://llvm.org/docs/NVPTXUsage.html
|
||||
|
||||
use crate::ffi::c_void;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.nvvm.barrier0"]
|
||||
fn syncthreads() -> ();
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
|
||||
fn block_dim_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
|
||||
fn block_dim_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
|
||||
fn block_dim_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
|
||||
fn block_idx_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
|
||||
fn block_idx_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
|
||||
fn block_idx_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
|
||||
fn grid_dim_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
|
||||
fn grid_dim_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
|
||||
fn grid_dim_z() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
|
||||
fn thread_idx_x() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
|
||||
fn thread_idx_y() -> i32;
|
||||
#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
|
||||
fn thread_idx_z() -> i32;
|
||||
}
|
||||
|
||||
/// Synchronizes all threads in the block.
|
||||
#[inline]
|
||||
pub unsafe fn _syncthreads() -> () {
|
||||
syncthreads()
|
||||
}
|
||||
|
||||
/// x-th thread-block dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _block_dim_x() -> i32 {
|
||||
block_dim_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _block_dim_y() -> i32 {
|
||||
block_dim_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _block_dim_z() -> i32 {
|
||||
block_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread-block index.
|
||||
#[inline]
|
||||
pub unsafe fn _block_idx_x() -> i32 {
|
||||
block_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread-block index.
|
||||
#[inline]
|
||||
pub unsafe fn _block_idx_y() -> i32 {
|
||||
block_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread-block index.
|
||||
#[inline]
|
||||
pub unsafe fn _block_idx_z() -> i32 {
|
||||
block_idx_z()
|
||||
}
|
||||
|
||||
/// x-th block-grid dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _grid_dim_x() -> i32 {
|
||||
grid_dim_x()
|
||||
}
|
||||
|
||||
/// y-th block-grid dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _grid_dim_y() -> i32 {
|
||||
grid_dim_y()
|
||||
}
|
||||
|
||||
/// z-th block-grid dimension.
|
||||
#[inline]
|
||||
pub unsafe fn _grid_dim_z() -> i32 {
|
||||
grid_dim_z()
|
||||
}
|
||||
|
||||
/// x-th thread index.
|
||||
#[inline]
|
||||
pub unsafe fn _thread_idx_x() -> i32 {
|
||||
thread_idx_x()
|
||||
}
|
||||
|
||||
/// y-th thread index.
|
||||
#[inline]
|
||||
pub unsafe fn _thread_idx_y() -> i32 {
|
||||
thread_idx_y()
|
||||
}
|
||||
|
||||
/// z-th thread index.
|
||||
#[inline]
|
||||
pub unsafe fn _thread_idx_z() -> i32 {
|
||||
thread_idx_z()
|
||||
}
|
||||
|
||||
/// Generates the trap instruction `TRAP`
|
||||
#[inline]
|
||||
pub unsafe fn trap() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
|
||||
// Basic CUDA syscall declarations.
|
||||
extern "C" {
|
||||
/// Print formatted output from a kernel to a host-side output stream.
|
||||
///
|
||||
/// Syscall arguments:
|
||||
/// * `status`: The status value that is returned by `vprintf`.
|
||||
/// * `format`: A pointer to the format specifier input (uses common `printf` format).
|
||||
/// * `valist`: A pointer to the valist input.
|
||||
///
|
||||
/// ```
|
||||
/// #[repr(C)]
|
||||
/// struct PrintArgs(f32, f32, f32, i32);
|
||||
///
|
||||
/// vprintf(
|
||||
/// "int(%f + %f) = int(%f) = %d\n".as_ptr(),
|
||||
/// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;
|
||||
|
||||
/// Allocate memory dynamically from a fixed-size heap in global memory.
|
||||
///
|
||||
/// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
|
||||
/// from the device heap and returns a pointer to the allocated memory
|
||||
/// or `NULL` if insufficient memory exists to fulfill the request.
|
||||
///
|
||||
/// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
|
||||
///
|
||||
/// The memory allocated by a given CUDA thread via `malloc()` remains allocated
|
||||
/// for the lifetime of the CUDA context, or until it is explicitly released
|
||||
/// by a call to `free()`. It can be used by any other CUDA threads
|
||||
/// even from subsequent kernel launches.
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
// FIXME(denzp): assign `malloc` and `nothrow` attributes.
|
||||
pub fn malloc(size: usize) -> *mut c_void;
|
||||
|
||||
/// Free previously dynamically allocated memory.
|
||||
///
|
||||
/// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
|
||||
/// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
|
||||
/// the call to `free()` is ignored.
|
||||
///
|
||||
/// Any CUDA thread may free memory allocated by another thread, but care should be taken
|
||||
/// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
|
||||
/// with the same `ptr` has undefined behavior.
|
||||
///
|
||||
/// Sources:
|
||||
/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
// FIXME(denzp): assign `nothrow` attribute.
|
||||
pub fn free(ptr: *mut c_void);
|
||||
|
||||
// Internal declaration of the syscall. Exported variant has
|
||||
// the `char_size` parameter set to `1` (single char size in bytes).
|
||||
fn __assertfail(
|
||||
message: *const u8,
|
||||
file: *const u8,
|
||||
line: u32,
|
||||
function: *const u8,
|
||||
char_size: usize,
|
||||
);
|
||||
}
|
||||
|
||||
/// Syscall to be used whenever the *assert expression produces a `false` value*.
|
||||
///
|
||||
/// Syscall arguments:
|
||||
/// * `message`: The pointer to the string that should be output.
|
||||
/// * `file`: The pointer to the file name string associated with the assert.
|
||||
/// * `line`: The line number associated with the assert.
|
||||
/// * `function`: The pointer to the function name string associated with the assert.
|
||||
///
|
||||
/// Source:
|
||||
/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
|
||||
#[inline]
|
||||
pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
|
||||
__assertfail(message, file, line, function, 1)
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,19 @@
|
|||
//! PowerPC intrinsics
|
||||
|
||||
#[cfg(target_feature = "altivec")]
|
||||
mod altivec;
|
||||
#[cfg(target_feature = "altivec")]
|
||||
pub use self::altivec::*;
|
||||
|
||||
mod vsx;
|
||||
pub use self::vsx::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Generates the trap instruction `TRAP`
|
||||
#[cfg_attr(test, assert_instr(trap))]
|
||||
#[inline]
|
||||
pub unsafe fn trap() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
//! PowerPC Vector Scalar eXtensions (VSX) intrinsics.
|
||||
//!
|
||||
//! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
|
||||
//! NVlink)] and [POWER ISA v3.0B (for POWER9)].
|
||||
//!
|
||||
//! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
|
||||
//! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use crate::core_arch::simd_llvm::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
use crate::mem;
|
||||
|
||||
// Public VSX vector types, generated by the crate's `types!` macro. The `///`
// lines inside the invocation are part of the macro input and become the
// documentation of the generated types.
types! {
    // Not defined yet:
    // pub struct vector_Float16 = f16x8;

    /// PowerPC-specific 128-bit wide vector of two packed `i64`
    pub struct vector_signed_long(i64, i64);
    /// PowerPC-specific 128-bit wide vector of two packed `u64`
    pub struct vector_unsigned_long(u64, u64);
    /// PowerPC-specific 128-bit wide vector mask of two elements
    pub struct vector_bool_long(i64, i64);
    /// PowerPC-specific 128-bit wide vector of two packed `f64`
    pub struct vector_double(f64, f64);

    // Not defined yet:
    // pub struct vector_signed_long_long = vector_signed_long;
    // pub struct vector_unsigned_long_long = vector_unsigned_long;
    // pub struct vector_bool_long_long = vector_bool_long;
    // pub struct vector_signed___int128 = i128x1;
    // pub struct vector_unsigned___int128 = i128x1;
}
|
||||
|
||||
mod sealed {
|
||||
use super::*;
|
||||
use crate::core_arch::simd::*;
|
||||
|
||||
pub trait VectorPermDI {
|
||||
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self;
|
||||
}
|
||||
|
||||
// xxpermdi has an big-endian bias and extended mnemonics
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
#[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))]
|
||||
#[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))]
|
||||
unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 {
|
||||
match dm & 0b11 {
|
||||
0 => simd_shuffle2(a, b, [0b00, 0b10]),
|
||||
1 => simd_shuffle2(a, b, [0b01, 0b10]),
|
||||
2 => simd_shuffle2(a, b, [0b00, 0b11]),
|
||||
_ => simd_shuffle2(a, b, [0b01, 0b11]),
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! vec_xxpermdi {
|
||||
{$impl: ident} => {
|
||||
impl VectorPermDI for $impl {
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self {
|
||||
mem::transmute(xxpermdi(mem::transmute(self), mem::transmute(b), dm))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vec_xxpermdi! { vector_unsigned_long }
|
||||
vec_xxpermdi! { vector_signed_long }
|
||||
vec_xxpermdi! { vector_bool_long }
|
||||
vec_xxpermdi! { vector_double }
|
||||
}
|
||||
|
||||
/// Vector permute.
|
||||
#[inline]
|
||||
#[target_feature(enable = "vsx")]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn vec_xxpermdi<T>(a: T, b: T, dm: u8) -> T
|
||||
where
|
||||
T: sealed::VectorPermDI,
|
||||
{
|
||||
a.vec_xxpermdi(b, dm)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    #[cfg(target_arch = "powerpc")]
    use crate::core_arch::arch::powerpc::*;

    #[cfg(target_arch = "powerpc64")]
    use crate::core_arch::arch::powerpc64::*;

    use super::mem;
    use crate::core_arch::simd::*;
    use stdarch_test::simd_test;

    // Generates one test for `vec_xxpermdi` on a given vector type.
    //
    // `$lanes` is the internal lane type used to build inputs and compare
    // results; `$vector` is the public vector type under test. The inputs are
    // `a = (a0, a1)` and `b = (b0, b1)`; selectors 0..=3 are expected to yield
    // `(a0, b0)`, `(a1, b0)`, `(a0, b1)` and `(a1, b1)` respectively.
    macro_rules! test_vec_xxpermdi {
        {
            $test_fn:ident, $lanes:ident, $vector:ident,
            [$($a0:expr),+], [$($a1:expr),+], [$($b0:expr),+], [$($b1:expr),+]
        } => {
            #[simd_test(enable = "vsx")]
            unsafe fn $test_fn() {
                let a: $vector = mem::transmute($lanes::new($($a0),+, $($a1),+));
                let b: $vector = mem::transmute($lanes::new($($b0),+, $($b1),+));

                let picked: $lanes = mem::transmute(vec_xxpermdi(a, b, 0));
                assert_eq!($lanes::new($($a0),+, $($b0),+), picked);

                let picked: $lanes = mem::transmute(vec_xxpermdi(a, b, 1));
                assert_eq!($lanes::new($($a1),+, $($b0),+), picked);

                let picked: $lanes = mem::transmute(vec_xxpermdi(a, b, 2));
                assert_eq!($lanes::new($($a0),+, $($b1),+), picked);

                let picked: $lanes = mem::transmute(vec_xxpermdi(a, b, 3));
                assert_eq!($lanes::new($($a1),+, $($b1),+), picked);
            }
        };
    }

    test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
    test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
    test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
    test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
//! PowerPC 64
|
||||
//!
|
||||
//! The reference is the [64-Bit ELF V2 ABI Specification - Power
|
||||
//! Architecture].
|
||||
//!
|
||||
//! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
|
||||
|
||||
pub use crate::core_arch::powerpc::*;
|
||||
|
|
@ -0,0 +1,711 @@
|
|||
//! Internal `#[repr(simd)]` types
|
||||
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
// Defines a crate-internal `#[repr(simd)]` tuple struct together with its
// `new`, `splat` and `extract` helpers.
//
// Invocation shape: `simd_ty!(Name[Lane]: FieldTy, ... | field_name, ...)`.
// `Lane` is the type taken by `splat` and returned by `extract`; the field
// names are used as the parameter names of `new`.
macro_rules! simd_ty {
    ($name:ident [$lane:ident]: $($field_ty:ident),* | $($field:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name($(pub $field_ty),*);

        #[allow(clippy::use_self)]
        impl $name {
            // Builds the vector from one value per field.
            #[inline]
            pub(crate) const fn new($($field: $field_ty),*) -> Self {
                $name($($field),*)
            }

            // Builds the vector with `value` repeated in every field.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) const fn splat(value: $lane) -> Self {
                $name($({
                    // The throwaway unit struct only exists so that `$field`
                    // appears inside the repetition, which makes this block
                    // expand once per field.
                    #[allow(non_camel_case_types, dead_code)]
                    struct $field;
                    value
                }),*)
            }

            // Reads the element at `index` through `simd_extract`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> $lane {
                let position = index as u32;
                unsafe { crate::core_arch::simd_llvm::simd_extract(self, position) }
            }
        }
    };
}
|
||||
|
||||
// Defines a crate-internal `#[repr(simd)]` mask type whose helpers take and
// return `bool` while the fields are stored as the integer type `Lane`.
//
// Invocation shape: `simd_m_ty!(Name[Lane]: FieldTy, ... | field_name, ...)`.
// `false` is stored as `0` and `true` as `!0`; `extract` reports `true` for
// any stored value other than `0`.
macro_rules! simd_m_ty {
    ($name:ident [$lane:ident]: $($field_ty:ident),* | $($field:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name($(pub $field_ty),*);

        #[allow(clippy::use_self)]
        impl $name {
            // Maps `false`/`true` to `0`/`!0` via a two-entry lookup table.
            #[inline]
            const fn bool_to_internal(x: bool) -> $lane {
                let encodings = [0 as $lane, !(0 as $lane)];
                encodings[x as usize]
            }

            // Builds the mask from one `bool` per field.
            #[inline]
            pub(crate) const fn new($($field: bool),*) -> Self {
                $name($(Self::bool_to_internal($field)),*)
            }

            // Builds the mask with `value` repeated in every field.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) const fn splat(value: bool) -> Self {
                $name($({
                    // The throwaway unit struct only exists so that `$field`
                    // appears inside the repetition, which makes this block
                    // expand once per field.
                    #[allow(non_camel_case_types, dead_code)]
                    struct $field;
                    Self::bool_to_internal(value)
                }),*)
            }

            // Reads the element at `index` and converts it back to `bool`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> bool {
                let position = index as u32;
                let raw: $lane =
                    unsafe { crate::core_arch::simd_llvm::simd_extract(self, position) };
                raw != 0
            }
        }
    };
}
|
||||
|
||||
// 16-bit wide types:
|
||||
|
||||
simd_ty!(u8x2[u8]: u8, u8 | x0, x1);
|
||||
simd_ty!(i8x2[i8]: i8, i8 | x0, x1);
|
||||
|
||||
// 32-bit wide types:
|
||||
|
||||
simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3);
|
||||
simd_ty!(u16x2[u16]: u16, u16 | x0, x1);
|
||||
|
||||
simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3);
|
||||
simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
|
||||
|
||||
// 64-bit wide types:
|
||||
|
||||
simd_ty!(
|
||||
u8x8[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
|
||||
simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
|
||||
simd_ty!(u64x1[u64]: u64 | x1);
|
||||
|
||||
simd_ty!(
|
||||
i8x8[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
|
||||
simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
|
||||
simd_ty!(i64x1[i64]: i64 | x1);
|
||||
|
||||
simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
|
||||
|
||||
// 128-bit wide types:
|
||||
|
||||
simd_ty!(
|
||||
u8x16[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
u16x8[u16]: u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
|
||||
simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
|
||||
|
||||
simd_ty!(
|
||||
i8x16[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
i16x8[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
|
||||
simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
|
||||
|
||||
simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
|
||||
simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
|
||||
|
||||
simd_m_ty!(
|
||||
m8x16[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_m_ty!(
|
||||
m16x8[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
|
||||
simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
|
||||
|
||||
// 256-bit wide types:
|
||||
|
||||
simd_ty!(
|
||||
u8x32[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15,
|
||||
x16,
|
||||
x17,
|
||||
x18,
|
||||
x19,
|
||||
x20,
|
||||
x21,
|
||||
x22,
|
||||
x23,
|
||||
x24,
|
||||
x25,
|
||||
x26,
|
||||
x27,
|
||||
x28,
|
||||
x29,
|
||||
x30,
|
||||
x31
|
||||
);
|
||||
simd_ty!(
|
||||
u16x16[u16]: u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
u32x8[u32]: u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
|
||||
|
||||
simd_ty!(
|
||||
i8x32[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15,
|
||||
x16,
|
||||
x17,
|
||||
x18,
|
||||
x19,
|
||||
x20,
|
||||
x21,
|
||||
x22,
|
||||
x23,
|
||||
x24,
|
||||
x25,
|
||||
x26,
|
||||
x27,
|
||||
x28,
|
||||
x29,
|
||||
x30,
|
||||
x31
|
||||
);
|
||||
simd_ty!(
|
||||
i16x16[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
i32x8[i32]: i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
|
||||
|
||||
simd_ty!(
|
||||
f32x8[f32]: f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
// 512-bit wide types:
|
||||
|
||||
simd_ty!(
|
||||
i32x16[i32]: i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(
|
||||
u32x16[u32]: u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(
|
||||
f32x16[f32]: f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(
|
||||
i64x8[i64]: i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
simd_ty!(
|
||||
u64x8[u64]: u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
simd_ty!(
|
||||
f64x8[f64]: f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
//! LLVM's SIMD platform intrinsics
|
||||
|
||||
extern "platform-intrinsic" {
|
||||
//pub fn simd_select_bitmask
|
||||
pub fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_le<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_gt<T, U>(x: T, y: T) -> U;
|
||||
pub fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
|
||||
#[rustc_args_required_const(2)]
|
||||
pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;
|
||||
|
||||
#[rustc_const_unstable(feature = "const_simd_insert", issue = "none")]
|
||||
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
|
||||
#[rustc_const_unstable(feature = "const_simd_extract", issue = "none")]
|
||||
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
|
||||
//pub fn simd_select
|
||||
pub fn simd_bitmask<T, U>(x: T) -> U;
|
||||
|
||||
pub fn simd_cast<T, U>(x: T) -> U;
|
||||
|
||||
pub fn simd_add<T>(x: T, y: T) -> T;
|
||||
pub fn simd_sub<T>(x: T, y: T) -> T;
|
||||
pub fn simd_mul<T>(x: T, y: T) -> T;
|
||||
pub fn simd_div<T>(x: T, y: T) -> T;
|
||||
pub fn simd_shl<T>(x: T, y: T) -> T;
|
||||
pub fn simd_shr<T>(x: T, y: T) -> T;
|
||||
pub fn simd_and<T>(x: T, y: T) -> T;
|
||||
pub fn simd_or<T>(x: T, y: T) -> T;
|
||||
pub fn simd_xor<T>(x: T, y: T) -> T;
|
||||
|
||||
pub fn simd_saturating_add<T>(x: T, y: T) -> T;
|
||||
pub fn simd_saturating_sub<T>(x: T, y: T) -> T;
|
||||
|
||||
pub fn simd_gather<T, U, V>(values: T, pointers: U, mask: V) -> T;
|
||||
pub fn simd_scatter<T, U, V>(values: T, pointers: U, mask: V);
|
||||
|
||||
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
|
||||
pub fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
|
||||
pub fn simd_reduce_min<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_max<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_and<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_or<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_xor<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_all<T>(x: T) -> bool;
|
||||
pub fn simd_reduce_any<T>(x: T) -> bool;
|
||||
|
||||
pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
|
||||
pub fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
|
||||
|
||||
pub fn simd_fmin<T>(a: T, b: T) -> T;
|
||||
pub fn simd_fmax<T>(a: T, b: T) -> T;
|
||||
|
||||
pub fn simd_fsqrt<T>(a: T) -> T;
|
||||
pub fn simd_fsin<T>(a: T) -> T;
|
||||
pub fn simd_fcos<T>(a: T) -> T;
|
||||
pub fn simd_fabs<T>(a: T) -> T;
|
||||
pub fn simd_floor<T>(a: T) -> T;
|
||||
pub fn simd_ceil<T>(a: T) -> T;
|
||||
pub fn simd_fexp<T>(a: T) -> T;
|
||||
pub fn simd_fexp2<T>(a: T) -> T;
|
||||
pub fn simd_flog10<T>(a: T) -> T;
|
||||
pub fn simd_flog2<T>(a: T) -> T;
|
||||
pub fn simd_flog<T>(a: T) -> T;
|
||||
//pub fn simd_fpowi
|
||||
//pub fn simd_fpow
|
||||
pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
|
||||
}
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
//! 64-bit wide vector types
|
||||
|
||||
use crate::prelude::v1::*;
|
||||
|
||||
use crate::core_arch::simd_llvm::*;
|
||||
|
||||
define_ty_doc! {
|
||||
f32x2, f32, f32 |
|
||||
/// A 64-bit vector with 2 `f32` lanes.
|
||||
}
|
||||
define_impl! { f32x2, f32, 2, i32x2, x0, x1 }
|
||||
|
||||
define_ty_doc! {
|
||||
u32x2, u32, u32 |
|
||||
/// A 64-bit vector with 2 `u32` lanes.
|
||||
}
|
||||
define_impl! { u32x2, u32, 2, i32x2, x0, x1 }
|
||||
|
||||
define_ty! { i32x2, i32, i32 }
|
||||
define_impl! { i32x2, i32, 2, i32x2, x0, x1 }
|
||||
|
||||
define_ty! { u16x4, u16, u16, u16, u16 }
|
||||
define_impl! { u16x4, u16, 4, i16x4, x0, x1, x2, x3 }
|
||||
|
||||
define_ty! { i16x4, i16, i16, i16, i16 }
|
||||
define_impl! { i16x4, i16, 4, i16x4, x0, x1, x2, x3 }
|
||||
|
||||
define_ty! { u8x8, u8, u8, u8, u8, u8, u8, u8, u8 }
|
||||
define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||
|
||||
define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 }
|
||||
define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 }
|
||||
|
||||
define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
|
||||
define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8);
|
||||
define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8);
|
||||
define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8);
|
||||
define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8);
|
||||
define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8);
|
||||
|
||||
define_common_ops!(f32x2, u32x2, i32x2, u16x4, i16x4, u8x8, i8x8);
|
||||
define_float_ops!(f32x2);
|
||||
define_integer_ops!(
|
||||
(u32x2, u32),
|
||||
(i32x2, i32),
|
||||
(u16x4, u16),
|
||||
(i16x4, i16),
|
||||
(u8x8, u8),
|
||||
(i8x8, i8)
|
||||
);
|
||||
define_signed_integer_ops!(i32x2, i16x4, i8x8);
|
||||
define_casts!(
|
||||
(f32x2, f64x2, as_f64x2),
|
||||
(f32x2, u32x2, as_u32x2),
|
||||
(f32x2, i32x2, as_i32x2),
|
||||
(u32x2, f32x2, as_f32x2),
|
||||
(u32x2, i32x2, as_i32x2),
|
||||
(i32x2, f32x2, as_f32x2),
|
||||
(i32x2, u32x2, as_u32x2),
|
||||
(u16x4, i16x4, as_i16x4),
|
||||
(i16x4, u16x4, as_u16x4),
|
||||
(u8x8, i8x8, as_i8x8),
|
||||
(i8x8, u8x8, as_u8x8),
|
||||
(i8x8, i16x8, as_i16x8),
|
||||
(u8x8, i16x8, as_i16x8),
|
||||
(i16x4, i32x4, as_i32x4),
|
||||
(i32x2, i64x2, as_i64x2),
|
||||
(u8x8, u16x8, as_u16x8),
|
||||
(u16x4, u32x4, as_u32x4),
|
||||
(u16x4, i32x4, as_i32x4),
|
||||
(u32x2, u64x2, as_u64x2),
|
||||
(u32x2, i64x2, as_i64x2)
|
||||
);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Runs the shared operator test macros over the 64-bit vector types:
    // signed integers, unsigned integers, then floats.
    #[test]
    fn operators() {
        test_ops_si!(i8x8, i16x4, i32x2);
        test_ops_ui!(u8x8, u16x4, u32x2);
        test_ops_f!(f32x2);
    }
}
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
//! Intrinsics associated with WebAssembly's upcoming threads proposal.
|
||||
//!
|
||||
//! These intrinsics are all unstable because they're not actually stable in
|
||||
//! WebAssembly itself yet. The signatures may change as [the
|
||||
//! specification][spec] is updated.
|
||||
//!
|
||||
//! [spec]: https://github.com/WebAssembly/threads
|
||||
|
||||
#![cfg(any(target_feature = "atomics", doc))]
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.wasm.atomic.wait.i32"]
|
||||
fn llvm_atomic_wait_i32(ptr: *mut i32, exp: i32, timeout: i64) -> i32;
|
||||
#[link_name = "llvm.wasm.atomic.wait.i64"]
|
||||
fn llvm_atomic_wait_i64(ptr: *mut i64, exp: i64, timeout: i64) -> i32;
|
||||
#[link_name = "llvm.wasm.atomic.notify"]
|
||||
fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32;
|
||||
}
|
||||
|
||||
/// Corresponding intrinsic to wasm's [`memory.atomic.wait32` instruction][instr]
|
||||
///
|
||||
/// This function, when called, will block the current thread if the memory
|
||||
/// pointed to by `ptr` is equal to `expression` (performing this action
|
||||
/// atomically).
|
||||
///
|
||||
/// The argument `timeout_ns` is a maximum number of nanoseconds the calling
|
||||
/// thread will be blocked for, if it blocks. If the timeout is negative then
|
||||
/// the calling thread will be blocked forever.
|
||||
///
|
||||
/// The calling thread can only be woken up with a call to `memory_atomic_notify`
|
||||
/// once it has been blocked. Changing the memory behind `ptr` will not wake
|
||||
/// the thread once it's blocked.
|
||||
///
|
||||
/// # Return value
|
||||
///
|
||||
/// * 0 - indicates that the thread blocked and then was woken up
|
||||
/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
|
||||
/// didn't block
|
||||
/// * 2 - the thread blocked, but the timeout expired.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// This intrinsic is only available **when the standard library itself is
|
||||
/// compiled with the `atomics` target feature**. This version of the standard
|
||||
/// library is not obtainable via `rustup`, but rather will require the
|
||||
/// standard library to be compiled from source.
|
||||
///
|
||||
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("i32.atomic.wait"))]
|
||||
pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 {
|
||||
llvm_atomic_wait_i32(ptr, expression, timeout_ns)
|
||||
}
|
||||
|
||||
/// Corresponding intrinsic to wasm's [`memory.atomic.wait64` instruction][instr]
|
||||
///
|
||||
/// This function, when called, will block the current thread if the memory
|
||||
/// pointed to by `ptr` is equal to `expression` (performing this action
|
||||
/// atomically).
|
||||
///
|
||||
/// The argument `timeout_ns` is a maximum number of nanoseconds the calling
|
||||
/// thread will be blocked for, if it blocks. If the timeout is negative then
|
||||
/// the calling thread will be blocked forever.
|
||||
///
|
||||
/// The calling thread can only be woken up with a call to `memory_atomic_notify`
|
||||
/// once it has been blocked. Changing the memory behind `ptr` will not wake
|
||||
/// the thread once it's blocked.
|
||||
///
|
||||
/// # Return value
|
||||
///
|
||||
/// * 0 - indicates that the thread blocked and then was woken up
|
||||
/// * 1 - the loaded value from `ptr` didn't match `expression`, the thread
|
||||
/// didn't block
|
||||
/// * 2 - the thread blocked, but the timeout expired.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// This intrinsic is only available **when the standard library itself is
|
||||
/// compiled with the `atomics` target feature**. This version of the standard
|
||||
/// library is not obtainable via `rustup`, but rather will require the
|
||||
/// standard library to be compiled from source.
|
||||
///
|
||||
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("i64.atomic.wait"))]
|
||||
pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 {
|
||||
llvm_atomic_wait_i64(ptr, expression, timeout_ns)
|
||||
}
|
||||
|
||||
/// Corresponding intrinsic to wasm's [`memory.atomic.notify` instruction][instr]
|
||||
///
|
||||
/// This function will notify a number of threads blocked on the address
|
||||
/// indicated by `ptr`. Threads previously blocked with the `memory_atomic_wait32`
/// and `memory_atomic_wait64` functions above will be woken up.
|
||||
///
|
||||
/// The `waiters` argument indicates how many waiters should be woken up (a
|
||||
/// maximum). If the value is zero no waiters are woken up.
|
||||
///
|
||||
/// # Return value
|
||||
///
|
||||
/// Returns the number of waiters which were actually notified.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// This intrinsic is only available **when the standard library itself is
|
||||
/// compiled with the `atomics` target feature**. This version of the standard
|
||||
/// library is not obtainable via `rustup`, but rather will require the
|
||||
/// standard library to be compiled from source.
|
||||
///
|
||||
/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("atomic.wake"))]
|
||||
pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 {
|
||||
llvm_atomic_notify(ptr, waiters as i32) as u32
|
||||
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "llvm.wasm.memory.grow.i32"]
|
||||
fn llvm_memory_grow(mem: i32, pages: i32) -> i32;
|
||||
#[link_name = "llvm.wasm.memory.size.i32"]
|
||||
fn llvm_memory_size(mem: i32) -> i32;
|
||||
}
|
||||
|
||||
/// Corresponding intrinsic to wasm's [`memory.size` instruction][instr]
|
||||
///
|
||||
/// This function, when called, will return the current memory size in units of
|
||||
/// pages. The current WebAssembly page size is 65536 bytes (64 KB).
|
||||
///
|
||||
/// The argument `mem` is the numerical index of which memory to return the
|
||||
/// size of. Note that currently the WebAssembly specification only supports one
|
||||
/// memory, so it is required that zero is passed in. The argument is present to
|
||||
/// be forward-compatible with future WebAssembly revisions. If a nonzero
|
||||
/// argument is passed to this function it will currently unconditionally abort.
|
||||
///
|
||||
/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-size
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("memory.size", mem = 0))]
|
||||
#[rustc_args_required_const(0)]
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub fn memory_size(mem: u32) -> usize {
|
||||
unsafe {
|
||||
if mem != 0 {
|
||||
crate::intrinsics::abort();
|
||||
}
|
||||
llvm_memory_size(0) as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr]
|
||||
///
|
||||
/// This function, when called, will attempt to grow the default linear memory
|
||||
/// by the specified `delta` of pages. The current WebAssembly page size is
|
||||
/// 65536 bytes (64 KB). If memory is successfully grown then the previous size
|
||||
/// of memory, in pages, is returned. If memory cannot be grown then
|
||||
/// `usize::MAX` is returned.
|
||||
///
|
||||
/// The argument `mem` is the numerical index of which memory to grow. Note
/// that currently the WebAssembly specification only supports one
|
||||
/// memory, so it is required that zero is passed in. The argument is present to
|
||||
/// be forward-compatible with future WebAssembly revisions. If a nonzero
|
||||
/// argument is passed to this function it will currently unconditionally abort.
|
||||
///
|
||||
/// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-grow
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr("memory.grow", mem = 0))]
|
||||
#[rustc_args_required_const(0)]
|
||||
#[stable(feature = "simd_wasm32", since = "1.33.0")]
|
||||
pub fn memory_grow(mem: u32, delta: usize) -> usize {
|
||||
unsafe {
|
||||
if mem != 0 {
|
||||
crate::intrinsics::abort();
|
||||
}
|
||||
llvm_memory_grow(0, delta as i32) as isize as usize
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
//! WASM32 intrinsics
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[cfg(any(target_feature = "atomics", doc))]
|
||||
mod atomic;
|
||||
#[cfg(any(target_feature = "atomics", doc))]
|
||||
pub use self::atomic::*;
|
||||
|
||||
mod simd128;
|
||||
pub use self::simd128::*;
|
||||
|
||||
mod memory;
|
||||
pub use self::memory::*;
|
||||
|
||||
/// Generates the trap instruction `UNREACHABLE`
|
||||
#[cfg_attr(test, assert_instr(unreachable))]
|
||||
#[inline]
|
||||
#[stable(feature = "unreachable_wasm32", since = "1.37.0")]
|
||||
pub unsafe fn unreachable() -> ! {
|
||||
crate::intrinsics::abort()
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,62 @@
|
|||
//! Advanced Bit Manipulation (ABM) instructions
|
||||
//!
|
||||
//! The POPCNT and LZCNT have their own CPUID bits to indicate support.
|
||||
//!
|
||||
//! The references are:
|
||||
//!
|
||||
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
|
||||
//! Instruction Set Reference, A-Z][intel64_ref].
|
||||
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
|
||||
//! System Instructions][amd64_ref].
|
||||
//!
|
||||
//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
|
||||
//! available.
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
//! [wikipedia_bmi]:
|
||||
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Counts the leading most significant zero bits.
|
||||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "lzcnt")]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
    // `leading_zeros` yields 32 for an input of zero, which is the
    // "size in bits" result the documentation promises for a zero operand.
    x.leading_zeros()
}
|
||||
|
||||
/// Counts the bits that are set.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "popcnt")]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _popcnt32(x: i32) -> i32 {
    // `count_ones` returns a `u32` no larger than 32, so the cast back to
    // `i32` cannot change the value.
    x.count_ones() as i32
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "lzcnt")]
    unsafe fn test_lzcnt_u32() {
        // The highest set bit of 0b0101_1010 is bit 6, leaving 25 zero bits
        // above it in a 32-bit value.
        assert_eq!(_lzcnt_u32(0b0101_1010), 25);
    }

    #[simd_test(enable = "popcnt")]
    unsafe fn test_popcnt32() {
        // 0b0101_1010 has four bits set.
        assert_eq!(_popcnt32(0b0101_1010), 4);
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue