Learn Zig Series (#35) - Cross-Compilation and Target Triples

What will I learn
- You will learn how to write solutions for the Episode 34 exercises;
- You will learn what target triples are and how Zig names its compilation targets;
- You will learn cross-compiling from one OS to another with a single command;
- You will learn building for ARM, RISC-V, and WebAssembly targets;
- You will learn how Zig bundles libc for cross-compilation without external toolchains;
- You will learn conditional compilation with `@import("builtin")`;
- You will learn platform-specific code paths using `switch` on the OS tag;
- You will learn a practical example: building one CLI tool for 4 platforms from a single machine.
Requirements
- A working modern computer running macOS, Windows or Ubuntu;
- An installed Zig 0.14+ distribution (download from ziglang.org);
- The ambition to learn Zig programming.
Difficulty
- Intermediate
Curriculum (of the Learn Zig Series):
- Zig Programming Tutorial - ep001 - Intro
- Learn Zig Series (#2) - Hello Zig, Variables and Types
- Learn Zig Series (#3) - Functions and Control Flow
- Learn Zig Series (#4) - Error Handling (Zig's Best Feature)
- Learn Zig Series (#5) - Arrays, Slices, and Strings
- Learn Zig Series (#6) - Structs, Enums, and Tagged Unions
- Learn Zig Series (#7) - Memory Management and Allocators
- Learn Zig Series (#8) - Pointers and Memory Layout
- Learn Zig Series (#9) - Comptime (Zig's Superpower)
- Learn Zig Series (#10) - Project Structure, Modules, and File I/O
- Learn Zig Series (#11) - Mini Project: Building a Step Sequencer
- Learn Zig Series (#12) - Testing and Test-Driven Development
- Learn Zig Series (#13) - Interfaces via Type Erasure
- Learn Zig Series (#14) - Generics with Comptime Parameters
- Learn Zig Series (#15) - The Build System (build.zig)
- Learn Zig Series (#16) - Sentinel-Terminated Types and C Strings
- Learn Zig Series (#17) - Packed Structs and Bit Manipulation
- Learn Zig Series (#18) - Async Concepts and Event Loops
- Learn Zig Series (#18b) - Addendum: Async Returns in Zig 0.16
- Learn Zig Series (#19) - SIMD with @Vector
- Learn Zig Series (#20) - Working with JSON
- Learn Zig Series (#21) - Networking and TCP Sockets
- Learn Zig Series (#22) - Hash Maps and Data Structures
- Learn Zig Series (#23) - Iterators and Lazy Evaluation
- Learn Zig Series (#24) - Logging, Formatting, and Debug Output
- Learn Zig Series (#25) - Mini Project: HTTP Status Checker
- Learn Zig Series (#26) - Writing a Custom Allocator
- Learn Zig Series (#27) - C Interop: Calling C from Zig
- Learn Zig Series (#28) - C Interop: Exposing Zig to C
- Learn Zig Series (#29) - Inline Assembly and Low-Level Control
- Learn Zig Series (#30) - Thread Safety and Atomics
- Learn Zig Series (#31) - Memory-Mapped I/O and Files
- Learn Zig Series (#32) - Compile-Time Reflection with @typeInfo
- Learn Zig Series (#33) - Building a State Machine with Tagged Unions
- Learn Zig Series (#34) - Performance Profiling and Optimization
- Learn Zig Series (#35) - Cross-Compilation and Target Triples (this post)
Solutions to Episode 34 Exercises
Exercise 1 - Linear search vs binary search benchmark:
```zig
const std = @import("std");

fn linearSearch(data: []const u32, target: u32) bool {
    for (data) |val| {
        if (val == target) return true;
    }
    return false;
}

fn binarySearch(data: []const u32, target: u32) bool {
    var lo: usize = 0;
    var hi: usize = data.len;
    while (lo < hi) {
        const mid = lo + (hi - lo) / 2;
        if (data[mid] == target) return true;
        if (data[mid] < target) {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    return false;
}

pub fn main() !void {
    const allocator = std.heap.page_allocator;
    const n: usize = 1_000_000;
    const data = try allocator.alloc(u32, n);
    defer allocator.free(data);
    for (data, 0..) |*slot, i| {
        slot.* = @intCast(i * 3);
    }

    // generate 10,000 random targets
    var prng = std.Random.DefaultPrng.init(42);
    var targets: [10_000]u32 = undefined;
    for (&targets) |*t| {
        // n is a usize, so cast the upper bound down to u32 explicitly
        t.* = prng.random().intRangeAtMost(u32, 0, @intCast(n * 3));
    }

    // benchmark linear
    var best_linear: u64 = std.math.maxInt(u64);
    for (0..5) |_| {
        var timer = try std.time.Timer.start();
        var found: u32 = 0;
        for (targets) |t| {
            if (linearSearch(data, t)) found += 1;
        }
        const elapsed = timer.read();
        std.mem.doNotOptimizeAway(found);
        if (elapsed < best_linear) best_linear = elapsed;
    }

    // benchmark binary
    var best_binary: u64 = std.math.maxInt(u64);
    for (0..5) |_| {
        var timer = try std.time.Timer.start();
        var found: u32 = 0;
        for (targets) |t| {
            if (binarySearch(data, t)) found += 1;
        }
        const elapsed = timer.read();
        std.mem.doNotOptimizeAway(found);
        if (elapsed < best_binary) best_binary = elapsed;
    }

    std.debug.print("linear: {d:.2} ms\n", .{@as(f64, @floatFromInt(best_linear)) / 1e6});
    std.debug.print("binary: {d:.2} ms\n", .{@as(f64, @floatFromInt(best_binary)) / 1e6});
    std.debug.print("ratio: {d:.1}x\n", .{@as(f64, @floatFromInt(best_linear)) / @as(f64, @floatFromInt(best_binary))});
}
```
Linear search touches every element until it finds the target (O(n)), binary search halves the range on every step (O(log n)). With 1M sorted elements, binary search wins by roughly 3 orders of magnitude because log2(1,000,000) is about 20 comparisons vs up to 1,000,000. Binary search starts winning at surprisingly small sizes -- even around 32-64 elements it's competitive, and by 128 elements it's consistently faster. The key prerequisite is that the data must be sorted, which is why we initialized data[i] = i * 3 in ascending order.
Exercise 2 - SoA particle simulation with bit-packed active flag:
```zig
const std = @import("std");

const ParticleAoS = struct {
    x: f32,
    y: f32,
    vx: f32,
    vy: f32,
    active: bool,
};

const ParticlesSoA = struct {
    x: []f32,
    y: []f32,
    vx: []f32,
    vy: []f32,
    active: []bool,
};

fn updateAoS(parts: []ParticleAoS, dt: f32) void {
    for (parts) |*p| {
        if (p.active) {
            p.x += p.vx * dt;
            p.y += p.vy * dt;
        }
    }
}

fn updateSoA(s: ParticlesSoA, dt: f32) void {
    for (s.x, s.vx, s.active) |*x, vx, a| {
        if (a) x.* += vx * dt;
    }
    for (s.y, s.vy, s.active) |*y, vy, a| {
        if (a) y.* += vy * dt;
    }
}

fn countActiveAoS(parts: []const ParticleAoS) u64 {
    var c: u64 = 0;
    for (parts) |p| {
        if (p.active) c += 1;
    }
    return c;
}

fn countActiveSoA(active: []const bool) u64 {
    var c: u64 = 0;
    for (active) |a| {
        if (a) c += 1;
    }
    return c;
}

pub fn main() !void {
    const count = 2_000_000;
    const allocator = std.heap.page_allocator;

    // AoS
    const aos = try allocator.alloc(ParticleAoS, count);
    defer allocator.free(aos);
    var prng = std.Random.DefaultPrng.init(99);
    for (aos) |*p| {
        p.* = .{ .x = 1.0, .y = 2.0, .vx = 0.5, .vy = -0.3, .active = prng.random().boolean() };
    }

    // SoA
    var soa: ParticlesSoA = undefined;
    soa.x = try allocator.alloc(f32, count);
    soa.y = try allocator.alloc(f32, count);
    soa.vx = try allocator.alloc(f32, count);
    soa.vy = try allocator.alloc(f32, count);
    soa.active = try allocator.alloc(bool, count);
    defer {
        allocator.free(soa.x);
        allocator.free(soa.y);
        allocator.free(soa.vx);
        allocator.free(soa.vy);
        allocator.free(soa.active);
    }
    for (soa.x) |*v| v.* = 1.0;
    for (soa.y) |*v| v.* = 2.0;
    for (soa.vx) |*v| v.* = 0.5;
    for (soa.vy) |*v| v.* = -0.3;
    for (soa.active, 0..) |*v, i| v.* = aos[i].active;

    // bench update
    var best_aos: u64 = std.math.maxInt(u64);
    for (0..20) |_| {
        var t = try std.time.Timer.start();
        updateAoS(aos, 0.016);
        const e = t.read();
        if (e < best_aos) best_aos = e;
    }
    var best_soa: u64 = std.math.maxInt(u64);
    for (0..20) |_| {
        var t = try std.time.Timer.start();
        updateSoA(soa, 0.016);
        const e = t.read();
        if (e < best_soa) best_soa = e;
    }

    std.debug.print("update AoS: {d:.2} ms\n", .{@as(f64, @floatFromInt(best_aos)) / 1e6});
    std.debug.print("update SoA: {d:.2} ms\n", .{@as(f64, @floatFromInt(best_soa)) / 1e6});
    std.debug.print("count AoS: {d}\n", .{countActiveAoS(aos)});
    std.debug.print("count SoA: {d}\n", .{countActiveSoA(soa.active)});
}
```
The SoA layout wins for updatePositions because the hot loop only touches x, vx, and active -- three contiguous arrays vs a struct where those fields are interleaved with y, vy. The countActive comparison is even more dramatic in SoA: one packed bool array vs scanning 20-byte structs just to read a single flag. Regarding the bit-packed variant with ArrayBitSet -- in practice it can be slightly faster for countActive (popcount over packed bits is very efficient) but slightly slower for the conditional update because extracting individual bits has more overhead than reading []bool. The tradeoff depends on whether you're memory-bound (bit packing helps) or compute-bound (direct bool access helps).
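For reference, here is a minimal sketch of the bit-packed variant discussed above, using std.bit_set.ArrayBitSet with a comptime-known (and deliberately small) particle count. The names and sizes are illustrative, not taken from the benchmark code:

```zig
const std = @import("std");

pub fn main() void {
    const count = 1024;
    // One bit per particle instead of one byte: packed into usize words.
    var active = std.bit_set.ArrayBitSet(usize, count).initEmpty();
    var prng = std.Random.DefaultPrng.init(7);
    for (0..count) |i| {
        if (prng.random().boolean()) active.set(i);
    }

    // countActive collapses to a popcount over the packed words:
    std.debug.print("active: {d}\n", .{active.count()});

    // The conditional update reads individual bits with isSet, which is
    // where the per-bit extraction overhead mentioned above comes from:
    var touched: usize = 0;
    for (0..count) |i| {
        if (active.isSet(i)) touched += 1;
    }
    std.debug.print("touched: {d}\n", .{touched});
}
```

For a runtime-sized particle count you would reach for std.DynamicBitSet instead, which offers the same count/isSet operations with an allocator.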
Exercise 3 - Naive vs transposed vs tiled matrix multiply:
```zig
const std = @import("std");

fn matmulNaive(a: []const f64, b: []const f64, c: []f64, n: usize) void {
    for (0..n) |i| {
        for (0..n) |j| {
            var sum: f64 = 0;
            for (0..n) |k| sum += a[i * n + k] * b[k * n + j];
            c[i * n + j] = sum;
        }
    }
}

fn matmulTransposed(a: []const f64, b: []const f64, c: []f64, bt: []f64, n: usize) void {
    // transpose b into bt
    for (0..n) |i| {
        for (0..n) |j| bt[j * n + i] = b[i * n + j];
    }
    for (0..n) |i| {
        for (0..n) |j| {
            var sum: f64 = 0;
            for (0..n) |k| sum += a[i * n + k] * bt[j * n + k];
            c[i * n + j] = sum;
        }
    }
}

fn matmulTiled(a: []const f64, b: []const f64, c: []f64, n: usize) void {
    const tile = 32;
    @memset(c, 0);
    var ii: usize = 0;
    while (ii < n) : (ii += tile) {
        var jj: usize = 0;
        while (jj < n) : (jj += tile) {
            var kk: usize = 0;
            while (kk < n) : (kk += tile) {
                const i_end = @min(ii + tile, n);
                const j_end = @min(jj + tile, n);
                const k_end = @min(kk + tile, n);
                for (ii..i_end) |i| {
                    for (kk..k_end) |k| {
                        const a_ik = a[i * n + k];
                        for (jj..j_end) |j| {
                            c[i * n + j] += a_ik * b[k * n + j];
                        }
                    }
                }
            }
        }
    }
}

pub fn main() !void {
    const allocator = std.heap.page_allocator;
    inline for ([_]usize{ 128, 256, 512 }) |n| {
        const a = try allocator.alloc(f64, n * n);
        const b = try allocator.alloc(f64, n * n);
        const c = try allocator.alloc(f64, n * n);
        const bt = try allocator.alloc(f64, n * n);
        defer {
            allocator.free(a);
            allocator.free(b);
            allocator.free(c);
            allocator.free(bt);
        }
        var prng = std.Random.DefaultPrng.init(42);
        for (a) |*v| v.* = prng.random().float(f64);
        for (b) |*v| v.* = prng.random().float(f64);

        const iters = if (n <= 256) 10 else 3;
        var best_naive: u64 = std.math.maxInt(u64);
        for (0..iters) |_| {
            var t = try std.time.Timer.start();
            matmulNaive(a, b, c, n);
            std.mem.doNotOptimizeAway(c.ptr);
            const e = t.read();
            if (e < best_naive) best_naive = e;
        }
        var best_trans: u64 = std.math.maxInt(u64);
        for (0..iters) |_| {
            var t = try std.time.Timer.start();
            matmulTransposed(a, b, c, bt, n);
            std.mem.doNotOptimizeAway(c.ptr);
            const e = t.read();
            if (e < best_trans) best_trans = e;
        }
        var best_tiled: u64 = std.math.maxInt(u64);
        for (0..iters) |_| {
            var t = try std.time.Timer.start();
            matmulTiled(a, b, c, n);
            std.mem.doNotOptimizeAway(c.ptr);
            const e = t.read();
            if (e < best_tiled) best_tiled = e;
        }
        std.debug.print("n={d}: naive={d:.2}ms trans={d:.2}ms tiled={d:.2}ms\n", .{
            n,
            @as(f64, @floatFromInt(best_naive)) / 1e6,
            @as(f64, @floatFromInt(best_trans)) / 1e6,
            @as(f64, @floatFromInt(best_tiled)) / 1e6,
        });
    }
}
```
The naive version suffers from terrible cache behavior in the inner loop: b[k * n + j] strides through column elements of b, jumping n * 8 bytes per iteration -- which is a cache miss on every access at larger sizes. The transposed version fixes this by making the b access contiguous (bt[j * n + k]). The tiled version keeps 32x32 blocks in L1 cache, reducing memory traffic dramatically for n=512. At size 128 all three are similar (a 128x128 matrix of f64 is 128 x 128 x 8 bytes = 128 KB per matrix, so the working set still sits close to the cores in L2, or even L1 on chips with large data caches), at 256 you see the first divergence, and at 512 the tiled version is typically 2-4x faster than naive. LLVM does vectorize the inner j loop in the tiled version -- you can confirm by checking for vfmadd or vmulpd/vaddpd instructions in the objdump output.
Alright, today we're leaving the single-machine world behind. Everything we've built so far -- the data structures, the performance-tuned code, the state machines, the memory-mapped I/O -- all of that was compiled for whatever machine you happened to be sitting at. But one of Zig's most celebrated features is that it can compile for virtually ANY target platform from ANY host platform, with zero external toolchains required. No installing cross-compilers. No docker containers with ARM sysroots. No struggling with apt-get install gcc-aarch64-linux-gnu. Just Zig ;-)
If you've ever tried cross-compiling a C or C++ project -- say, building an ARM Linux binary on your x86 Mac -- you know the pain. You need a cross-compiler, the right libc headers, matching shared libraries, and usually a fair amount of voodoo with pkg-config paths and sysroot directories. Zig ships all of this BUILT IN, and it's not some second-class feature -- it's a core design goal of the language.
What target triples are: arch-os-abi
When you compile code, the compiler needs to know three things about the target machine: what CPU architecture it runs (x86_64, aarch64, riscv64, etc.), what operating system it uses (linux, macos, windows, freestanding, etc.), and what ABI (Application Binary Interface) it expects (gnu, musl, msvc, eabi, etc.).
Zig combines these into a target string formatted as arch-os-abi. Some examples:
- `x86_64-linux-gnu` -- 64-bit Intel/AMD Linux with glibc
- `x86_64-linux-musl` -- 64-bit Intel/AMD Linux with musl libc
- `aarch64-linux-gnu` -- 64-bit ARM Linux (like Raspberry Pi 4, AWS Graviton)
- `aarch64-macos-none` -- Apple Silicon macOS
- `x86_64-windows-gnu` -- 64-bit Windows with MinGW ABI
- `riscv64-linux-gnu` -- 64-bit RISC-V Linux
- `wasm32-freestanding-none` -- WebAssembly (no OS)
You can see the full list of supported targets by running:
zig targets | python3 -m json.tool | less
This dumps a JSON blob with every supported arch, OS, and ABI combination. The list is enormous -- Zig supports over 60 architectures and dozens of OS/ABI combinations. Not all combos are valid (you can't have x86_64-macos-gnu because macOS doesn't use glibc), but the valid combinations cover basically every platform you'd want to target.
The ABI part is the one most people find confusing. On Linux, you mostly care about two choices: gnu (glibc -- the standard C library on most distros) and musl (a minimal, statically-linkable alternative). Programs linked against musl produce fully static binaries with zero runtime dependencies -- you can literally scp the binary to any Linux machine and it runs. Programs linked against gnu typically need the matching glibc version on the target, which is why you sometimes see errors like "GLIBC_2.34 not found" when running binaries on older distros. For maximum portability on Linux, musl is almost always what you want for Zig programs.
On Windows the ABI is gnu (MinGW-style) or msvc (Visual Studio-style). Zig can target both but gnu doesn't require the MSVC runtime DLLs, so it's simpler for standalone tools.
Cross-compiling from one OS to another
Here's the part that blows people's minds if they're coming from C/C++. Let's say you're on a Linux x86_64 machine and you want to build a binary for macOS on Apple Silicon. In C, you'd need to install a cross-compiler toolchain, get the macOS SDK headers, configure your build system to find everything, and pray. In Zig:
zig build-exe main.zig -target aarch64-macos-none
That's it. One flag. Zig produces a Mach-O binary that runs natively on Apple Silicon Macs. No SDK needed, no Xcode, no additional downloads. Zig ships with the necessary header definitions and linker support built in.
Going the other direction works too. On your Mac:
zig build-exe main.zig -target x86_64-linux-musl
You now have a static Linux binary. Copy it to any x86_64 Linux box via scp and run it. No dependencies whatsoever because musl was linked statically.
Let's write a simple program to demonstrate this in practice:
```zig
const std = @import("std");
const builtin = @import("builtin");

pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    try stdout.print("Hello from Zig!\n", .{});
    try stdout.print("  arch: {s}\n", .{@tagName(builtin.cpu.arch)});
    try stdout.print("  os: {s}\n", .{@tagName(builtin.os.tag)});
    try stdout.print("  abi: {s}\n", .{@tagName(builtin.abi)});
    try stdout.print("  endian: {s}\n", .{if (builtin.cpu.arch.endian() == .little) "little" else "big"});
}
```
Compile this for multiple targets from a single machine:
zig build-exe hello.zig -target x86_64-linux-musl -O ReleaseSafe
zig build-exe hello.zig -target aarch64-linux-musl -O ReleaseSafe
zig build-exe hello.zig -target x86_64-windows-gnu -O ReleaseSafe
zig build-exe hello.zig -target aarch64-macos-none -O ReleaseSafe
Each command produces a binary for the respective platform. The builtin import (which we covered conceptually in episode 9 on comptime) gives you compile-time access to the target's architecture, OS, and ABI. The values are baked into the binary at compile time -- there's no runtime detection happening.
Building for ARM, RISC-V, and WebAssembly
ARM is everywhere now. Raspberry Pis, AWS Graviton servers, Apple Silicon Macs, Android phones, countless embedded boards. RISC-V is the rising open-source architecture showing up in development boards and slowly in production hardware. And WebAssembly lets you run native-speed code in browsers and edge runtimes. Zig targets all three.
ARM (aarch64):
zig build-exe myapp.zig -target aarch64-linux-musl -O ReleaseFast
This produces a static ARM64 Linux binary. If you have a Raspberry Pi 4 (or newer) with a 64-bit OS, this binary runs directly on it. If you have an older 32-bit ARM board, use arm-linux-musleabihf instead (the hf means hardware floating point).
RISC-V:
zig build-exe myapp.zig -target riscv64-linux-musl -O ReleaseFast
RISC-V support in Zig is solid for the base integer instruction set. If you have a RISC-V development board (like the SiFive HiFive or StarFive VisionFive), you can build and run binaries directly. No external toolchains necessary.
WebAssembly:
WebAssembly is a bit different because there's no operating system -- it runs in a sandbox (browser, Node.js, Wasmtime, etc.). Zig targets it as wasm32-freestanding-none:
```zig
// wasm_example.zig
export fn add(a: i32, b: i32) i32 {
    return a + b;
}

export fn fibonacci(n: u32) u32 {
    if (n <= 1) return n;
    var prev: u32 = 0;
    var curr: u32 = 1;
    for (2..n + 1) |_| {
        const next = prev +% curr;
        prev = curr;
        curr = next;
    }
    return curr;
}
```
Compile to Wasm:
zig build-lib wasm_example.zig -target wasm32-freestanding-none -O ReleaseSmall -dynamic
This produces a .wasm file you can load in a browser:
```html
<script>
  fetch('wasm_example.wasm')
    .then(r => r.arrayBuffer())
    .then(bytes => WebAssembly.instantiate(bytes))
    .then(obj => {
      console.log('add(3, 4) =', obj.instance.exports.add(3, 4));
      console.log('fib(10) =', obj.instance.exports.fibonacci(10));
    });
</script>
```
The export keyword in Zig (different from pub -- pub is Zig-module-visible, export is linker-visible) marks functions as available to the Wasm host. The -dynamic flag tells Zig to produce a shared library (which is what .wasm modules are). -O ReleaseSmall minimizes binary size, which matters a lot for Wasm since the module gets downloaded over the network.
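To make the pub/export distinction concrete, here is a tiny hedged sketch (the function names are made up for illustration):

```zig
const std = @import("std");

// Visible to other Zig files that @import this one; the compiler is free
// to inline it or drop the symbol entirely:
pub fn zigOnly(x: i32) i32 {
    return x * 2;
}

// Emitted under exactly this symbol name in the object file (or in the
// .wasm export table), so a foreign host can call it:
export fn hostVisible(x: i32) i32 {
    return zigOnly(x) + 1;
}
```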
Zig's bundled libc for cross-compilation
This is what makes Zig's cross-compilation story fundamentally different from every other compiled language. When you cross-compile a C program for aarch64-linux-gnu, you need matching glibc headers and libraries for that target. Getting those headers is the hard part -- you either build them from source (painful), extract them from a Docker image (hacky), or use a pre-packaged sysroot (limited availability).
Zig doesn't have this problem because it ships glibc, musl, and several other C libraries as source code and compiles them on-the-fly for your target. When you compile Zig code that uses libc (directly or through the standard library), Zig's build system compiles exactly the libc components you need for the target platform, links them in, and produces a working binary.
This means Zig is also a surprisingly competent C cross-compiler. You can compile C code for any Zig-supported target:
zig cc -target aarch64-linux-musl -O2 hello.c -o hello_arm
zig cc acts as a drop-in replacement for gcc or clang, but with Zig's cross-compilation superpowers. Many C/C++ projects can be cross-compiled just by setting CC=zig cc in their Makefiles. Some larger open-source projects (like Redis and SQLite) have been cross-compiled this way with zero modifications.
For Zig programs specifically, when you link against libc (via const c = @cImport(@cInclude("stdio.h")) or by using C libraries as we did in episode 27), the bundled libc is compiled automatically for the target. You don't even think about it -- it just works.
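A minimal sketch of that "it just works" claim -- a Zig file that calls into libc via @cImport. The only requirement is passing -lc (e.g. `zig build-exe use_libc.zig -lc -target aarch64-linux-musl`) so Zig knows to compile and link the bundled libc for the target:

```zig
// Pulls in the C stdio header through Zig's C translation layer.
const c = @cImport(@cInclude("stdio.h"));

pub fn main() void {
    // Calling the real C printf, provided by the bundled musl/glibc.
    _ = c.printf("hello from bundled libc\n");
}
```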
The practical implication is huge: you can set up a single CI machine (say, a Linux x86_64 box in the cloud) and use it to build release binaries for Linux (x86, ARM, RISC-V), macOS (x86, ARM), Windows (x86), and WebAssembly -- all from one build.zig script, all in one pipeline, with zero external dependencies beyond the Zig compiler itself.
Conditional compilation with @import("builtin")
When writing cross-platform code, you often need different behavior depending on the target. Zig handles this entirely at compile time using @import("builtin") -- a special module that the compiler injects with information about the current build target.
```zig
const std = @import("std");
const builtin = @import("builtin");

const cache_line_size: usize = switch (builtin.cpu.arch) {
    .x86_64 => 64,
    .aarch64 => 128, // Apple M-series and some ARM chips use 128-byte lines
    .riscv64 => 64,
    else => 64, // conservative default
};

const default_allocator = if (builtin.os.tag == .freestanding)
    @compileError("freestanding targets must provide their own allocator")
else
    std.heap.page_allocator;

pub fn getConfigDir(buf: []u8) ![]const u8 {
    if (builtin.os.tag == .windows) {
        // The allocation leaks here; acceptable for a short-lived demo.
        return std.process.getEnvVarOwned(std.heap.page_allocator, "APPDATA") catch
            return "C:\\Users\\Default\\AppData\\Roaming";
    } else if (builtin.os.tag == .macos) {
        const home = std.posix.getenv("HOME") orelse "/tmp";
        return std.fmt.bufPrint(buf, "{s}/Library/Application Support", .{home});
    } else {
        // Linux and other POSIX
        if (std.posix.getenv("XDG_CONFIG_HOME")) |xdg| return xdg;
        const home = std.posix.getenv("HOME") orelse "/tmp";
        return std.fmt.bufPrint(buf, "{s}/.config", .{home});
    }
}

pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    try stdout.print("cache line size for {s}: {d} bytes\n", .{
        @tagName(builtin.cpu.arch),
        cache_line_size,
    });
    var buf: [512]u8 = undefined;
    const config = try getConfigDir(&buf);
    try stdout.print("config dir: {s}\n", .{config});
}
```
The critical thing to understand is that builtin.os.tag, builtin.cpu.arch, and builtin.abi are all comptime-known values. When you write if (builtin.os.tag == .windows), the compiler evaluates this condition at compile time. In a Linux build, the entire Windows branch is completely eliminated from the binary -- not just skipped at runtime, but never compiled at all. This means you can have platform-specific code that references headers or APIs that don't exist on other platforms, and it won't cause errors as long as it's behind the correct comptime branch.
This is a massive advantage over C's #ifdef preprocessor approach. In C, conditional compilation uses string-based preprocessor directives that don't participate in the type system. In Zig, it's just regular if and switch on typed comptime values, with full IDE support, type checking, and refactoring tools.
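A small sketch of both properties at once: the @compileError below only fires if its branch is actually selected at comptime, so this file compiles cleanly on every OS target except freestanding (the names are illustrative, not a real API):

```zig
const std = @import("std");
const builtin = @import("builtin");

// The freestanding prong is never analyzed on hosted targets, so the
// @compileError is harmless there -- exactly like dead #ifdef code in C,
// but type-checked and without a preprocessor.
const impl = switch (builtin.os.tag) {
    .freestanding => @compileError("no OS: supply your own implementation"),
    else => struct {
        fn tempDir() []const u8 {
            return if (builtin.os.tag == .windows) "C:\\Temp" else "/tmp";
        }
    },
};

pub fn main() void {
    std.debug.print("temp dir: {s}\n", .{impl.tempDir()});
}
```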
Platform-specific code paths with switch (builtin.os.tag)
For more complex platform differences, a switch on the OS tag (or CPU arch) is cleaner than nested if-else chains:
```zig
const std = @import("std");
const builtin = @import("builtin");

const PlatformTimer = struct {
    start_ns: i128,

    pub fn now() PlatformTimer {
        return .{ .start_ns = std.time.nanoTimestamp() };
    }

    pub fn elapsedMs(self: PlatformTimer) f64 {
        const end = std.time.nanoTimestamp();
        return @as(f64, @floatFromInt(end - self.start_ns)) / 1_000_000.0;
    }
};

fn getPlatformInfo() struct { name: []const u8, sep: u8, line_ending: []const u8 } {
    return switch (builtin.os.tag) {
        .linux => .{
            .name = "Linux",
            .sep = '/',
            .line_ending = "\n",
        },
        .macos => .{
            .name = "macOS",
            .sep = '/',
            .line_ending = "\n",
        },
        .windows => .{
            .name = "Windows",
            .sep = '\\',
            .line_ending = "\r\n",
        },
        else => .{
            .name = "Unknown",
            .sep = '/',
            .line_ending = "\n",
        },
    };
}

fn getPageSize() usize {
    return switch (builtin.os.tag) {
        // std.mem.page_size was removed in Zig 0.14; std.heap.pageSize()
        // is the runtime query that replaced it.
        .linux, .macos => std.heap.pageSize(),
        .windows => 4096,
        else => 4096,
    };
}

pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    const info = getPlatformInfo();
    try stdout.print("Running on {s}\n", .{info.name});
    try stdout.print("Path separator: '{c}'\n", .{info.sep});
    try stdout.print("Page size: {d} bytes\n", .{getPageSize()});
    try stdout.print("Pointer size: {d} bytes\n", .{@sizeOf(usize)});
    try stdout.print("Endianness: {s}\n", .{
        if (builtin.cpu.arch.endian() == .little) "little-endian" else "big-endian",
    });

    // Demo timing
    const timer = PlatformTimer.now();
    var sum: u64 = 0;
    for (0..10_000_000) |i| sum +%= i;
    std.mem.doNotOptimizeAway(sum);
    try stdout.print("10M iterations: {d:.2} ms\n", .{timer.elapsedMs()});
}
```
Because switch (builtin.os.tag) is evaluated at comptime, the compiler eliminates all branches that don't match the target platform. The resulting binary only contains the code for the platform it was built for. This gives you the convenience of a single source file that handles multiple platforms, with zero runtime overhead for the platform dispatch.
Note how this works naturally with Zig's error handling and type system. You don't need special macro languages or conditional compilation pragmas. It's just normal Zig code that happens to branch on comptime-known values. As we saw in episode 15 when we looked at build.zig, the same builtin values are available in build scripts too -- your build logic can add platform-specific source files, flags, or libraries based on the target.
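As a hedged sketch of that idea, a build.zig can branch on the resolved target -- the library and framework names below are placeholders for illustration, not something a real project necessarily needs:

```zig
const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const exe = b.addExecutable(.{
        .name = "myapp",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = b.standardOptimizeOption(.{}),
    });

    // target.result is the fully resolved std.Target, so build logic can
    // switch on it just like application code switches on builtin.os.tag.
    switch (target.result.os.tag) {
        .windows => exe.linkSystemLibrary("ws2_32"), // e.g. winsock
        .macos => exe.linkFramework("CoreFoundation"),
        else => {},
    }
    b.installArtifact(exe);
}
```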
Testing cross-compiled binaries with QEMU
Building for a different platform is great, but how do you actually TEST those binaries without the physical hardware? QEMU to the rescue. QEMU is a machine emulator that can run binaries compiled for other architectures. For our purposes, QEMU's "user mode emulation" is the interesting bit -- it translates foreign instruction sets on the fly so you can run, say, an ARM binary on your x86 Linux machine.
Install QEMU user-mode emulators:
# Ubuntu/Debian
sudo apt install qemu-user qemu-user-binfmt
# After installing, ARM and RISC-V binaries "just run" transparently
# because binfmt_misc registers the handlers with the kernel
Now build and test:
zig build-exe hello.zig -target aarch64-linux-musl -O ReleaseSafe
qemu-aarch64 ./hello
zig build-exe hello.zig -target riscv64-linux-musl -O ReleaseSafe
qemu-riscv64 ./hello
With qemu-user-binfmt installed, the kernel intercepts foreign binaries automatically, so you can even run them directly without the qemu-aarch64 prefix:
zig build-exe hello.zig -target aarch64-linux-musl
./hello # kernel detects it's ARM, routes through QEMU transparently
This makes it practical to run your test suite against cross-compiled builds. In your build.zig you can configure test steps that use QEMU as the test runner:
```zig
const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const exe = b.addExecutable(.{
        .name = "myapp",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(exe);

    const unit_tests = b.addTest(.{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    const run_tests = b.addRunArtifact(unit_tests);
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_tests.step);
}
```
Then run:
zig build test -Dtarget=aarch64-linux-musl
Zig's build system detects the foreign target and automatically invokes QEMU to run the test binary. Your tests run on emulated ARM without any additional configuration. The performance won't match native hardware (QEMU user-mode is roughly 5-10x slower than native), but it's fast enough for unit tests and integration tests. Save the real hardware testing for benchmarks and final validation.
Practical example: building a CLI tool for 4 platforms from one machine
Let's tie everything together. We'll write a small but real CLI tool -- a file hashing utility that reads a file and prints its SHA-256 hash -- and build it for four platforms in one go.
```zig
const std = @import("std");
const builtin = @import("builtin");

const version = "1.0.0";

fn hashFile(path: []const u8) ![64]u8 {
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    var hasher = std.crypto.hash.sha2.Sha256.init(.{});
    var buf: [8192]u8 = undefined;
    while (true) {
        const n = try file.read(&buf);
        if (n == 0) break;
        hasher.update(buf[0..n]);
    }
    var digest: [32]u8 = undefined;
    hasher.final(&digest);

    // render the 32-byte digest as 64 lowercase hex characters
    var hex: [64]u8 = undefined;
    for (digest, 0..) |byte, i| {
        const hi = byte >> 4;
        const lo = byte & 0x0f;
        hex[i * 2] = if (hi < 10) '0' + hi else 'a' + hi - 10;
        hex[i * 2 + 1] = if (lo < 10) '0' + lo else 'a' + lo - 10;
    }
    return hex;
}

pub fn main() !void {
    const stdout = std.io.getStdOut().writer();
    var args = std.process.args();
    _ = args.next(); // skip program name
    const path = args.next() orelse {
        try stdout.print("zighash v{s} ({s}-{s})\n", .{
            version,
            @tagName(builtin.cpu.arch),
            @tagName(builtin.os.tag),
        });
        try stdout.print("Usage: zighash <file>\n", .{});
        return;
    };
    const hex = hashFile(path) catch |err| {
        try stdout.print("error: {s}: {s}\n", .{ path, @errorName(err) });
        return;
    };
    try stdout.print("{s} {s}\n", .{ &hex, path });
}
```
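As an aside: the manual nibble-to-hex loop is a good exercise, but the standard library can do the same job. A minimal sketch using std.fmt.fmtSliceHexLower (API as of Zig 0.14; later versions moved to the `{x}` format specifier for byte slices):

```zig
const std = @import("std");

pub fn main() !void {
    const digest = [_]u8{ 0xde, 0xad, 0xbe, 0xef };
    var buf: [8]u8 = undefined;
    // fmtSliceHexLower wraps a byte slice in a formatter that renders
    // each byte as two lowercase hex digits.
    const hex = try std.fmt.bufPrint(&buf, "{}", .{std.fmt.fmtSliceHexLower(&digest)});
    std.debug.print("{s}\n", .{hex});
}
```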
Now the build script that targets all four platforms:
```zig
const std = @import("std");

const targets: []const std.Target.Query = &.{
    .{ .cpu_arch = .x86_64, .os_tag = .linux, .abi = .musl },
    .{ .cpu_arch = .aarch64, .os_tag = .linux, .abi = .musl },
    .{ .cpu_arch = .x86_64, .os_tag = .windows, .abi = .gnu },
    .{ .cpu_arch = .aarch64, .os_tag = .macos },
};

pub fn build(b: *std.Build) void {
    const optimize = b.standardOptimizeOption(.{});
    for (targets) |t| {
        const name = b.fmt("zighash-{s}-{s}", .{
            @tagName(t.cpu_arch.?),
            @tagName(t.os_tag.?),
        });
        const exe = b.addExecutable(.{
            .name = name,
            .root_source_file = b.path("src/main.zig"),
            .target = b.resolveTargetQuery(t),
            .optimize = optimize,
        });
        b.installArtifact(exe);
    }
}
```
Build everything with one command:
zig build -Doptimize=ReleaseSafe
After this completes, zig-out/bin/ contains:
zighash-x86_64-linux
zighash-aarch64-linux
zighash-x86_64-windows.exe
zighash-aarch64-macos
Four platform-native binaries from one source file, one build command, one machine. No Docker, no VMs, no cross-compiler packages, no CI matrix of different OS runners. Just Zig.
The Linux builds are statically linked against musl, so they run on any Linux distro without shared library dependencies. The Windows build uses the MinGW ABI, so it runs without needing MSVC redistributables. The macOS build produces a Mach-O binary for Apple Silicon.
This is the pattern you'd use for release automation. Your CI pipeline runs zig build once, collects the four binaries from zig-out/bin/, and uploads them as release assets. If you need to support more platforms (32-bit ARM for older Raspberry Pis, RISC-V, FreeBSD, etc.), just add more entries to the targets array.
What we learned
- Target triples follow the `arch-os-abi` convention -- Zig supports over 60 architectures and dozens of OS/ABI combos out of the box
- Cross-compiling in Zig is a single `-target` flag, no external toolchains needed -- build Linux ARM binaries on macOS, Windows binaries on Linux, anything from anywhere
- WebAssembly is targeted as `wasm32-freestanding-none` with `export` functions and `-dynamic` linking
- Zig bundles glibc and musl as source and compiles them on the fly for the target, which is also why `zig cc` works as a C cross-compiler
- `@import("builtin")` gives you comptime access to the target's arch, OS, and ABI -- conditional branches on these values are eliminated entirely by the compiler, zero runtime cost
- `switch (builtin.os.tag)` is the idiomatic way to write platform-specific code in Zig -- no preprocessor macros, just normal typed expressions
- QEMU user-mode emulation lets you run and test cross-compiled binaries on your dev machine -- Zig's build system integrates with it automatically
- A single `build.zig` can define targets for every platform, producing all binaries in one `zig build` invocation
Exercises
1. Write a program that prints detailed system information: CPU architecture, OS, ABI, pointer size, endianness, page size, and (on Linux/macOS) the value of the `HOME` environment variable or (on Windows) the `USERPROFILE` variable. Cross-compile it for `x86_64-linux-musl`, `aarch64-linux-musl`, and `x86_64-windows-gnu`. If you have QEMU installed, run the ARM binary and verify it reports `aarch64` as its architecture.
2. Create a `build.zig` that builds a small library (a string utility with `contains`, `trimLeft`, `trimRight`, and `toUpper` functions) for 3 targets: native, `aarch64-linux-musl`, and `wasm32-freestanding-none`. The Wasm target should `export` all four functions. Write tests that run for the native and ARM targets (using QEMU for ARM). Verify the `.wasm` output exists after building.
3. Write a program that uses `switch (builtin.os.tag)` to implement a `listDir` function that prints directory contents. On Linux and macOS, use `std.fs.Dir.iterate()`. On Windows, also use `std.fs.Dir.iterate()` but prefix the output with the drive letter from `std.fs.cwd().realpathAlloc()`. On `freestanding`, make it a `@compileError`. Cross-compile for all three OS targets and verify that the freestanding build fails with your custom error message while the others compile successfully.
Thanks, and see you next time!