Learn Zig Series (#36) - Mini Project: CLI Task Runner

What will I learn
- You will learn how to write solutions for the Episode 35 exercises;
- You will learn how to build a make-like task runner entirely in Zig;
- You will learn how to define tasks with names, dependencies, and shell commands;
- You will learn dependency resolution using topological sort (Kahn's algorithm);
- You will learn how to spawn child processes with std.process.Child;
- You will learn how to capture stdout/stderr from subprocesses;
- You will learn how to design a human-readable task file format and parse it;
- You will learn how to tie together file I/O, process management, data structures, and CLI argument parsing into one coherent tool.
Requirements
- A working modern computer running macOS, Windows or Ubuntu;
- An installed Zig 0.14+ distribution (download from ziglang.org);
- The ambition to learn Zig programming.
Difficulty
- Intermediate
Curriculum (of the Learn Zig Series):
- Zig Programming Tutorial - ep001 - Intro
- Learn Zig Series (#2) - Hello Zig, Variables and Types
- Learn Zig Series (#3) - Functions and Control Flow
- Learn Zig Series (#4) - Error Handling (Zig's Best Feature)
- Learn Zig Series (#5) - Arrays, Slices, and Strings
- Learn Zig Series (#6) - Structs, Enums, and Tagged Unions
- Learn Zig Series (#7) - Memory Management and Allocators
- Learn Zig Series (#8) - Pointers and Memory Layout
- Learn Zig Series (#9) - Comptime (Zig's Superpower)
- Learn Zig Series (#10) - Project Structure, Modules, and File I/O
- Learn Zig Series (#11) - Mini Project: Building a Step Sequencer
- Learn Zig Series (#12) - Testing and Test-Driven Development
- Learn Zig Series (#13) - Interfaces via Type Erasure
- Learn Zig Series (#14) - Generics with Comptime Parameters
- Learn Zig Series (#15) - The Build System (build.zig)
- Learn Zig Series (#16) - Sentinel-Terminated Types and C Strings
- Learn Zig Series (#17) - Packed Structs and Bit Manipulation
- Learn Zig Series (#18) - Async Concepts and Event Loops
- Learn Zig Series (#18b) - Addendum: Async Returns in Zig 0.16
- Learn Zig Series (#19) - SIMD with @Vector
- Learn Zig Series (#20) - Working with JSON
- Learn Zig Series (#21) - Networking and TCP Sockets
- Learn Zig Series (#22) - Hash Maps and Data Structures
- Learn Zig Series (#23) - Iterators and Lazy Evaluation
- Learn Zig Series (#24) - Logging, Formatting, and Debug Output
- Learn Zig Series (#25) - Mini Project: HTTP Status Checker
- Learn Zig Series (#26) - Writing a Custom Allocator
- Learn Zig Series (#27) - C Interop: Calling C from Zig
- Learn Zig Series (#28) - C Interop: Exposing Zig to C
- Learn Zig Series (#29) - Inline Assembly and Low-Level Control
- Learn Zig Series (#30) - Thread Safety and Atomics
- Learn Zig Series (#31) - Memory-Mapped I/O and Files
- Learn Zig Series (#32) - Compile-Time Reflection with @typeInfo
- Learn Zig Series (#33) - Building a State Machine with Tagged Unions
- Learn Zig Series (#34) - Performance Profiling and Optimization
- Learn Zig Series (#35) - Cross-Compilation and Target Triples
- Learn Zig Series (#36) - Mini Project: CLI Task Runner (this post)
Solutions to Episode 35 Exercises
Exercise 1 - Cross-platform system info printer:
const std = @import("std");
const builtin = @import("builtin");
pub fn main() !void {
const stdout = std.io.getStdOut().writer();
try stdout.print("=== System Information ===\n", .{});
try stdout.print("Architecture: {s}\n", .{@tagName(builtin.cpu.arch)});
try stdout.print("OS: {s}\n", .{@tagName(builtin.os.tag)});
try stdout.print("ABI: {s}\n", .{@tagName(builtin.abi)});
try stdout.print("Pointer size: {d} bytes\n", .{@sizeOf(usize)});
try stdout.print("Endianness: {s}\n", .{
if (builtin.cpu.arch.endian() == .little) "little-endian" else "big-endian",
});
try stdout.print("Page size: {d} bytes\n", .{std.heap.pageSize()});
if (builtin.os.tag == .windows) {
if (std.process.getEnvVarOwned(std.heap.page_allocator, "USERPROFILE")) |profile| {
defer std.heap.page_allocator.free(profile);
try stdout.print("Home dir: {s}\n", .{profile});
} else |_| {
try stdout.print("Home dir: (not set)\n", .{});
}
} else {
const home = std.posix.getenv("HOME") orelse "(not set)";
try stdout.print("Home dir: {s}\n", .{home});
}
}
The key insight is that builtin.os.tag == .windows is evaluated at compile time, so the Windows branch referencing USERPROFILE is never compiled into a Linux or macOS binary, and vice versa. When you cross-compile with -target aarch64-linux-musl and run it under QEMU, it correctly reports aarch64 because those values are baked in at compile time -- QEMU doesn't change what the binary "thinks" it is.
Exercise 2 - Multi-target library build with tests:
// build.zig
const std = @import("std");
pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
// Native target
const native_lib = b.addStaticLibrary(.{
.name = "strutil-native",
.root_source_file = b.path("src/strutil.zig"),
.target = b.host,
.optimize = optimize,
});
b.installArtifact(native_lib);
// ARM target
const arm_lib = b.addStaticLibrary(.{
.name = "strutil-aarch64",
.root_source_file = b.path("src/strutil.zig"),
.target = b.resolveTargetQuery(.{ .cpu_arch = .aarch64, .os_tag = .linux, .abi = .musl }),
.optimize = optimize,
});
b.installArtifact(arm_lib);
// Wasm target - shared lib so exports are visible
const wasm_lib = b.addSharedLibrary(.{
.name = "strutil-wasm",
.root_source_file = b.path("src/strutil.zig"),
.target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding }),
.optimize = optimize,
});
b.installArtifact(wasm_lib);
// Native tests
const native_tests = b.addTest(.{
.root_source_file = b.path("src/strutil.zig"),
.target = b.host,
.optimize = optimize,
});
const run_native = b.addRunArtifact(native_tests);
// ARM tests (runs via QEMU automatically)
const arm_tests = b.addTest(.{
.root_source_file = b.path("src/strutil.zig"),
.target = b.resolveTargetQuery(.{ .cpu_arch = .aarch64, .os_tag = .linux, .abi = .musl }),
.optimize = optimize,
});
const run_arm = b.addRunArtifact(arm_tests);
const test_step = b.step("test", "Run all tests");
test_step.dependOn(&run_native.step);
test_step.dependOn(&run_arm.step);
}
// src/strutil.zig
const std = @import("std");
export fn contains(haystack: [*]const u8, hay_len: usize, needle: [*]const u8, ndl_len: usize) bool {
const h = haystack[0..hay_len];
const n = needle[0..ndl_len];
return std.mem.indexOf(u8, h, n) != null;
}
export fn trimLeft(ptr: [*]const u8, len: usize, out_start: *usize) void {
var i: usize = 0;
while (i < len and (ptr[i] == ' ' or ptr[i] == '\t')) : (i += 1) {}
out_start.* = i;
}
export fn trimRight(ptr: [*]const u8, len: usize, out_end: *usize) void {
var end = len;
while (end > 0 and (ptr[end - 1] == ' ' or ptr[end - 1] == '\t')) : (end -= 1) {}
out_end.* = end;
}
export fn toUpper(ptr: [*]u8, len: usize) void {
for (ptr[0..len]) |*c| {
if (c.* >= 'a' and c.* <= 'z') c.* -= 32;
}
}
test "contains finds substring" {
const hay = "hello world";
try std.testing.expect(contains(hay.ptr, hay.len, "world".ptr, 5));
try std.testing.expect(!contains(hay.ptr, hay.len, "xyz".ptr, 3));
}
test "trimLeft removes leading spaces" {
const s = " hello";
var start: usize = undefined;
trimLeft(s.ptr, s.len, &start);
try std.testing.expectEqual(@as(usize, 3), start);
}
test "trimRight removes trailing spaces" {
const s = "hello ";
var end: usize = undefined;
trimRight(s.ptr, s.len, &end);
try std.testing.expectEqual(@as(usize, 5), end);
}
test "toUpper converts lowercase" {
var buf = "hello World".*;
toUpper(&buf, buf.len);
try std.testing.expectEqualStrings("HELLO WORLD", &buf);
}
The Wasm target requires export functions with C-compatible signatures (pointers + lengths, no slices). The same source file works for both native Zig tests (which use slices internally) and Wasm exports. Running zig build test executes native tests directly and ARM tests through QEMU user-mode emulation -- Zig detects the foreign target and invokes the appropriate emulator automatically.
Exercise 3 - Platform-branched listDir with freestanding @compileError:
const std = @import("std");
const builtin = @import("builtin");
fn listDir(path: []const u8) !void {
if (builtin.os.tag == .freestanding) {
@compileError("listDir is not available on freestanding targets");
}
const stdout = std.io.getStdOut().writer();
if (builtin.os.tag == .windows) {
// On Windows, show drive letter prefix
var real_buf: [std.fs.max_path_bytes]u8 = undefined;
const real = try std.fs.cwd().realpath(path, &real_buf);
if (real.len >= 2 and real[1] == ':') {
try stdout.print("[{c}:] ", .{real[0]});
}
}
try stdout.print("Contents of '{s}':\n", .{path});
var dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
defer dir.close();
var iter = dir.iterate();
while (try iter.next()) |entry| {
const kind_str: []const u8 = switch (entry.kind) {
.file => "FILE",
.directory => "DIR ",
.sym_link => "LINK",
else => "????",
};
try stdout.print(" [{s}] {s}\n", .{ kind_str, entry.name });
}
}
pub fn main() !void {
const arg = blk: {
var args = std.process.args();
_ = args.next();
break :blk args.next() orelse ".";
};
try listDir(arg);
}
When compiled for wasm32-freestanding-none, the @compileError triggers at compile time with the custom message. For Linux and macOS the code is identical (both POSIX), and for Windows it adds the drive letter prefix. The @compileError inside a comptime-evaluated if branch is Zig's idiomatic pattern for "this platform is not supported" -- cleaner than a runtime panic, and caught at build time rather than deploy time.
Alright! Here we go with our third mini project in this series ;-)
After 35 episodes of building up individual skills -- from memory management to hash maps, from thread safety to cross-compilation -- it's time to combine a whole bunch of them into one useful tool. We're going to build zigrun: a lightweight task runner, similar in spirit to make or just, but written from scratch in Zig.
The idea is straightforward: you define tasks in a simple text file (what each task is called, what shell commands it runs, what other tasks it depends on), and then zigrun reads that file, resolves the dependency graph, and executes tasks in the correct order. Sounds simple? It actually touches quite a lot of territory we've covered: file I/O (episode 10), hash maps (episode 22), error handling (episode 4), process spawning, string parsing, and topological sorting.
Unlike make, which relies on file timestamps and implicit rules, our tool is explicit: you tell it what to run and in what order -- more like a shell script with dependency awareness. If task B depends on task A, running zigrun B will automatically run A first. And if A has already been marked done in the current invocation, it won't run again.
The task file format
First we need to decide what our task definitions look like. I wanted something dead simple -- no YAML, no TOML, no JSON. Just a plain text format that's obvious at first glance:
task: clean
cmd: rm -rf zig-out zig-cache
task: build
deps: clean
cmd: zig build -Doptimize=ReleaseSafe
task: test
deps: build
cmd: zig build test
task: release
deps: test
cmd: tar czf release.tar.gz zig-out/bin/*
cmd: echo "Release package created"
task: deploy
deps: release
cmd: scp release.tar.gz server:/opt/app/
cmd: ssh server "cd /opt/app && tar xzf release.tar.gz"
Each task starts with task: name, followed by optional deps: dep1, dep2 (comma-separated), followed by one or more cmd: shell command lines. Blank lines separate tasks. That's the entire format.
Here's what I want from the parser:
- Task names are alphanumeric plus hyphens and underscores
- Dependencies reference other task names
- Commands are passed verbatim to /bin/sh -c (or cmd /c on Windows)
- Unknown keys are ignored (forward compatibility)
- Duplicate task names are an error
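To make the format and these rules concrete before we write the Zig version, here's a minimal reference parser. It's a sketch in Python (chosen only for brevity and to stay language-neutral) and not part of the tool itself:

```python
def parse_task_file(text):
    """Parse the task format into ({name: {"deps": [...], "cmds": [...]}}, order)."""
    tasks, order, current = {}, [], None
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue  # skip blank lines and comments
        if line.startswith("task:"):
            name = line[len("task:"):].strip()
            if name in tasks:
                raise ValueError("duplicate task: " + name)
            tasks[name] = {"deps": [], "cmds": []}
            order.append(name)
            current = name
        elif line.startswith("deps:"):
            if current is None:
                raise ValueError("deps: before any task:")
            parts = line[len("deps:"):].split(",")
            tasks[current]["deps"] += [d.strip() for d in parts if d.strip()]
        elif line.startswith("cmd:"):
            if current is None:
                raise ValueError("cmd: before any task:")
            tasks[current]["cmds"].append(line[len("cmd:"):].strip())
        # any other prefix is silently ignored (forward compatibility)
    return tasks, order
```

The whole grammar fits in one loop with a "current task" cursor, which is exactly the shape the Zig parser below takes as well.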
Data structures
Let's start with how we represent a parsed task file in memory:
const std = @import("std");
const Task = struct {
name: []const u8,
commands: std.ArrayList([]const u8),
deps: std.ArrayList([]const u8),
fn init(allocator: std.mem.Allocator, name: []const u8) Task {
return .{
.name = name,
.commands = std.ArrayList([]const u8).init(allocator),
.deps = std.ArrayList([]const u8).init(allocator),
};
}
fn deinit(self: *Task) void {
self.commands.deinit();
self.deps.deinit();
}
};
const TaskFile = struct {
tasks: std.StringHashMap(Task),
allocator: std.mem.Allocator,
order: std.ArrayList([]const u8),
fn init(allocator: std.mem.Allocator) TaskFile {
return .{
.tasks = std.StringHashMap(Task).init(allocator),
.allocator = allocator,
.order = std.ArrayList([]const u8).init(allocator),
};
}
fn deinit(self: *TaskFile) void {
var iter = self.tasks.valueIterator();
while (iter.next()) |task| {
task.deinit();
}
self.tasks.deinit();
self.order.deinit();
}
fn getTask(self: *const TaskFile, name: []const u8) ?*Task {
return self.tasks.getPtr(name);
}
};
We use a StringHashMap keyed by task name so we can look up tasks in O(1) when resolving dependencies. The order list tracks the insertion order so we can give a meaningful "list all tasks" output. Each Task holds an ArrayList of commands and dependencies -- both are variable-length.
Parsing the task file
The parser reads the file line by line, building up Task objects as it encounters task: directives:
const ParseError = error{
DuplicateTask,
CommandWithoutTask,
InvalidLine,
};
fn parseTaskFile(allocator: std.mem.Allocator, content: []const u8) !TaskFile {
var tf = TaskFile.init(allocator);
errdefer tf.deinit();
var current_task: ?*Task = null;
var line_iter = std.mem.splitScalar(u8, content, '\n');
while (line_iter.next()) |raw_line| {
// trim carriage return (windows line endings) and whitespace
const line = std.mem.trim(u8, raw_line, " \t\r");
if (line.len == 0) continue;
if (line[0] == '#') continue; // comments
if (std.mem.startsWith(u8, line, "task:")) {
const name = std.mem.trim(u8, line["task:".len..], " \t");
if (name.len == 0) return ParseError.InvalidLine;
if (tf.tasks.contains(name)) {
return ParseError.DuplicateTask;
}
const task = Task.init(allocator, name);
try tf.tasks.put(name, task);
current_task = tf.tasks.getPtr(name);
try tf.order.append(name);
} else if (std.mem.startsWith(u8, line, "deps:")) {
const task = current_task orelse return ParseError.CommandWithoutTask;
const deps_str = std.mem.trim(u8, line["deps:".len..], " \t");
var dep_iter = std.mem.splitScalar(u8, deps_str, ',');
while (dep_iter.next()) |dep_raw| {
const dep = std.mem.trim(u8, dep_raw, " \t");
if (dep.len > 0) {
try task.deps.append(dep);
}
}
} else if (std.mem.startsWith(u8, line, "cmd:")) {
const task = current_task orelse return ParseError.CommandWithoutTask;
const cmd = std.mem.trim(u8, line["cmd:".len..], " \t");
if (cmd.len > 0) {
try task.commands.append(cmd);
}
}
// unknown keys silently ignored
}
return tf;
}
Notice how we use errdefer tf.deinit() -- if parsing fails halfway through, we still clean up all the memory we allocated. This is one of those Zig patterns that makes resource management genuinely pleasant. In C you'd have scattered goto cleanup labels; in Zig, errdefer handles the unhappy path cleanly.
The parser ignores blank lines and lines starting with # (comments). Each task: line starts a new task context, and subsequent deps: and cmd: lines add to whatever task is "current". Unknown prefixes are silently skipped -- this means we can add new directives later without breaking older parsers.
Dependency resolution: topological sort
Here's where it gets interesting. If you ask zigrun deploy, we need to figure out the full execution order: clean -> build -> test -> release -> deploy. This is a classic topological sort problem on a directed acyclic graph (DAG).
We'll use Kahn's algorithm because it's intuitive and naturally detects cycles. The idea: maintain a count of incoming edges for each node. Start with nodes that have zero incoming edges (no dependencies). Process them, decrement the counts for their dependents, and repeat. If you process all nodes, you have a valid topological order. If you can't, there's a cycle.
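The steps above can be sketched in a few lines before we write the Zig version. Python is used here purely as compact pseudocode; the diamond-shaped dependency graph in the usage example is illustrative:

```python
from collections import deque

def topo_order(deps):
    """Kahn's algorithm. `deps` maps each task to its list of prerequisites."""
    # in_degree[x] = number of prerequisites x is still waiting on
    in_degree = {t: len(ds) for t, ds in deps.items()}
    queue = deque(t for t, d in in_degree.items() if d == 0)
    result = []
    while queue:
        node = queue.popleft()
        result.append(node)
        # every task that listed `node` as a prerequisite gets one step closer
        for t, ds in deps.items():
            if node in ds:
                in_degree[t] -= 1
                if in_degree[t] == 0:
                    queue.append(t)
    if len(result) != len(deps):
        raise ValueError("cycle detected")  # some nodes never reached degree 0
    return result

# A diamond: both `test` and `check` depend (directly or not) on `fmt`,
# yet `fmt` appears exactly once in the resulting order.
graph = {"fmt": [], "build": ["fmt"], "test": ["build"],
         "check": ["fmt"], "all": ["test", "check"]}
plan = topo_order(graph)
```

Note how cycle detection falls out for free: if the result is shorter than the graph, something never reached in-degree zero.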
const TopoError = error{
CycleDetected,
UnknownDependency,
};
fn resolveDependencies(
allocator: std.mem.Allocator,
tf: *const TaskFile,
target: []const u8,
) !std.ArrayList([]const u8) {
// First, collect all tasks reachable from target
var needed = std.StringHashMap(void).init(allocator);
defer needed.deinit();
// BFS to find all tasks we need
var queue = std.ArrayList([]const u8).init(allocator);
defer queue.deinit();
try queue.append(target);
try needed.put(target, {});
while (queue.items.len > 0) {
const current = queue.orderedRemove(0);
const task = tf.getTask(current) orelse return TopoError.UnknownDependency;
for (task.deps.items) |dep| {
if (!needed.contains(dep)) {
if (!tf.tasks.contains(dep)) return TopoError.UnknownDependency;
try needed.put(dep, {});
try queue.append(dep);
}
}
}
// Now topological sort over the needed set
var in_degree = std.StringHashMap(u32).init(allocator);
defer in_degree.deinit();
// Initialize in-degrees to 0
var needed_iter = needed.keyIterator();
while (needed_iter.next()) |key| {
try in_degree.put(key.*, 0);
}
// Count incoming edges
needed_iter = needed.keyIterator();
while (needed_iter.next()) |key| {
const task = tf.getTask(key.*).?;
for (task.deps.items) |dep| {
if (needed.contains(dep)) {
const current = in_degree.get(dep) orelse 0;
try in_degree.put(dep, current + 1);
}
}
}
// Wait -- that's backwards. in_degree should count how many tasks
// depend ON this task... no. Actually, in_degree counts how many
// prerequisites THIS task has. Let me redo this.
// Reset
var deg_iter = in_degree.iterator();
while (deg_iter.next()) |entry| {
entry.value_ptr.* = 0;
}
// in_degree[X] = number of deps X has (within the needed set)
needed_iter = needed.keyIterator();
while (needed_iter.next()) |key| {
const task = tf.getTask(key.*).?;
var count: u32 = 0;
for (task.deps.items) |dep| {
if (needed.contains(dep)) count += 1;
}
try in_degree.put(key.*, count);
}
// Start with zero-degree nodes
var result = std.ArrayList([]const u8).init(allocator);
var process_queue = std.ArrayList([]const u8).init(allocator);
defer process_queue.deinit();
var init_iter = in_degree.iterator();
while (init_iter.next()) |entry| {
if (entry.value_ptr.* == 0) {
try process_queue.append(entry.key_ptr.*);
}
}
while (process_queue.items.len > 0) {
const node = process_queue.orderedRemove(0);
try result.append(node);
// For every task that depends on `node`, decrease its in_degree
needed_iter = needed.keyIterator();
while (needed_iter.next()) |key| {
const task = tf.getTask(key.*).?;
for (task.deps.items) |dep| {
if (std.mem.eql(u8, dep, node)) {
const deg = in_degree.getPtr(key.*).?;
deg.* -= 1;
if (deg.* == 0) {
try process_queue.append(key.*);
}
break;
}
}
}
}
if (result.items.len != needed.count()) {
result.deinit();
return TopoError.CycleDetected;
}
return result;
}
I intentionally left the "wait, that's backwards" comment in the code because that's exactly how it went when I wrote it ;-) Topological sort is one of those algorithms where you get the direction of edges confused at least once per implementation. The key realization: in_degree[X] means "how many tasks must run BEFORE X can run". Nodes with in_degree 0 have no unsatisfied prerequisites and can run immediately.
The cycle detection is elegant: if Kahn's algorithm processes fewer nodes than are in the graph, it means some nodes could never reach in_degree 0 -- they're stuck in a cycle. We just compare result.items.len against needed.count() and know immediately whether there's a cycle, without needing to track visited sets or do DFS-with-coloring.
Spawning child processes
Once we have the execution order, we need to actually run the shell commands. Zig's std.process.Child is the interface for this. It's low-level compared to Python's subprocess or Go's exec.Command, but it gives you full control:
const ExecError = error{
CommandFailed,
SpawnFailed,
};
fn runCommand(allocator: std.mem.Allocator, cmd: []const u8, verbose: bool) !void {
const stdout_w = std.io.getStdOut().writer();
const stderr_w = std.io.getStdErr().writer();
if (verbose) {
try stdout_w.print(" $ {s}\n", .{cmd});
}
// Use the platform shell: cmd.exe on Windows, /bin/sh everywhere else
const argv = if (@import("builtin").os.tag == .windows)
[_][]const u8{ "cmd", "/c", cmd }
else
[_][]const u8{ "/bin/sh", "-c", cmd };
var child = std.process.Child.init(&argv, allocator);
child.stderr_behavior = .Pipe;
child.stdout_behavior = .Pipe;
try child.spawn();
// Read stdout and stderr
const stdout_data = try child.stdout.?.reader().readAllAlloc(allocator, 10 * 1024 * 1024);
defer allocator.free(stdout_data);
const stderr_data = try child.stderr.?.reader().readAllAlloc(allocator, 10 * 1024 * 1024);
defer allocator.free(stderr_data);
const term = try child.wait();
if (stdout_data.len > 0) {
try stdout_w.writeAll(stdout_data);
}
if (stderr_data.len > 0) {
try stderr_w.writeAll(stderr_data);
}
switch (term) {
.exited => |code| {
if (code != 0) {
try stderr_w.print(" command exited with code {d}\n", .{code});
return ExecError.CommandFailed;
}
},
.signal => |sig| {
try stderr_w.print(" command killed by signal {d}\n", .{sig});
return ExecError.CommandFailed;
},
else => {
return ExecError.CommandFailed;
},
}
}
A few important things here. We set both stdout_behavior and stderr_behavior to .Pipe, which means the child's output streams are redirected to pipes that we can read from the parent process. The alternative is .Inherit (child writes directly to our terminal) which is simpler but doesn't let us capture or format the output.
The readAllAlloc call reads up to 10MB from each stream. In production you might want streaming reads instead (to handle very large outputs without buffering everything in memory), but for a task runner where commands typically output a few KB at most, this is fine.
After reading both streams, we call child.wait() which blocks until the process exits and gives us the termination status. On POSIX systems this is either an exit code (.exited) or a signal number (.signal). We treat anything except exit code 0 as a failure.
Having said that, there's a subtlety with the read-then-wait pattern: if the child produces more output than the pipe buffer can hold (typically 64KB on Linux), and we read stdout first, the child might block writing to stderr (or vice versa). For truly robust handling you'd use poll/epoll to read from both pipes simultaneously. But for a task runner where individual commands rarely produce huge output to both streams, sequential reads work fine.
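For comparison, this concurrent-drain strategy is what Python's subprocess.communicate() implements under the hood; a minimal sketch of the behavior we'd eventually want (the child command here is illustrative):

```python
import subprocess
import sys

# Spawn a child that writes to BOTH streams, then drain both pipes
# without risking the pipe-buffer deadlock: communicate() reads
# stdout and stderr concurrently instead of one after the other.
child = subprocess.Popen(
    [sys.executable, "-c",
     "import sys; print('to stdout'); print('to stderr', file=sys.stderr)"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
out, err = child.communicate()  # drains both pipes, then reaps the child
code = child.returncode
```

Porting that concurrency to our Zig runner would mean polling both pipe file descriptors; the sequential read is the pragmatic shortcut for a task runner.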
Capturing and displaying output
Let's make the output a bit nicer. When running multiple tasks, we want to clearly show which task is running and whether it succeeded:
fn executeTasks(
allocator: std.mem.Allocator,
tf: *const TaskFile,
order: []const []const u8,
verbose: bool,
) !void {
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
for (order) |task_name| {
const task = tf.getTask(task_name).?;
if (task.commands.items.len == 0) {
if (verbose) {
try stdout.print("[skip] {s} (no commands)\n", .{task_name});
}
continue;
}
try stdout.print("[run] {s}\n", .{task_name});
for (task.commands.items) |cmd| {
runCommand(allocator, cmd, verbose) catch |err| {
try stderr.print("[FAIL] {s}\n", .{task_name});
return err;
};
}
try stdout.print("[done] {s}\n", .{task_name});
}
}
The output looks like:
[run] clean
$ rm -rf zig-out zig-cache
[done] clean
[run] build
$ zig build -Doptimize=ReleaseSafe
[done] build
[run] test
$ zig build test
[done] test
Clean, readable, and you can immediately see where things failed if a command returns non-zero. The [skip] line shows when a task has no commands (maybe it's just an aggregation point for dependencies, like a test-all task that depends on test-unit and test-integration but runs nothing itself).
CLI argument parsing
Now we need a proper command-line interface. Our tool should support:
- zigrun <task> -- run a specific task (and its deps)
- zigrun --list -- show all available tasks
- zigrun --file path -- specify a custom task file (default: zigrun.txt)
- zigrun --verbose -- show commands before executing
- zigrun --dry-run -- show what would run without executing
const CliOptions = struct {
task: ?[]const u8 = null,
file: []const u8 = "zigrun.txt",
verbose: bool = false,
dry_run: bool = false,
list: bool = false,
help: bool = false,
};
fn parseArgs(allocator: std.mem.Allocator) !CliOptions {
_ = allocator;
var opts = CliOptions{};
var args = std.process.args();
_ = args.next(); // skip program name
while (args.next()) |arg| {
if (std.mem.eql(u8, arg, "--list") or std.mem.eql(u8, arg, "-l")) {
opts.list = true;
} else if (std.mem.eql(u8, arg, "--verbose") or std.mem.eql(u8, arg, "-v")) {
opts.verbose = true;
} else if (std.mem.eql(u8, arg, "--dry-run") or std.mem.eql(u8, arg, "-n")) {
opts.dry_run = true;
} else if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) {
opts.help = true;
} else if (std.mem.eql(u8, arg, "--file") or std.mem.eql(u8, arg, "-f")) {
opts.file = args.next() orelse {
return error.MissingArgument;
};
} else if (arg.len > 0 and arg[0] != '-') {
opts.task = arg;
}
}
return opts;
}
Simple flag-based parsing without any external libraries. We iterate through std.process.args(), matching known flags and collecting the positional argument as the task name. The --file flag consumes the NEXT argument as its value -- note the args.next() orelse pattern which handles the case where someone types --file with nothing after it.
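The consume-the-next-argument pattern is easy to get wrong, so here's the same logic as a standalone sketch (Python for brevity; the dictionary keys mirror CliOptions but are illustrative, not the tool's API):

```python
def parse_cli(argv):
    """Minimal flag parser mirroring zigrun's logic: boolean flags,
    a --file flag that consumes the following argument, and a single
    positional task name."""
    opts = {"task": None, "file": "zigrun.txt", "verbose": False,
            "dry_run": False, "list": False}
    it = iter(argv)
    for arg in it:
        if arg in ("--list", "-l"):
            opts["list"] = True
        elif arg in ("--verbose", "-v"):
            opts["verbose"] = True
        elif arg in ("--dry-run", "-n"):
            opts["dry_run"] = True
        elif arg in ("--file", "-f"):
            value = next(it, None)  # consume the NEXT argument as the value
            if value is None:
                raise ValueError("--file requires an argument")
            opts["file"] = value
        elif not arg.startswith("-"):
            opts["task"] = arg
    return opts
```

Pulling the next item from the same iterator is the whole trick; it's exactly what the args.next() orelse pattern does in the Zig version.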
Putting it all together: main.zig
Here's the complete main function that ties everything together:
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
const stdout = std.io.getStdOut().writer();
const stderr = std.io.getStdErr().writer();
const opts = parseArgs(allocator) catch {
try stderr.print("error: invalid arguments. Use --help for usage.\n", .{});
std.process.exit(1);
};
if (opts.help) {
try stdout.print(
\\zigrun - a simple task runner
\\
\\Usage: zigrun [options] [task]
\\
\\Options:
\\ -f, --file <path> Task file (default: zigrun.txt)
\\ -l, --list List available tasks
\\ -v, --verbose Show commands before executing
\\ -n, --dry-run Show execution plan without running
\\ -h, --help Show this help
\\
, .{});
return;
}
// Read task file
const content = std.fs.cwd().readFileAlloc(allocator, opts.file, 1024 * 1024) catch |err| {
try stderr.print("error: cannot read '{s}': {s}\n", .{ opts.file, @errorName(err) });
std.process.exit(1);
};
defer allocator.free(content);
// Parse
var tf = parseTaskFile(allocator, content) catch |err| {
try stderr.print("error: parse failed: {s}\n", .{@errorName(err)});
std.process.exit(1);
};
defer tf.deinit();
// List mode
if (opts.list) {
try stdout.print("Available tasks:\n", .{});
for (tf.order.items) |name| {
const task = tf.getTask(name).?;
try stdout.print(" {s}", .{name});
if (task.deps.items.len > 0) {
try stdout.print(" (deps: ", .{});
for (task.deps.items, 0..) |dep, i| {
if (i > 0) try stdout.print(", ", .{});
try stdout.print("{s}", .{dep});
}
try stdout.print(")", .{});
}
try stdout.print("\n", .{});
}
return;
}
// Need a target task
const target = opts.task orelse {
try stderr.print("error: no task specified. Use --list to see available tasks.\n", .{});
std.process.exit(1);
};
// Resolve dependencies
var order = resolveDependencies(allocator, &tf, target) catch |err| {
switch (err) {
TopoError.CycleDetected => try stderr.print("error: dependency cycle detected\n", .{}),
TopoError.UnknownDependency => try stderr.print("error: unknown task in dependency chain\n", .{}),
else => try stderr.print("error: {s}\n", .{@errorName(err)}),
}
std.process.exit(1);
};
defer order.deinit();
// Dry run
if (opts.dry_run) {
try stdout.print("Execution plan for '{s}':\n", .{target});
for (order.items, 1..) |name, i| {
const task = tf.getTask(name).?;
try stdout.print(" {d}. {s}\n", .{ i, name });
for (task.commands.items) |cmd| {
try stdout.print(" $ {s}\n", .{cmd});
}
}
return;
}
// Execute
executeTasks(allocator, &tf, order.items, opts.verbose) catch {
std.process.exit(1);
};
}
We use the GeneralPurposeAllocator (as we discussed in episode 7) because it gives us leak detection in debug builds. If we forget to free something, it'll tell us at program exit.
The flow is: parse CLI args -> read file -> parse tasks -> resolve dependencies -> execute (or list/dry-run). Each stage can fail, and errors are reported with context before calling std.process.exit(1). The defer statements ensure all memory is freed on both success and failure paths.
The build.zig
Since we're building a real tool, let's set up a proper project structure with build.zig:
const std = @import("std");
pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "zigrun",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
b.installArtifact(exe);
// Run step
const run_cmd = b.addRunArtifact(exe);
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| run_cmd.addArgs(args);
const run_step = b.step("run", "Run zigrun");
run_step.dependOn(&run_cmd.step);
// Tests
const unit_tests = b.addTest(.{
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
const run_tests = b.addRunArtifact(unit_tests);
const test_step = b.step("test", "Run tests");
test_step.dependOn(&run_tests.step);
}
With this you can:
- zig build compiles zigrun
- zig build run -- build compiles zigrun and immediately runs the task "build"
- zig build test runs the test suite
And because of what we learned last episode about cross-compilation, you can also do:
zig build -Dtarget=aarch64-linux-musl -Doptimize=ReleaseSafe
...to get a static ARM64 binary of your task runner that you can deploy to any Linux ARM machine. Pretty neat for a tool we wrote from scratch ;-)
Testing the parser
Let's add some unit tests to verify our parsing logic. In Zig, tests live right next to the code they test (as we covered in episode 12):
test "parse simple task file" {
const content =
\\task: build
\\cmd: zig build
\\
\\task: test
\\deps: build
\\cmd: zig build test
;
var tf = try parseTaskFile(std.testing.allocator, content);
defer tf.deinit();
try std.testing.expectEqual(@as(usize, 2), tf.tasks.count());
const build_task = tf.getTask("build").?;
try std.testing.expectEqual(@as(usize, 1), build_task.commands.items.len);
try std.testing.expectEqual(@as(usize, 0), build_task.deps.items.len);
try std.testing.expectEqualStrings("zig build", build_task.commands.items[0]);
const test_task = tf.getTask("test").?;
try std.testing.expectEqual(@as(usize, 1), test_task.commands.items.len);
try std.testing.expectEqual(@as(usize, 1), test_task.deps.items.len);
try std.testing.expectEqualStrings("build", test_task.deps.items[0]);
}
test "parse rejects duplicate tasks" {
const content =
\\task: build
\\cmd: echo one
\\
\\task: build
\\cmd: echo two
;
const result = parseTaskFile(std.testing.allocator, content);
try std.testing.expectError(ParseError.DuplicateTask, result);
}
test "parse handles comments and blank lines" {
const content =
\\# This is a comment
\\
\\task: hello
\\# Another comment
\\cmd: echo hello
\\
;
var tf = try parseTaskFile(std.testing.allocator, content);
defer tf.deinit();
try std.testing.expectEqual(@as(usize, 1), tf.tasks.count());
try std.testing.expectEqualStrings("echo hello", tf.getTask("hello").?.commands.items[0]);
}
test "topological sort linear chain" {
const content =
\\task: a
\\cmd: echo a
\\
\\task: b
\\deps: a
\\cmd: echo b
\\
\\task: c
\\deps: b
\\cmd: echo c
;
var tf = try parseTaskFile(std.testing.allocator, content);
defer tf.deinit();
var order = try resolveDependencies(std.testing.allocator, &tf, "c");
defer order.deinit();
try std.testing.expectEqual(@as(usize, 3), order.items.len);
try std.testing.expectEqualStrings("a", order.items[0]);
try std.testing.expectEqualStrings("b", order.items[1]);
try std.testing.expectEqualStrings("c", order.items[2]);
}
test "topological sort detects cycles" {
const content =
\\task: a
\\deps: b
\\cmd: echo a
\\
\\task: b
\\deps: a
\\cmd: echo b
;
var tf = try parseTaskFile(std.testing.allocator, content);
defer tf.deinit();
const result = resolveDependencies(std.testing.allocator, &tf, "a");
try std.testing.expectError(TopoError.CycleDetected, result);
}
Using std.testing.allocator is important here -- it detects memory leaks in tests. If our parser allocates memory that it doesn't properly clean up on error paths, the test allocator will flag it as a failure. This catches bugs that GeneralPurposeAllocator would only report at process exit.
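To see the pattern in isolation, here is a minimal sketch (using a hypothetical dupeNames helper, not part of our task runner) of how errdefer keeps error paths leak-free so that std.testing.allocator stays quiet:

```zig
const std = @import("std");

// Hypothetical helper: duplicate each name into a list, undoing all
// partial allocations if any step fails.
fn dupeNames(allocator: std.mem.Allocator, names: []const []const u8) !std.ArrayList([]u8) {
    var list = std.ArrayList([]u8).init(allocator);
    // If any later step fails, free everything allocated so far.
    errdefer {
        for (list.items) |name| allocator.free(name);
        list.deinit();
    }
    for (names) |name| {
        const copy = try allocator.dupe(u8, name);
        errdefer allocator.free(copy); // freed only if the append below fails
        try list.append(copy);
    }
    return list;
}

test "no leak on success path" {
    var list = try dupeNames(std.testing.allocator, &.{ "build", "test" });
    defer {
        for (list.items) |name| std.testing.allocator.free(name);
        list.deinit();
    }
    try std.testing.expectEqual(@as(usize, 2), list.items.len);
}
```

If you delete either errdefer and force an allocation failure, the test allocator reports the leak and fails the test -- exactly the behavior we rely on in the parser tests above.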
Example usage
Create a zigrun.txt in your project root:
# Build pipeline for a Zig project
task: clean
cmd: rm -rf zig-out .zig-cache
task: fmt
cmd: zig fmt src/
task: build
deps: fmt
cmd: zig build
task: test
deps: build
cmd: zig build test
task: check
deps: fmt
cmd: zig build -Doptimize=ReleaseSafe 2>&1 | head -20
task: all
deps: test, check
Then:
$ zigrun --list
Available tasks:
clean
fmt
build (deps: fmt)
test (deps: build)
check (deps: fmt)
all (deps: test, check)
$ zigrun --dry-run all
Execution plan for 'all':
1. fmt
$ zig fmt src/
2. build
$ zig build
3. test
$ zig build test
4. check
$ zig build -Doptimize=ReleaseSafe 2>&1 | head -20
5. all
$ zigrun all
[run] fmt
[done] fmt
[run] build
[done] build
[run] test
[done] test
[run] check
[done] check
[skip] all (no commands)
Notice how all has no commands of its own -- it's purely a dependency aggregation task. And notice how fmt only runs once even though both build (via test) and check depend on it. The topological sort naturally deduplicates.
Ideas for extensions
We've built a functional task runner in about 300 lines of Zig. There's plenty you could add from here:
- Environment variables: Add an env: KEY=VALUE directive per task
- Working directory: Add a dir: /some/path directive that chdirs before running commands
- Timestamps: Track when each task last ran and skip tasks whose inputs haven't changed (like make)
- Parallel execution: Tasks at the same topological level (no dependencies between them) can run concurrently using std.Thread
- Task arguments: Allow zigrun build --release to pass --release through to the task's commands via variable substitution
- Include files: An include: other.txt directive that pulls in tasks from another file
The parallel execution one is particularly interesting. In our example above, test and check both depend on build but NOT on each other -- they could run in parallel once build finishes. You'd use the thread primitives we covered in episode 30 to spawn multiple tasks simultaneously and wait for all of them before proceeding to the next level.
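A minimal sketch of that idea, assuming the tasks have already been grouped into topological levels and using a stand-in runTask function in place of our real executor:

```zig
const std = @import("std");

// Stand-in for our real task execution function.
fn runTask(name: []const u8) void {
    std.debug.print("[run] {s}\n", .{name});
}

// Run each topological "level" in parallel: spawn one thread per task
// in the level, then join all of them before starting the next level.
fn runLevels(allocator: std.mem.Allocator, levels: []const []const []const u8) !void {
    for (levels) |level| {
        const threads = try allocator.alloc(std.Thread, level.len);
        defer allocator.free(threads);
        for (level, 0..) |task_name, i| {
            threads[i] = try std.Thread.spawn(.{}, runTask, .{task_name});
        }
        for (threads) |t| t.join();
    }
}
```

For the example pipeline above, the levels would be {fmt}, {build}, {test, check}, {all} -- test and check run concurrently, and the join between levels preserves the dependency ordering.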
What we learned
- How to design a simple but extensible plain-text file format that's easy for both humans and machines to read
- Parsing line-by-line with std.mem.splitScalar and std.mem.startsWith -- no regex, no parser generators, just direct string matching
- Kahn's algorithm for topological sorting -- maintain in-degrees, process zero-degree nodes, detect cycles by counting processed nodes
- Spawning processes with std.process.Child including stdout/stderr capture via .Pipe behavior
- Connecting all the pieces: file I/O reads the task file, hash maps give O(1) task lookup, ArrayLists hold variable-length data, error handling propagates failures cleanly from child process through to user-facing error messages
- Using errdefer to clean up partial allocations when parsing fails halfway through
- How a single build.zig gives you compile, run, and test commands -- plus cross-compilation for free
This project touches about half the topics from this series so far, which is kind of the point of mini projects. Next time we'll start a new multi-part project that goes even deeper into string processing and data transformation.
Thanks for reading!
Thanks for your contribution to the STEMsocial community. Feel free to join us on discord to get to know the rest of us!
Please consider delegating to the @stemsocial account (85% of the curation rewards are returned).
Consider setting @stemsocial as a beneficiary of this post's rewards if you would like to support the community and contribute to its mission of promoting science and education on Hive.