From e7fa254387e450154f03b2d1bdef361a0adb80d1 Mon Sep 17 00:00:00 2001
From: Anhgelus Morhtuuzh <william@herges.fr>
Date: Fri, 24 Apr 2026 17:40:33 +0200
Subject: perf(lexer): do not alloc

---
 src/content.zig     |  14 +++----
 src/lexer/Lexed.zig |  57 ---------------------------
 src/lexer/Lexer.zig | 108 ++++++++++++++++++++++------------------------------
 src/lexer/Token.zig |  47 +++++++++++++++++++++++
 src/link.zig        |  27 ++++++-------
 src/paragraph.zig   |   7 ++--
 src/parser.zig      |  11 +++---
 src/root.zig        |   2 +-
 src/title.zig       |  10 ++---
 9 files changed, 123 insertions(+), 160 deletions(-)
 delete mode 100644 src/lexer/Lexed.zig
 create mode 100644 src/lexer/Token.zig

(limited to 'src')

diff --git a/src/content.zig b/src/content.zig
index 01d933b..e91576d 100644
--- a/src/content.zig
+++ b/src/content.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
-const Lexed = @import("lexer/Lexed.zig");
+const Token = @import("lexer/Token.zig");
 const Lexer = @import("lexer/Lexer.zig");
 const Element = @import("dom/Element.zig");
 const parser = @import("parser.zig");
@@ -9,16 +9,15 @@ const testing = @import("testing.zig");
 const doTest = testing.do;
 const doTestError = testing.doError;
 
-pub const Error = error{ ModifierNotClosed, IllegalPlacement } || Lexer.Error;
+pub const Error = error{ ModifierNotClosed, IllegalPlacement } || Lexer.Error || Allocator.Error;
 
 pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
     var content = Element.initEmpty(alloc);
     errdefer content.deinit();
-    var v = (try l.next(alloc)).?;
-    defer v.deinit();
+    const v = l.next().?;
     switch (v.kind) {
         .literal => {
-            const el = try Element.initLitEscaped(alloc, v.content.items);
+            const el = try Element.initLitEscaped(alloc, v.content);
             try content.appendContent(el);
         },
         .bold => try content.appendContent(try parseModifier(alloc, l, .bold, "b")),
@@ -29,14 +28,13 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
     return content;
 }
 
-fn parseModifier(alloc: Allocator, l: *Lexer, knd: Lexed.Kind, tag: []const u8) Error!Element {
+fn parseModifier(alloc: Allocator, l: *Lexer, knd: Token.Kind, tag: []const u8) Error!Element {
     var el = try Element.init(alloc, .content, tag);
     errdefer el.deinit();
     while (l.nextKind()) |it| {
         if (it == knd) {
             // consuming the finisher
-            var v = (try l.next(alloc)).?;
-            v.deinit();
+            _ = l.next();
             return el;
         }
         if (it.isDelimiter()) return Error.ModifierNotClosed;
diff --git a/src/lexer/Lexed.zig b/src/lexer/Lexed.zig
deleted file mode 100644
index 46fd552..0000000
--- a/src/lexer/Lexed.zig
+++ /dev/null
@@ -1,57 +0,0 @@
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-
-pub const Kind = enum {
-    literal,
-    weak_delimiter,
-    strong_delimiter,
-    title,
-    quote,
-    code,
-    code_block,
-    math,
-    math_block,
-    image,
-    link,
-    bold,
-    italic,
-    ref,
-    callout,
-    list_ordored,
-    list_unordored,
-    tag,
-
-    pub fn isDelimiter(self: @This()) bool {
-        return switch (self) {
-            .weak_delimiter, .strong_delimiter => true,
-            else => false,
-        };
-    }
-};
-
-allocator: Allocator,
-kind: Kind,
-content: std.ArrayList(u8),
-
-const Self = @This();
-
-pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Self {
-    return .{
-        .allocator = alloc,
-        .kind = kind,
-        .content = content,
-    };
-}
-
-pub fn deinit(self: *Self) void {
-    self.content.deinit(self.allocator);
-}
-
-pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) {
-    return self.content.clone(alloc);
-}
-
-pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool {
-    if (self.kind != kind) return false;
-    return std.mem.eql(u8, self.content.items, content);
-}
diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig
index 8b3893d..fa45b5a 100644
--- a/src/lexer/Lexer.zig
+++ b/src/lexer/Lexer.zig
@@ -2,8 +2,9 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const eql = std.mem.eql;
 const unicode = std.unicode;
-const Lexed = @import("Lexed.zig");
+const Token = @import("Token.zig");
 
+content: []const u8,
 iter: unicode.Utf8Iterator,
 force_lit: bool = false,
 
@@ -11,11 +12,11 @@ const Self = @This();
 
 pub const Error = error{
     InvalidUtf8,
-} || Allocator.Error;
+};
 
 pub fn init(content: []const u8) error{InvalidUtf8}!Self {
     const view = try unicode.Utf8View.init(content);
-    return .{ .iter = view.iterator() };
+    return .{ .content = content, .iter = view.iterator() };
 }
 
 // Must free bytes in iter.
@@ -25,17 +26,16 @@ pub fn initReader(alloc: Allocator, r: *std.io.Reader) !Self {
     return init(try acc.toOwnedSlice(alloc));
 }
 
-pub fn nextKind(self: *Self) ?Lexed.Kind {
+pub fn nextKind(self: *Self) ?Token.Kind {
     const next_rune = self.iter.peek(1);
     if (next_rune.len == 0) return null;
     return self.getCurrentKind(null, next_rune, &[0]u8{}).kind;
 }
 
-pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
-    var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
-    errdefer acc.deinit(alloc);
-
-    var current_kind: ?Lexed.Kind = null;
+pub fn next(self: *Self) ?Token {
+    const beg = self.iter.i;
+    var end = self.iter.i;
+    var current_kind: ?Token.Kind = null;
     while (self.iter.nextCodepointSlice()) |rune| {
         if (eql(u8, rune, "\r")) continue;
         var override_if: ?[]const u8 = null;
@@ -45,14 +45,14 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
             current_kind = .literal;
         } else {
             self.force_lit = false;
-            const res = self.getCurrentKind(current_kind, rune, acc.items);
+            const res = self.getCurrentKind(current_kind, rune, self.content[beg..end]);
             current_kind = res.kind;
             override_if = res.override_if;
-            try acc.appendSlice(alloc, rune);
+            end = self.iter.i;
         }
         // conds here to avoid creating complex condition in while
         const next_rune = self.iter.peek(1);
-        const next_kind = self.getCurrentKind(current_kind, next_rune, acc.items).kind;
+        const next_kind = self.getCurrentKind(current_kind, next_rune, self.content[beg..end]).kind;
         if (requiresSpace(current_kind.?) and next_kind != current_kind.?) {
             if (eql(u8, next_rune, " ")) {
                 // consume next space
@@ -60,7 +60,7 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
                 break;
             }
             current_kind = switch (current_kind.?) {
-                .title => if (acc.items.len == 1) .tag else .literal,
+                .title => if (end - beg == 1) .tag else .literal,
                 else => .literal,
             };
         }
@@ -69,15 +69,12 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
             (override_if == null or !eql(u8, override_if.?, next_rune)))
             break;
     }
-    const kind = current_kind orelse {
-        acc.deinit(alloc);
-        return null;
-    };
-    return .init(alloc, kind, acc);
+    const kind = current_kind orelse return null;
+    return .{ .kind = kind, .content = self.content[beg..end] };
 }
 
 const kindRes = struct {
-    kind: Lexed.Kind,
+    kind: Token.Kind,
     override_if: ?[]const u8 = null,
 
     fn equals(self: @This(), v: @This()) bool {
@@ -89,11 +86,11 @@ const kindRes = struct {
     }
 };
 
-fn requiresDelimiter(before: ?Lexed.Kind, knd: Lexed.Kind) Lexed.Kind {
+fn requiresDelimiter(before: ?Token.Kind, knd: Token.Kind) Token.Kind {
     return if (before == null or before.?.isDelimiter() or before.? == knd) knd else .literal;
 }
 
-fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes {
+fn getCurrentKind(self: *Self, before: ?Token.Kind, rune: []const u8, acc: []const u8) kindRes {
     if (self.force_lit) return .{ .kind = .literal };
     if (eql(u8, rune, "\n")) return .{
         .kind = if (before == .weak_delimiter) .strong_delimiter else .weak_delimiter,
@@ -122,7 +119,7 @@ fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool {
 
 const links = &[_][]const u8{ "[", "](", ")" };
 
-fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool {
+fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Token.Kind, now: Token.Kind) bool {
     var acc = p;
     if (before) |b| {
         if (now != b) acc = &[_]u8{};
@@ -135,7 +132,7 @@ fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed
     return false;
 }
 
-fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?kindRes {
+fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Token.Kind, three: Token.Kind) ?kindRes {
     if (!eql(u8, rune, op)) return null;
     var acc = p;
     if (acc.len < op.len or !eql(u8, acc[0..op.len], op)) acc = &[_]u8{};
@@ -160,18 +157,17 @@ fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind
     };
 }
 
-fn requiresSpace(k: Lexed.Kind) bool {
+fn requiresSpace(k: Token.Kind) bool {
     return switch (k) {
         .title, .list_ordored, .list_unordored => true,
         else => false,
     };
 }
 
-fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void {
-    var first = (try l.next(alloc)).?;
-    defer first.deinit();
+fn doTest(l: *Self, k: Token.Kind, v: []const u8) !void {
+    const first = l.next().?;
     std.testing.expect(first.equals(k, v)) catch |err| {
-        std.debug.print("{}({s})\n", .{ first.kind, first.content.items });
+        std.debug.print("{}({s})\n", .{ first.kind, first.content });
         return err;
     };
 }
@@ -208,29 +204,17 @@ test "is" {
 }
 
 test "lexer common" {
-    const expect = std.testing.expect;
-
-    var arena = std.heap.DebugAllocator(.{}).init;
-    defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
-    const alloc = arena.allocator();
-
     var l = try init("## hello world :)");
 
-    try doTest(alloc, &l, .title, "##");
-    try doTest(alloc, &l, .literal, "hello world ");
-    try doTest(alloc, &l, .ref, ":");
-    try doTest(alloc, &l, .link, ")");
+    try doTest(&l, .title, "##");
+    try doTest(&l, .literal, "hello world ");
+    try doTest(&l, .ref, ":");
+    try doTest(&l, .link, ")");
 
-    try expect(try l.next(alloc) == null);
+    try std.testing.expect(l.next() == null);
 }
 
 test "lexer multiline" {
-    const expect = std.testing.expect;
-
-    var arena = std.heap.DebugAllocator(.{}).init;
-    defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
-    const alloc = arena.allocator();
-
     var l = try init(
         \\# Title
         \\
@@ -242,21 +226,21 @@ test "lexer multiline" {
         \\#tag2
     );
 
-    try doTest(alloc, &l, .title, "#");
-    try doTest(alloc, &l, .literal, "Title");
-    try doTest(alloc, &l, .strong_delimiter, "\n\n");
-    try doTest(alloc, &l, .literal, "paragraph");
-    try doTest(alloc, &l, .weak_delimiter, "\n");
-    try doTest(alloc, &l, .title, "#");
-    try doTest(alloc, &l, .literal, "a title");
-    try doTest(alloc, &l, .weak_delimiter, "\n");
-    try doTest(alloc, &l, .literal, "a # in sentence");
-    try doTest(alloc, &l, .strong_delimiter, "\n\n");
-    try doTest(alloc, &l, .tag, "#");
-    try doTest(alloc, &l, .literal, "tag");
-    try doTest(alloc, &l, .weak_delimiter, "\n");
-    try doTest(alloc, &l, .tag, "#");
-    try doTest(alloc, &l, .literal, "tag2");
-
-    try expect(try l.next(alloc) == null);
+    try doTest(&l, .title, "#");
+    try doTest(&l, .literal, "Title");
+    try doTest(&l, .strong_delimiter, "\n\n");
+    try doTest(&l, .literal, "paragraph");
+    try doTest(&l, .weak_delimiter, "\n");
+    try doTest(&l, .title, "#");
+    try doTest(&l, .literal, "a title");
+    try doTest(&l, .weak_delimiter, "\n");
+    try doTest(&l, .literal, "a # in sentence");
+    try doTest(&l, .strong_delimiter, "\n\n");
+    try doTest(&l, .tag, "#");
+    try doTest(&l, .literal, "tag");
+    try doTest(&l, .weak_delimiter, "\n");
+    try doTest(&l, .tag, "#");
+    try doTest(&l, .literal, "tag2");
+
+    try std.testing.expect(l.next() == null);
 }
diff --git a/src/lexer/Token.zig b/src/lexer/Token.zig
new file mode 100644
index 0000000..cfe78f3
--- /dev/null
+++ b/src/lexer/Token.zig
@@ -0,0 +1,47 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+pub const Kind = enum {
+    literal,
+    weak_delimiter,
+    strong_delimiter,
+    title,
+    quote,
+    code,
+    code_block,
+    math,
+    math_block,
+    image,
+    link,
+    bold,
+    italic,
+    ref,
+    callout,
+    list_ordored,
+    list_unordored,
+    tag,
+
+    pub fn isDelimiter(self: @This()) bool {
+        return switch (self) {
+            .weak_delimiter, .strong_delimiter => true,
+            else => false,
+        };
+    }
+};
+
+pub const Loc = struct {
+    begin: usize,
+    end: usize,
+
+    pub fn get(self: @This(), content: []const u8) []const u8 {
+        return content[self.begin..self.end];
+    }
+};
+
+kind: Kind,
+content: []const u8,
+
+pub fn equals(self: @This(), kind: Kind, v: []const u8) bool {
+    if (self.kind != kind) return false;
+    return std.mem.eql(u8, self.content, v);
+}
diff --git a/src/link.zig b/src/link.zig
index 74cf08c..d5ffe3c 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
 const eql = std.mem.eql;
-const Lexed = @import("lexer/Lexed.zig");
+const Token = @import("lexer/Token.zig");
 const Lexer = @import("lexer/Lexer.zig");
 const Element = @import("dom/Element.zig");
 const content = @import("content.zig");
@@ -9,12 +9,11 @@ const testing = @import("testing.zig");
 const doTest = testing.do;
 const doTestError = testing.doError;
 
-pub const Error = error{InvalidLink} || Lexer.Error || content.Error;
+pub const Error = error{InvalidLink} || Lexer.Error || content.Error || Allocator.Error;
 
 pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
     const data = try parseData(alloc, l);
     const second = data.second orelse return data.first.?;
-    defer alloc.free(second);
     var in = if (data.first) |first| first else try Element.initLitEscaped(alloc, second);
     errdefer in.deinit();
     var el = try Element.init(alloc, .content, "a");
@@ -30,11 +29,10 @@ pub const Data = struct {
 };
 
 pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data {
-    var v = (try l.next(alloc)).?;
-    defer v.deinit();
+    const v = l.next().?;
     if (v.kind != .link) return Error.InvalidLink;
-    if (!eql(u8, v.content.items, "[")) {
-        const el = try Element.initLitEscaped(alloc, v.content.items);
+    if (!eql(u8, v.content, "[")) {
+        const el = try Element.initLitEscaped(alloc, v.content);
         return .{ .first = el, .second = null };
     }
     var el = Element.initEmpty(alloc);
@@ -43,9 +41,8 @@ pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data {
         switch (kind) {
             .weak_delimiter, .strong_delimiter => return Error.InvalidLink,
             .link => {
-                var next = (try l.next(alloc)).?;
-                defer next.deinit();
-                if (!eql(u8, next.content.items, "](")) return Error.InvalidLink;
+                const next = l.next().?;
+                if (!eql(u8, next.content, "](")) return Error.InvalidLink;
                 break;
             },
             else => {
@@ -54,15 +51,13 @@ pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data {
             },
         }
     }
-    var href = try l.next(alloc) orelse return Error.InvalidLink;
-    defer href.deinit();
+    const href = l.next() orelse return Error.InvalidLink;
     if (href.kind != .literal) return Error.InvalidLink;
-    var finisher = try l.next(alloc) orelse return Error.InvalidLink;
-    defer finisher.deinit();
-    if (finisher.kind != .link or !eql(u8, finisher.content.items, ")")) return Error.InvalidLink;
+    const finisher = l.next() orelse return Error.InvalidLink;
+    if (!finisher.equals(.link, ")")) return Error.InvalidLink;
     return .{
         .first = if (el.content.items.len > 0) el else null,
-        .second = try href.content.toOwnedSlice(alloc),
+        .second = href.content,
     };
 }
 
diff --git a/src/paragraph.zig b/src/paragraph.zig
index 0382e5d..c8c6798 100644
--- a/src/paragraph.zig
+++ b/src/paragraph.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
-const Lexed = @import("lexer/Lexed.zig");
+const Token = @import("lexer/Token.zig");
 const Lexer = @import("lexer/Lexer.zig");
 const Element = @import("dom/Element.zig");
 const parser = @import("parser.zig");
@@ -10,7 +10,7 @@ const testing = @import("testing.zig");
 const doTest = testing.do;
 const doTestError = testing.doError;
 
-pub const Error = content.Error || link.Error || Lexer.Error;
+pub const Error = content.Error || link.Error || Lexer.Error || Allocator.Error;
 
 pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
     var el = try Element.init(alloc, .content, "p");
@@ -19,8 +19,7 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
         switch (kind) {
             // because nextKind returns only an hint for the next rune
             .weak_delimiter => {
-                var v = (try l.next(alloc)).?;
-                defer v.deinit();
+                const v = l.next().?;
                 if (v.kind == .strong_delimiter) return el;
                 const next = l.nextKind() orelse return el;
                 switch (next) {
diff --git a/src/parser.zig b/src/parser.zig
index 85a757d..90f16fb 100644
--- a/src/parser.zig
+++ b/src/parser.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
-const Lexed = @import("lexer/Lexed.zig");
+const Token = @import("lexer/Token.zig");
 const Lexer = @import("lexer/Lexer.zig");
 const Element = @import("dom/Element.zig");
 const paragraph = @import("paragraph.zig");
@@ -9,7 +9,7 @@ const link = @import("link.zig");
 
 pub const Error = error{
     FeatureNotSupported,
-} || Lexer.Error || paragraph.Error || title.Error || link.Error;
+} || Lexer.Error || paragraph.Error || title.Error || link.Error || Allocator.Error;
 
 pub fn parseReader(parent: Allocator, r: *std.io.Reader) ![]const u8 {
     var l = try Lexer.initReader(parent, r);
@@ -29,16 +29,15 @@ fn gen(parent: Allocator, l: *Lexer) Error![]const u8 {
 
     var elements = try std.ArrayList(Element).initCapacity(alloc, 2);
 
-    base: while (l.nextKind()) |it| {
+    while (l.nextKind()) |it| {
         try elements.append(alloc, switch (it) {
             // block paragraph
             .literal, .bold, .italic, .code, .link => try paragraph.parse(alloc, l),
             // other blocks
             .title => try title.parse(alloc, l),
             .weak_delimiter, .strong_delimiter => {
-                var v = (try l.next(alloc)).?;
-                v.deinit();
-                continue :base;
+                _ = l.next();
+                continue;
             },
             else => return Error.FeatureNotSupported,
         });
diff --git a/src/root.zig b/src/root.zig
index f0e3f98..ac95f31 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -50,7 +50,7 @@ export fn typdown_parse(content: [*:0]const u8, code: *u8) ?[*:0]const u8 {
 }
 
 /// Parse the content.
-/// 
+///
 /// Use parse if you are not in Zig.
 pub fn parse(alloc: std.mem.Allocator, content: []const u8) Error![]const u8 {
     return parser.parse(alloc, content);
diff --git a/src/title.zig b/src/title.zig
index 352460f..c7845c6 100644
--- a/src/title.zig
+++ b/src/title.zig
@@ -1,6 +1,6 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
-const Lexed = @import("lexer/Lexed.zig");
+const Token = @import("lexer/Token.zig");
 const Lexer = @import("lexer/Lexer.zig");
 const Element = @import("dom/Element.zig");
 const paragraph = @import("paragraph.zig");
@@ -11,9 +11,8 @@ const doTestError = testing.doError;
 pub const Error = error{InvalidTitleContent} || paragraph.Error || Lexer.Error;
 
 pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
-    var v = (try l.next(alloc)).?;
-    defer v.deinit();
-    var el = try Element.init(alloc, .content, switch (v.content.items.len) {
+    const v = l.next().?;
+    var el = try Element.init(alloc, .content, switch (v.content.len) {
         1 => "h1",
         2 => "h2",
         3 => "h3",
@@ -27,8 +26,7 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element {
         paragraph.Error.IllegalPlacement => return Error.InvalidTitleContent,
         else => return err,
     });
-    var next = (try l.next(alloc)) orelse return el;
-    defer next.deinit();
+    const next = l.next() orelse return el;
     if (!next.kind.isDelimiter()) return Error.InvalidTitleContent;
     return el;
 }
-- 
cgit v1.2.3