From e7fa254387e450154f03b2d1bdef361a0adb80d1 Mon Sep 17 00:00:00 2001 From: Anhgelus Morhtuuzh Date: Fri, 24 Apr 2026 17:40:33 +0200 Subject: perf(lexer): do not alloc --- src/content.zig | 14 +++---- src/lexer/Lexed.zig | 57 --------------------------- src/lexer/Lexer.zig | 108 ++++++++++++++++++++++------------------------------ src/lexer/Token.zig | 47 +++++++++++++++++++++++ src/link.zig | 27 ++++++------- src/paragraph.zig | 7 ++-- src/parser.zig | 11 +++--- src/root.zig | 2 +- src/title.zig | 10 ++--- 9 files changed, 123 insertions(+), 160 deletions(-) delete mode 100644 src/lexer/Lexed.zig create mode 100644 src/lexer/Token.zig (limited to 'src') diff --git a/src/content.zig b/src/content.zig index 01d933b..e91576d 100644 --- a/src/content.zig +++ b/src/content.zig @@ -1,6 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Lexed = @import("lexer/Lexed.zig"); +const Token = @import("lexer/Token.zig"); const Lexer = @import("lexer/Lexer.zig"); const Element = @import("dom/Element.zig"); const parser = @import("parser.zig"); @@ -9,16 +9,15 @@ const testing = @import("testing.zig"); const doTest = testing.do; const doTestError = testing.doError; -pub const Error = error{ ModifierNotClosed, IllegalPlacement } || Lexer.Error; +pub const Error = error{ ModifierNotClosed, IllegalPlacement } || Lexer.Error || Allocator.Error; pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { var content = Element.initEmpty(alloc); errdefer content.deinit(); - var v = (try l.next(alloc)).?; - defer v.deinit(); + const v = l.next().?; switch (v.kind) { .literal => { - const el = try Element.initLitEscaped(alloc, v.content.items); + const el = try Element.initLitEscaped(alloc, v.content); try content.appendContent(el); }, .bold => try content.appendContent(try parseModifier(alloc, l, .bold, "b")), @@ -29,14 +28,13 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { return content; } -fn parseModifier(alloc: Allocator, l: *Lexer, knd: Lexed.Kind, tag: []const u8) Error!Element { +fn parseModifier(alloc: Allocator, l: *Lexer, knd: Token.Kind, tag: []const u8) Error!Element { var el = try Element.init(alloc, .content, tag); errdefer el.deinit(); while (l.nextKind()) |it| { if (it == knd) { // consuming the finisher - var v = (try l.next(alloc)).?; - v.deinit(); + _ = l.next(); return el; } if (it.isDelimiter()) return Error.ModifierNotClosed; diff --git a/src/lexer/Lexed.zig b/src/lexer/Lexed.zig deleted file mode 100644 index 46fd552..0000000 --- a/src/lexer/Lexed.zig +++ /dev/null @@ -1,57 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; - -pub const Kind = enum { - literal, - weak_delimiter, - strong_delimiter, - title, - quote, - code, - code_block, - math, - math_block, - image, - link, - bold, - italic, - ref, - callout, - list_ordored, - list_unordored, - tag, - - pub fn isDelimiter(self: @This()) bool { - return switch (self) { - .weak_delimiter, .strong_delimiter => true, - else => false, - }; - } -}; - -allocator: Allocator, -kind: Kind, -content: std.ArrayList(u8), - -const Self = @This(); - -pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Self { - return .{ - .allocator = alloc, - .kind = kind, - .content = content, - }; -} - -pub fn deinit(self: *Self) void { - self.content.deinit(self.allocator); -} - -pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) { - return self.content.clone(alloc); -} - -pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool { - if (self.kind != kind) return false; - return std.mem.eql(u8, self.content.items, content); -} diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig index 8b3893d..fa45b5a 100644 --- a/src/lexer/Lexer.zig +++ b/src/lexer/Lexer.zig @@ -2,8 +2,9 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const eql = std.mem.eql; const unicode = std.unicode; -const Lexed = @import("Lexed.zig"); +const Token = @import("Token.zig"); +content: []const u8, iter: unicode.Utf8Iterator, force_lit: bool = false, @@ -11,11 +12,11 @@ const Self = @This(); pub const Error = error{ InvalidUtf8, -} || Allocator.Error; +}; pub fn init(content: []const u8) error{InvalidUtf8}!Self { const view = try unicode.Utf8View.init(content); - return .{ .iter = view.iterator() }; + return .{ .content = content, .iter = view.iterator() }; } // Must free bytes in iter. @@ -25,17 +26,16 @@ pub fn initReader(alloc: Allocator, r: *std.io.Reader) !Self { return init(try acc.toOwnedSlice(alloc)); } -pub fn nextKind(self: *Self) ?Lexed.Kind { +pub fn nextKind(self: *Self) ?Token.Kind { const next_rune = self.iter.peek(1); if (next_rune.len == 0) return null; return self.getCurrentKind(null, next_rune, &[0]u8{}).kind; } -pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { - var acc = try std.ArrayList(u8).initCapacity(alloc, 2); - errdefer acc.deinit(alloc); - - var current_kind: ?Lexed.Kind = null; +pub fn next(self: *Self) ?Token { + const beg = self.iter.i; + var end = self.iter.i; + var current_kind: ?Token.Kind = null; while (self.iter.nextCodepointSlice()) |rune| { if (eql(u8, rune, "\r")) continue; var override_if: ?[]const u8 = null; @@ -45,14 +45,14 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { current_kind = .literal; } else { self.force_lit = false; - const res = self.getCurrentKind(current_kind, rune, acc.items); + const res = self.getCurrentKind(current_kind, rune, self.content[beg..end]); current_kind = res.kind; override_if = res.override_if; - try acc.appendSlice(alloc, rune); + end = self.iter.i; } // conds here to avoid creating complex condition in while const next_rune = self.iter.peek(1); - const next_kind = self.getCurrentKind(current_kind, next_rune, acc.items).kind; + const next_kind = self.getCurrentKind(current_kind, next_rune, self.content[beg..end]).kind; if (requiresSpace(current_kind.?) and next_kind != current_kind.?) { if (eql(u8, next_rune, " ")) { // consume next space @@ -60,7 +60,7 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { break; } current_kind = switch (current_kind.?) { - .title => if (acc.items.len == 1) .tag else .literal, + .title => if (end - beg == 1) .tag else .literal, else => .literal, }; } @@ -69,15 +69,12 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { (override_if == null or !eql(u8, override_if.?, next_rune))) break; } - const kind = current_kind orelse { - acc.deinit(alloc); - return null; - }; - return .init(alloc, kind, acc); + const kind = current_kind orelse return null; + return .{ .kind = kind, .content = self.content[beg..end] }; } const kindRes = struct { - kind: Lexed.Kind, + kind: Token.Kind, override_if: ?[]const u8 = null, fn equals(self: @This(), v: @This()) bool { @@ -89,11 +86,11 @@ const kindRes = struct { } }; -fn requiresDelimiter(before: ?Lexed.Kind, knd: Lexed.Kind) Lexed.Kind { +fn requiresDelimiter(before: ?Token.Kind, knd: Token.Kind) Token.Kind { return if (before == null or before.?.isDelimiter() or before.? == knd) knd else .literal; } -fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes { +fn getCurrentKind(self: *Self, before: ?Token.Kind, rune: []const u8, acc: []const u8) kindRes { if (self.force_lit) return .{ .kind = .literal }; if (eql(u8, rune, "\n")) return .{ .kind = if (before == .weak_delimiter) .strong_delimiter else .weak_delimiter, @@ -122,7 +119,7 @@ fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool { const links = &[_][]const u8{ "[", "](", ")" }; -fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool { +fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Token.Kind, now: Token.Kind) bool { var acc = p; if (before) |b| { if (now != b) acc = &[_]u8{}; @@ -135,7 +132,7 @@ fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed return false; } -fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?kindRes { +fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Token.Kind, three: Token.Kind) ?kindRes { if (!eql(u8, rune, op)) return null; var acc = p; if (acc.len < op.len or !eql(u8, acc[0..op.len], op)) acc = &[_]u8{}; @@ -160,18 +157,17 @@ fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind }; } -fn requiresSpace(k: Lexed.Kind) bool { +fn requiresSpace(k: Token.Kind) bool { return switch (k) { .title, .list_ordored, .list_unordored => true, else => false, }; } -fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void { - var first = (try l.next(alloc)).?; - defer first.deinit(); +fn doTest(l: *Self, k: Token.Kind, v: []const u8) !void { + var first = l.next().?; std.testing.expect(first.equals(k, v)) catch |err| { - std.debug.print("{}({s})\n", .{ first.kind, first.content.items }); + std.debug.print("{}({s})\n", .{ first.kind, first.content }); return err; }; } @@ -208,29 +204,17 @@ test "is" { } test "lexer common" { - const expect = std.testing.expect; - - var arena = std.heap.DebugAllocator(.{}).init; - defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{}); - const alloc = arena.allocator(); - var l = try init("## hello world :)"); - try doTest(alloc, &l, .title, "##"); - try doTest(alloc, &l, .literal, "hello world "); - try doTest(alloc, &l, .ref, ":"); - try doTest(alloc, &l, .link, ")"); + try doTest(&l, .title, "##"); + try doTest(&l, .literal, "hello world "); + try doTest(&l, .ref, ":"); + try doTest(&l, .link, ")"); - try expect(try l.next(alloc) == null); + try std.testing.expect(l.next() == null); } test "lexer multiline" { - const expect = std.testing.expect; - - var arena = std.heap.DebugAllocator(.{}).init; - defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{}); - const alloc = arena.allocator(); - var l = try init( \\# Title \\ @@ -242,21 +226,21 @@ test "lexer multiline" { \\#tag2 ); - try doTest(alloc, &l, .title, "#"); - try doTest(alloc, &l, .literal, "Title"); - try doTest(alloc, &l, .strong_delimiter, "\n\n"); - try doTest(alloc, &l, .literal, "paragraph"); - try doTest(alloc, &l, .weak_delimiter, "\n"); - try doTest(alloc, &l, .title, "#"); - try doTest(alloc, &l, .literal, "a title"); - try doTest(alloc, &l, .weak_delimiter, "\n"); - try doTest(alloc, &l, .literal, "a # in sentence"); - try doTest(alloc, &l, .strong_delimiter, "\n\n"); - try doTest(alloc, &l, .tag, "#"); - try doTest(alloc, &l, .literal, "tag"); - try doTest(alloc, &l, .weak_delimiter, "\n"); - try doTest(alloc, &l, .tag, "#"); - try doTest(alloc, &l, .literal, "tag2"); - - try expect(try l.next(alloc) == null); + try doTest(&l, .title, "#"); + try doTest(&l, .literal, "Title"); + try doTest(&l, .strong_delimiter, "\n\n"); + try doTest(&l, .literal, "paragraph"); + try doTest(&l, .weak_delimiter, "\n"); + try doTest(&l, .title, "#"); + try doTest(&l, .literal, "a title"); + try doTest(&l, .weak_delimiter, "\n"); + try doTest(&l, .literal, "a # in sentence"); + try doTest(&l, .strong_delimiter, "\n\n"); + try doTest(&l, .tag, "#"); + try doTest(&l, .literal, "tag"); + try doTest(&l, .weak_delimiter, "\n"); + try doTest(&l, .tag, "#"); + try doTest(&l, .literal, "tag2"); + + try std.testing.expect(l.next() == null); } diff --git a/src/lexer/Token.zig b/src/lexer/Token.zig new file mode 100644 index 0000000..cfe78f3 --- /dev/null +++ b/src/lexer/Token.zig @@ -0,0 +1,47 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const Kind = enum { + literal, + weak_delimiter, + strong_delimiter, + title, + quote, + code, + code_block, + math, + math_block, + image, + link, + bold, + italic, + ref, + callout, + list_ordored, + list_unordored, + tag, + + pub fn isDelimiter(self: @This()) bool { + return switch (self) { + .weak_delimiter, .strong_delimiter => true, + else => false, + }; + } +}; + +pub const Loc = struct { + begin: usize, + end: usize, + + pub fn get(self: @This(), content: []const u8) []const u8 { + return content[self.begin..self.end]; + } +}; + +kind: Kind, +content: []const u8, + +pub fn equals(self: @This(), kind: Kind, v: []const u8) bool { + if (self.kind != kind) return false; + return std.mem.eql(u8, self.content, v); +} diff --git a/src/link.zig b/src/link.zig index 74cf08c..d5ffe3c 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1,7 +1,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const eql = std.mem.eql; -const Lexed = @import("lexer/Lexed.zig"); +const Token = @import("lexer/Token.zig"); const Lexer = @import("lexer/Lexer.zig"); const Element = @import("dom/Element.zig"); const content = @import("content.zig"); @@ -9,12 +9,11 @@ const testing = @import("testing.zig"); const doTest = testing.do; const doTestError = testing.doError; -pub const Error = error{InvalidLink} || Lexer.Error || content.Error; +pub const Error = error{InvalidLink} || Lexer.Error || content.Error || Allocator.Error; pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { const data = try parseData(alloc, l); const second = data.second orelse return data.first.?; - defer alloc.free(second); var in = if (data.first) |first| first else try Element.initLitEscaped(alloc, second); errdefer in.deinit(); var el = try Element.init(alloc, .content, "a"); @@ -30,11 +29,10 @@ pub const Data = struct { }; pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data { - var v = (try l.next(alloc)).?; - defer v.deinit(); + const v = l.next().?; if (v.kind != .link) return Error.InvalidLink; - if (!eql(u8, v.content.items, "[")) { - const el = try Element.initLitEscaped(alloc, v.content.items); + if (!eql(u8, v.content, "[")) { + const el = try Element.initLitEscaped(alloc, v.content); return .{ .first = el, .second = null }; } var el = Element.initEmpty(alloc); @@ -43,9 +41,8 @@ pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data { switch (kind) { .weak_delimiter, .strong_delimiter => return Error.InvalidLink, .link => { - var next = (try l.next(alloc)).?; - defer next.deinit(); - if (!eql(u8, next.content.items, "](")) return Error.InvalidLink; + const next = l.next().?; + if (!eql(u8, next.content, "](")) return Error.InvalidLink; break; }, else => { @@ -54,15 +51,13 @@ pub fn parseData(alloc: Allocator, l: *Lexer) Error!Data { }, } } - var href = try l.next(alloc) orelse return Error.InvalidLink; - defer href.deinit(); + const href = l.next() orelse return Error.InvalidLink; if (href.kind != .literal) return Error.InvalidLink; - var finisher = try l.next(alloc) orelse return Error.InvalidLink; - defer finisher.deinit(); - if (finisher.kind != .link or !eql(u8, finisher.content.items, ")")) return Error.InvalidLink; + const finisher = l.next() orelse return Error.InvalidLink; + if (!finisher.equals(.link, ")")) return Error.InvalidLink; return .{ .first = if (el.content.items.len > 0) el else null, - .second = try href.content.toOwnedSlice(alloc), + .second = href.content, }; } diff --git a/src/paragraph.zig b/src/paragraph.zig index 0382e5d..c8c6798 100644 --- a/src/paragraph.zig +++ b/src/paragraph.zig @@ -1,6 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Lexed = @import("lexer/Lexed.zig"); +const Token = @import("lexer/Token.zig"); const Lexer = @import("lexer/Lexer.zig"); const Element = @import("dom/Element.zig"); const parser = @import("parser.zig"); @@ -10,7 +10,7 @@ const testing = @import("testing.zig"); const doTest = testing.do; const doTestError = testing.doError; -pub const Error = content.Error || link.Error || Lexer.Error; +pub const Error = content.Error || link.Error || Lexer.Error || Allocator.Error; pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { var el = try Element.init(alloc, .content, "p"); @@ -19,8 +19,7 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { switch (kind) { // because nextKind returns only an hint for the next rune .weak_delimiter => { - var v = (try l.next(alloc)).?; - defer v.deinit(); + const v = l.next().?; if (v.kind == .strong_delimiter) return el; const next = l.nextKind() orelse return el; switch (next) { diff --git a/src/parser.zig b/src/parser.zig index 85a757d..90f16fb 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -1,6 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Lexed = @import("lexer/Lexed.zig"); +const Token = @import("lexer/Token.zig"); const Lexer = @import("lexer/Lexer.zig"); const Element = @import("dom/Element.zig"); const paragraph = @import("paragraph.zig"); @@ -9,7 +9,7 @@ const link = @import("link.zig"); pub const Error = error{ FeatureNotSupported, -} || Lexer.Error || paragraph.Error || title.Error || link.Error; +} || Lexer.Error || paragraph.Error || title.Error || link.Error || Allocator.Error; pub fn parseReader(parent: Allocator, r: *std.io.Reader) ![]const u8 { var l = try Lexer.initReader(parent, r); @@ -29,16 +29,15 @@ fn gen(parent: Allocator, l: *Lexer) Error![]const u8 { var elements = try std.ArrayList(Element).initCapacity(alloc, 2); - base: while (l.nextKind()) |it| { + while (l.nextKind()) |it| { try elements.append(alloc, switch (it) { // block paragraph .literal, .bold, .italic, .code, .link => try paragraph.parse(alloc, l), // other blocks .title => try title.parse(alloc, l), .weak_delimiter, .strong_delimiter => { - var v = (try l.next(alloc)).?; - v.deinit(); - continue :base; + _ = l.next(); + continue; }, else => return Error.FeatureNotSupported, }); diff --git a/src/root.zig b/src/root.zig index f0e3f98..ac95f31 100644 --- a/src/root.zig +++ b/src/root.zig @@ -50,7 +50,7 @@ export fn typdown_parse(content: [*:0]const u8, code: *u8) ?[*:0]const u8 { } /// Parse the content. -/// +/// /// Use parse if you are not in Zig. pub fn parse(alloc: std.mem.Allocator, content: []const u8) Error![]const u8 { return parser.parse(alloc, content); diff --git a/src/title.zig b/src/title.zig index 352460f..c7845c6 100644 --- a/src/title.zig +++ b/src/title.zig @@ -1,6 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -const Lexed = @import("lexer/Lexed.zig"); +const Token = @import("lexer/Token.zig"); const Lexer = @import("lexer/Lexer.zig"); const Element = @import("dom/Element.zig"); const paragraph = @import("paragraph.zig"); @@ -11,9 +11,8 @@ const doTestError = testing.doError; pub const Error = error{InvalidTitleContent} || paragraph.Error || Lexer.Error; pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { - var v = (try l.next(alloc)).?; - defer v.deinit(); - var el = try Element.init(alloc, .content, switch (v.content.items.len) { + const v = l.next().?; + var el = try Element.init(alloc, .content, switch (v.content.len) { 1 => "h1", 2 => "h2", 3 => "h3", @@ -27,8 +26,7 @@ pub fn parse(alloc: Allocator, l: *Lexer) Error!Element { paragraph.Error.IllegalPlacement => return Error.InvalidTitleContent, else => return err, }); - var next = (try l.next(alloc)) orelse return el; - defer next.deinit(); + var next = l.next() orelse return el; if (!next.kind.isDelimiter()) return Error.InvalidTitleContent; return el; } -- cgit v1.2.3