From 11cc71f3b59fa62fd2fb2cb3b84e689317fb1268 Mon Sep 17 00:00:00 2001 From: Anhgelus Morhtuuzh Date: Thu, 16 Apr 2026 20:26:44 +0200 Subject: refactor(lexer): flatten struct --- src/lexer/lexed.zig | 44 +++++++------- src/lexer/lexer.zig | 168 ++++++++++++++++++++++++++-------------------------- src/root.zig | 8 --- 3 files changed, 104 insertions(+), 116 deletions(-) diff --git a/src/lexer/lexed.zig b/src/lexer/lexed.zig index 63395bb..b7c3b2c 100644 --- a/src/lexer/lexed.zig +++ b/src/lexer/lexed.zig @@ -21,31 +21,29 @@ pub const Kind = enum { tag, }; -pub const Lexed = struct { - allocator: Allocator, - kind: Kind, - content: std.ArrayList(u8), +allocator: Allocator, +kind: Kind, +content: std.ArrayList(u8), - const Self = @This(); +const Self = @This(); - pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Lexed { - return Lexed{ - .allocator = alloc, - .kind = kind, - .content = content, - }; - } +pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Self { + return .{ + .allocator = alloc, + .kind = kind, + .content = content, + }; +} - pub fn deinit(self: *Self) void { - self.content.deinit(self.allocator); - } +pub fn deinit(self: *Self) void { + self.content.deinit(self.allocator); +} - pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) { - return self.content.clone(alloc); - } +pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) { + return self.content.clone(alloc); +} - pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool { - if (self.kind != kind) return false; - return std.mem.eql(u8, self.content.items, content); - } -}; +pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool { + if (self.kind != kind) return false; + return std.mem.eql(u8, self.content.items, content); +} diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig index e2a4f7b..1144ebc 100644 --- a/src/lexer/lexer.zig +++ b/src/lexer/lexer.zig @@ -2,100 +2,98 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const eql = std.mem.eql; const unicode = std.unicode; -const lexed = @import("lexed.zig"); +const Lexed = @import("lexed.zig"); -pub const Lexer = struct { - iter: unicode.Utf8Iterator, - force_lit: bool = false, +iter: unicode.Utf8Iterator, +force_lit: bool = false, - const Self = @This(); +const Self = @This(); - pub const Error = error{ - InvalidUtf8, - } || Allocator.Error; +pub const Error = error{ + InvalidUtf8, +} || Allocator.Error; - pub fn init(content: []const u8) Error!Lexer { - const view = try unicode.Utf8View.init(content); - return .{ .iter = view.iterator() }; - } +pub fn init(content: []const u8) Error!Self { + const view = try unicode.Utf8View.init(content); + return .{ .iter = view.iterator() }; +} - pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed { - var acc = try std.ArrayList(u8).initCapacity(alloc, 2); - errdefer acc.deinit(alloc); - - var current_kind: ?lexed.Kind = null; - while (self.iter.nextCodepointSlice()) |rune| { - if (eql(u8, rune, "\r")) continue; - var override_if: ?[]const u8 = null; - // escape chars - if (eql(u8, rune, "\\")) { - self.force_lit = true; - current_kind = .literal; - } else { - self.force_lit = false; - const res = self.getCurrentKind(current_kind, rune, acc.items); - current_kind = res.kind; - override_if = res.override_if; - try acc.appendSlice(alloc, rune); - } - // conds here to avoid creating complex condition in while - const next_rune = self.iter.peek(1); - if (next_rune.len > 0) { - if (self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? and - (override_if == null or !eql(u8, override_if.?, next_rune))) - { - if (!requiresSpace(current_kind.?)) break; - if (eql(u8, next_rune, " ")) { - // consume next space - _ = self.iter.nextCodepoint(); - break; - } - current_kind = switch (current_kind.?) { - .title => if (acc.items.len == 1) .tag else .literal, - else => .literal, - }; +pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { + var acc = try std.ArrayList(u8).initCapacity(alloc, 2); + errdefer acc.deinit(alloc); + + var current_kind: ?Lexed.Kind = null; + while (self.iter.nextCodepointSlice()) |rune| { + if (eql(u8, rune, "\r")) continue; + var override_if: ?[]const u8 = null; + // escape chars + if (eql(u8, rune, "\\")) { + self.force_lit = true; + current_kind = .literal; + } else { + self.force_lit = false; + const res = self.getCurrentKind(current_kind, rune, acc.items); + current_kind = res.kind; + override_if = res.override_if; + try acc.appendSlice(alloc, rune); + } + // conds here to avoid creating complex condition in while + const next_rune = self.iter.peek(1); + if (next_rune.len > 0) { + if (self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? and + (override_if == null or !eql(u8, override_if.?, next_rune))) + { + if (!requiresSpace(current_kind.?)) break; + if (eql(u8, next_rune, " ")) { + // consume next space + _ = self.iter.nextCodepoint(); + break; } + current_kind = switch (current_kind.?) { + .title => if (acc.items.len == 1) .tag else .literal, + else => .literal, + }; } } - const kind = current_kind orelse { - acc.deinit(alloc); - return null; - }; - return lexed.Lexed.init(alloc, kind, acc); } - - const kindRes = struct { - kind: lexed.Kind, - override_if: ?[]const u8 = null, - - fn equals(self: @This(), v: @This()) bool { - if (self.kind != v.kind) return false; - if (self.override_if == null and v.override_if != null) return false; - if (self.override_if != null and v.override_if == null) return false; - if (self.override_if) |it| return eql(u8, it, v.override_if.?); - return true; - } + const kind = current_kind orelse { + acc.deinit(alloc); + return null; }; + return Lexed.init(alloc, kind, acc); +} - fn getCurrentKind(self: *Self, before: ?lexed.Kind, rune: []const u8, acc: []const u8) kindRes { - if (self.force_lit) return .{ .kind = .literal }; - if (eql(u8, rune, "\n")) return .{ .kind = .delimiter }; - if (eql(u8, rune, "*")) return .{ .kind = .bold }; - if (eql(u8, rune, "_")) return .{ .kind = .italic }; - if (eql(u8, rune, ">")) return .{ .kind = .quote }; - if (eql(u8, rune, "-")) return .{ .kind = .list_unordored }; - if (eql(u8, rune, ".")) return .{ .kind = .list_ordored }; - if (eql(u8, rune, "!")) return .{ .kind = .image }; - if (eql(u8, rune, "<")) return .{ .kind = .ref }; - if (is('#', 6, rune, acc)) return .{ .kind = .title }; - if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link }; - if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it; - if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it; - if (isOneOrThree("`", rune, acc, .code, .code_block)) |it| return it; - return .{ .kind = .literal }; +const kindRes = struct { + kind: Lexed.Kind, + override_if: ?[]const u8 = null, + + fn equals(self: @This(), v: @This()) bool { + if (self.kind != v.kind) return false; + if (self.override_if == null and v.override_if != null) return false; + if (self.override_if != null and v.override_if == null) return false; + if (self.override_if) |it| return eql(u8, it, v.override_if.?); + return true; } }; +fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes { + if (self.force_lit) return .{ .kind = .literal }; + if (eql(u8, rune, "\n")) return .{ .kind = .delimiter }; + if (eql(u8, rune, "*")) return .{ .kind = .bold }; + if (eql(u8, rune, "_")) return .{ .kind = .italic }; + if (eql(u8, rune, ">")) return .{ .kind = .quote }; + if (eql(u8, rune, "-")) return .{ .kind = .list_unordored }; + if (eql(u8, rune, ".")) return .{ .kind = .list_ordored }; + if (eql(u8, rune, "!")) return .{ .kind = .image }; + if (eql(u8, rune, "<")) return .{ .kind = .ref }; + if (is('#', 6, rune, acc)) return .{ .kind = .title }; + if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link }; + if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it; + if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it; + if (isOneOrThree("`", rune, acc, .code, .code_block)) |it| return it; + return .{ .kind = .literal }; +} + fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool { if (acc.len >= maxLen) return false; for (0..acc.len) |i| if (acc[i] != v) return false; @@ -104,7 +102,7 @@ fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool { const links = &[_][]const u8{ "[", "](", ")" }; -fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?lexed.Kind, now: lexed.Kind) bool { +fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool { var acc = p; if (before) |b| { if (now != b) acc = &[_]u8{}; @@ -117,7 +115,7 @@ fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?lexed return false; } -fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: lexed.Kind, three: lexed.Kind) ?Lexer.kindRes { +fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?kindRes { if (!eql(u8, rune, op)) return null; var acc = p; if (acc.len < op.len or !eql(u8, acc[0..op.len], op)) acc = &[_]u8{}; @@ -142,7 +140,7 @@ fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: lexed.Kind }; } -fn requiresSpace(k: lexed.Kind) bool { +fn requiresSpace(k: Lexed.Kind) bool { return switch (k) { .title => true, .list_ordored => true, @@ -151,7 +149,7 @@ fn requiresSpace(k: lexed.Kind) bool { }; } -fn doTest(alloc: Allocator, l: *Lexer, k: lexed.Kind, v: []const u8) !void { +fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void { var first = (try l.next(alloc)).?; defer first.deinit(); std.testing.expect(first.equals(k, v)) catch |err| { @@ -182,7 +180,7 @@ test "lexer common" { defer _ = arena.deinit(); const alloc = arena.allocator(); - var l = try Lexer.init("# hello world :)"); + var l = try init("# hello world :)"); try doTest(alloc, &l, .title, "#"); try doTest(alloc, &l, .literal, "hello world "); diff --git a/src/root.zig b/src/root.zig index 00ba5a5..2bcf565 100644 --- a/src/root.zig +++ b/src/root.zig @@ -14,14 +14,6 @@ pub fn bufferedPrint() !void { try stdout.flush(); // Don't forget to flush! } -pub fn add(a: i32, b: i32) i32 { - return a + b; -} - -test "basic add functionality" { - try std.testing.expect(add(3, 7) == 10); -} - test { std.testing.refAllDeclsRecursive(@This()); } -- cgit v1.2.3