diff options
| author | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-16 22:33:44 +0200 |
|---|---|---|
| committer | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-16 22:33:44 +0200 |
| commit | 6df64050b1442a5f3a0f566cd816639ac1fd298f (patch) | |
| tree | 81de54495e09c501d7d2839828523eaabb7a0569 /src/lexer/lexer.zig | |
| parent | 11cc71f3b59fa62fd2fb2cb3b84e689317fb1268 (diff) | |
feat(dom): element generator
Diffstat (limited to 'src/lexer/lexer.zig')
| -rw-r--r-- | src/lexer/lexer.zig | 191 |
1 files changed, 0 insertions, 191 deletions
// Reconstructed from the deleted-file hunk of src/lexer/lexer.zig
// (diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig, mode 100644, index 1144ebc..0000000).
//
// Streaming lexer: this file is a struct that walks a UTF-8 buffer one
// codepoint at a time and groups consecutive codepoints of the same kind
// into `Lexed` tokens.
const std = @import("std");
const Allocator = std.mem.Allocator;
const eql = std.mem.eql;
const unicode = std.unicode;
const Lexed = @import("lexed.zig");

/// Cursor over the input; advanced by `next`.
iter: unicode.Utf8Iterator,
/// Set after an unescaped backslash: the following rune is classified as
/// `.literal` regardless of its usual meaning.
force_lit: bool = false,

const Self = @This();

pub const Error = error{
    InvalidUtf8,
} || Allocator.Error;

/// Creates a lexer over `content`.
/// Fails with `error.InvalidUtf8` when `content` is not valid UTF-8.
/// The lexer keeps a view into `content`; it does not copy it.
pub fn init(content: []const u8) Error!Self {
    const view = try unicode.Utf8View.init(content);
    return .{ .iter = view.iterator() };
}

/// Lexes and returns the next token, or `null` once the input is exhausted.
///
/// The accumulated bytes are handed to `Lexed.init` together with `alloc`;
/// on any error the accumulator is released before returning.
/// Carriage returns are skipped. A backslash is not emitted: it forces the
/// rune that follows it to be lexed as `.literal` (so `\*` yields a literal
/// `*`, and `\\` yields a literal backslash).
pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
    var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
    errdefer acc.deinit(alloc);

    var current_kind: ?Lexed.Kind = null;
    while (self.iter.nextCodepointSlice()) |rune| {
        if (eql(u8, rune, "\r")) continue;
        var override_if: ?[]const u8 = null;
        // Escape chars. Only an *unescaped* backslash starts an escape;
        // otherwise `\\` would re-arm the escape instead of producing a
        // literal backslash.
        if (!self.force_lit and eql(u8, rune, "\\")) {
            self.force_lit = true;
            current_kind = .literal;
        } else {
            // Classify BEFORE clearing `force_lit`: `getCurrentKind` reads the
            // flag, and clearing it first would make the escape a no-op.
            const res = self.getCurrentKind(current_kind, rune, acc.items);
            self.force_lit = false;
            current_kind = res.kind;
            override_if = res.override_if;
            try acc.appendSlice(alloc, rune);
        }

        // Look one rune ahead to decide whether the current token ends here.
        // Kept out of the `while` condition to keep that condition simple.
        const next_rune = self.iter.peek(1);
        if (next_rune.len == 0) continue;

        const kind_now = current_kind.?; // always set by the branch above
        const next_kind = self.getCurrentKind(current_kind, next_rune, acc.items).kind;
        // `override_if` lets a rune extend the token even though its kind
        // differs (e.g. the second op of a three-op delimiter).
        const kept_by_override = if (override_if) |ov| eql(u8, ov, next_rune) else false;
        if (next_kind == kind_now or kept_by_override) continue;

        if (!requiresSpace(kind_now)) break;
        if (eql(u8, next_rune, " ")) {
            // consume next space
            _ = self.iter.nextCodepoint();
            break;
        }
        // The marker is not followed by a space, so it is not a real marker.
        // NOTE(review): unless the next rune is a backslash or `\r`, the next
        // iteration overwrites `current_kind` with that rune's classification,
        // so the `.tag` value below rarely survives — confirm intent.
        current_kind = switch (kind_now) {
            .title => if (acc.items.len == 1) .tag else .literal,
            else => .literal,
        };
    }

    const kind = current_kind orelse {
        acc.deinit(alloc);
        return null;
    };
    return Lexed.init(alloc, kind, acc);
}

/// Result of classifying one rune.
const KindRes = struct {
    kind: Lexed.Kind,
    /// When set, a following rune equal to this slice stays in the current
    /// token even if its own kind differs.
    override_if: ?[]const u8 = null,

    /// Structural equality; `override_if` is compared by content.
    fn equals(self: @This(), v: @This()) bool {
        if (self.kind != v.kind) return false;
        const mine = self.override_if orelse return v.override_if == null;
        const theirs = v.override_if orelse return false;
        return eql(u8, mine, theirs);
    }
};

/// Classifies `rune` given the kind of the token being built (`before`) and
/// the bytes accumulated so far (`acc`). Checks run in order; the first match
/// wins and anything unmatched is `.literal`.
fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) KindRes {
    if (self.force_lit) return .{ .kind = .literal };
    if (eql(u8, rune, "\n")) return .{ .kind = .delimiter };
    if (eql(u8, rune, "*")) return .{ .kind = .bold };
    if (eql(u8, rune, "_")) return .{ .kind = .italic };
    if (eql(u8, rune, ">")) return .{ .kind = .quote };
    if (eql(u8, rune, "-")) return .{ .kind = .list_unordored };
    if (eql(u8, rune, ".")) return .{ .kind = .list_ordored };
    if (eql(u8, rune, "!")) return .{ .kind = .image };
    if (eql(u8, rune, "<")) return .{ .kind = .ref };
    if (is('#', 6, rune, acc)) return .{ .kind = .title };
    if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link };
    if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it;
    if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it;
    if (isOneOrThree("`", rune, acc, .code, .code_block)) |it| return it;
    return .{ .kind = .literal };
}

/// True when `rune` is the byte `v` and `acc` consists solely of fewer than
/// `max_len` copies of `v` (so at most `max_len` in total once appended).
fn is(v: u8, max_len: usize, rune: []const u8, acc: []const u8) bool {
    if (acc.len >= max_len) return false;
    for (acc) |c| if (c != v) return false;
    return eql(u8, rune, &[_]u8{v});
}

/// Operators recognised as `.link` tokens.
const links = &[_][]const u8{ "[", "](", ")" };

/// True when `acc ++ rune` is a prefix of one of `ops`.
/// If the token being built (`before`) is of another kind than `now`, the
/// accumulator is ignored and only `rune` is matched.
fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool {
    var acc = p;
    if (before) |b| {
        if (now != b) acc = &[_]u8{};
    }
    for (ops) |op| {
        const ln = acc.len + rune.len;
        if (op.len >= ln and eql(u8, acc, op[0..acc.len]) and eql(u8, rune, op[acc.len..ln]))
            return true;
    }
    return false;
}

/// Classifies `rune` for a delimiter that is meaningful alone (`one`) or
/// tripled (`three`).
///
/// Returns `null` when `rune` is not `op`, or when the accumulator starts
/// with `op` but also contains something else. Otherwise the result depends
/// on how many `op`s the token would contain including `rune`:
/// 1 => `one`, 2 => `.literal` (may still grow into three), 3 => `three`,
/// more => `.literal`.
fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?KindRes {
    if (!eql(u8, rune, op)) return null;
    var acc = p;
    // An accumulator that does not start with `op` belongs to another token.
    if (!std.mem.startsWith(u8, acc, op)) acc = &[_]u8{};

    // Count the ops: `rune` itself plus every `op` already accumulated.
    var ln: usize = 1;
    var rest = acc;
    while (rest.len > 0) : (ln += 1) {
        if (!std.mem.startsWith(u8, rest, op)) return null;
        rest = rest[op.len..];
    }

    return switch (ln) {
        1 => .{
            .kind = one,
            .override_if = op,
        },
        2 => .{
            .kind = .literal,
            .override_if = op,
        },
        3 => .{ .kind = three },
        // Input such as "::::" or four backticks reaches this prong through
        // the look-ahead in `next`; it must not be `unreachable`.
        else => .{ .kind = .literal },
    };
}

/// Kinds whose marker only counts when it is followed by a space.
fn requiresSpace(k: Lexed.Kind) bool {
    return switch (k) {
        .title, .list_ordored, .list_unordored => true,
        else => false,
    };
}

/// Test helper: lexes one token from `l` and checks its kind and content,
/// printing the actual token on mismatch.
fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void {
    var first = (try l.next(alloc)).?;
    defer first.deinit();
    std.testing.expect(first.equals(k, v)) catch |err| {
        std.debug.print("{}({s})\n", .{ first.kind, first.content.items });
        return err;
    };
}

test "one or three" {
    const expect = std.testing.expect;

    // valid
    try expect(isOneOrThree(":", ":", "", .ref, .callout).?.equals(.{ .kind = .ref, .override_if = ":" }));
    try expect(isOneOrThree(":", ":", ":", .ref, .callout).?.equals(.{ .kind = .literal, .override_if = ":" }));
    try expect(isOneOrThree(":", ":", "::", .ref, .callout).?.equals(.{ .kind = .callout }));
    try expect(isOneOrThree(":", ":", "a", .ref, .callout).?.equals(.{ .kind = .ref, .override_if = ":" }));

    // a fourth op is plain text, not a crash
    try expect(isOneOrThree(":", ":", ":::", .ref, .callout).?.equals(.{ .kind = .literal }));

    // invalid
    try expect(isOneOrThree(":", "a", "", .ref, .callout) == null);
    try expect(isOneOrThree(":", "a", "b", .ref, .callout) == null);
    try expect(isOneOrThree(":", "a", ":", .ref, .callout) == null);
}

test "lexer common" {
    const expect = std.testing.expect;

    var debug_allocator = std.heap.DebugAllocator(.{}).init;
    defer _ = debug_allocator.deinit();
    const alloc = debug_allocator.allocator();

    var l = try init("# hello world :)");

    try doTest(alloc, &l, .title, "#");
    try doTest(alloc, &l, .literal, "hello world ");
    try doTest(alloc, &l, .ref, ":");
    try doTest(alloc, &l, .link, ")");

    try expect(try l.next(alloc) == null);
}

test "lexer escapes" {
    const expect = std.testing.expect;

    var debug_allocator = std.heap.DebugAllocator(.{}).init;
    defer _ = debug_allocator.deinit();
    const alloc = debug_allocator.allocator();

    // an escaped marker is literal text
    var l = try init("a\\*b");
    try doTest(alloc, &l, .literal, "a*b");
    try expect(try l.next(alloc) == null);

    // an escaped backslash is a literal backslash
    var l2 = try init("\\\\");
    try doTest(alloc, &l2, .literal, "\\");
    try expect(try l2.next(alloc) == null);
}

test "lexer overlong delimiter run" {
    const expect = std.testing.expect;

    var debug_allocator = std.heap.DebugAllocator(.{}).init;
    defer _ = debug_allocator.deinit();
    const alloc = debug_allocator.allocator();

    var l = try init("::::");
    try doTest(alloc, &l, .callout, ":::");
    try doTest(alloc, &l, .ref, ":");
    try expect(try l.next(alloc) == null);
}
