diff options
| author | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-18 18:19:30 +0200 |
|---|---|---|
| committer | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-18 18:19:30 +0200 |
| commit | 389691f4d11bf86c8be75927a2fbc01cff9c7059 (patch) | |
| tree | 42ea2d556b415daa361d73f1ffca2d521235f3fd /src | |
| parent | c4f41ad2502567f641652eb745707d2c2817973b (diff) | |
feat(ast): parse content
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.zig | 84 | ||||
| -rw-r--r-- | src/dom/Element.zig | 9 | ||||
| -rw-r--r-- | src/lexer/Lexer.zig | 8 | ||||
| -rw-r--r-- | src/root.zig | 3 |
4 files changed, 101 insertions, 3 deletions
diff --git a/src/ast.zig b/src/ast.zig
new file mode 100644
index 0000000..94bb2c5
--- /dev/null
+++ b/src/ast.zig
@@ -0,0 +1,108 @@
+//! Parse lexed markup into DOM elements and render the result as a string.
+const std = @import("std");
+const Lexed = @import("lexer/Lexed.zig");
+const Lexer = @import("lexer/Lexer.zig");
+const Element = @import("dom/Element.zig");
+const Allocator = std.mem.Allocator;
+
+/// Errors `parse` can return, on top of the lexer's own errors.
+pub const Error = error{
+    /// A token of an unexpected kind showed up where content was expected.
+    InvalidSequence,
+    /// A modifier was opened but never closed.
+    UnclosedModifier,
+    /// The input uses a token kind this parser does not handle.
+    FeatureNotSupported,
+} || Lexer.Error;
+
+/// Parse `content` and return its rendered form.
+///
+/// Intermediate elements live in an arena backed by `parent` and are released
+/// before returning. The returned slice is allocated with `parent`: the caller
+/// owns it and must free it with that allocator.
+pub fn parse(parent: Allocator, content: []const u8) Error![]const u8 {
+    var arena = std.heap.ArenaAllocator.init(parent);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    var elements = try std.ArrayList(Element).initCapacity(alloc, 2);
+
+    var l = try Lexer.init(content);
+    while (l.nextKind()) |it| {
+        switch (it) {
+            .literal, .bold, .italic, .code => try elements.append(alloc, try parseContent(alloc, &l)),
+            else => return Error.FeatureNotSupported,
+        }
+    }
+
+    var res = try std.ArrayList(u8).initCapacity(parent, elements.items.len);
+    // `res` is not owned by the arena: release it if rendering fails midway.
+    errdefer res.deinit(parent);
+    for (elements.items) |it| {
+        var v = it;
+        try res.appendSlice(parent, try v.render(alloc));
+    }
+    return res.toOwnedSlice(parent);
+}
+
+/// Consume the next token and wrap what it produces in an empty element.
+///
+/// Only call this after `l.nextKind()` returned a kind: the next token is
+/// unwrapped unconditionally.
+fn parseContent(alloc: Allocator, l: *Lexer) Error!Element {
+    var content = Element.initEmpty(alloc);
+    const v = (try l.next(alloc)).?;
+    switch (v.kind) {
+        .literal => {
+            const el = try Element.initLitEscaped(alloc, v.content.items);
+            try content.appendContent(el);
+        },
+        .bold => try content.appendContent(try parseModifier(alloc, l, .bold, "b")),
+        .italic => try content.appendContent(try parseModifier(alloc, l, .italic, "em")),
+        .code => try content.appendContent(try parseModifier(alloc, l, .code, "code")),
+        else => return Error.InvalidSequence,
+    }
+    return content;
+}
+
+/// Parse the content of a modifier of kind `knd` into a `tag` element.
+///
+/// The opening token must already be consumed. Content is read until a token
+/// of kind `knd` closes the modifier; that token is consumed as well.
+/// Returns `Error.UnclosedModifier` when a delimiter or the end of the input
+/// is reached first.
+fn parseModifier(alloc: Allocator, l: *Lexer, knd: Lexed.Kind, tag: []const u8) Error!Element {
+    var el = try Element.init(alloc, .content, tag);
+    while (l.nextKind()) |it| {
+        if (it == knd) {
+            // consuming the finisher
+            var v = (try l.next(alloc)).?;
+            v.deinit();
+            return el;
+        }
+        if (it.isDelimiter()) return Error.UnclosedModifier;
+        try el.appendContent(try parseContent(alloc, l));
+    }
+    // The input ended before the closing token showed up.
+    return Error.UnclosedModifier;
+}
+
+/// Parse `t` and check that the rendered output equals `v`.
+fn doTest(alloc: Allocator, t: []const u8, v: []const u8) !void {
+    const g = try parse(alloc, t);
+    defer alloc.free(g);
+    try std.testing.expectEqualStrings(v, g);
+}
+
+test "parse content" {
+    // The testing allocator fails the test on a leak instead of only logging it.
+    const alloc = std.testing.allocator;
+
+    try doTest(alloc, "hello world", "hello world");
+    try doTest(alloc, "*hello* world", "<b>hello</b> world");
+    try doTest(alloc, "*he_ll_o* world", "<b>he<em>ll</em>o</b> world");
+}
+
+test "unclosed modifier" {
+    try std.testing.expectError(Error.UnclosedModifier, parse(std.testing.allocator, "*hello"));
+}
diff --git a/src/dom/Element.zig b/src/dom/Element.zig
index 524586c..6a9ac6e 100644
--- a/src/dom/Element.zig
+++ b/src/dom/Element.zig
@@ -34,6 +34,16 @@ pub fn init(alloc: Allocator, knd: Kind, tag: []const u8) !Self {
     return v;
 }
 
+/// Init a `.content` element without a tag; remaining fields keep their defaults.
+pub fn initEmpty(alloc: Allocator) Self {
+    return .{
+        .kind = .content,
+        .arena = .init(alloc),
+        .attributes = .init(alloc),
+        .class_list = .init(alloc),
+    };
+}
+
 /// Init a new literal element.
 /// The literal content will never be escaped, see initLitEscaped if you want to escape it.
 /// It always duplicates strings.
diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig
index f492be6..7524479 100644
--- a/src/lexer/Lexer.zig
+++ b/src/lexer/Lexer.zig
@@ -13,11 +13,20 @@ pub const Error = error{
     InvalidUtf8,
 } || Allocator.Error;
 
-pub fn init(content: []const u8) Error!Self {
+pub fn init(content: []const u8) error{InvalidUtf8}!Self {
     const view = try unicode.Utf8View.init(content);
     return .{ .iter = view.iterator() };
 }
 
+/// Return the kind of the upcoming token, or null when no input is left.
+/// Looks one codepoint ahead with `iter.peek`.
+/// NOTE(review): presumably leaves the lexer position untouched; confirm `getCurrentKind` does not advance.
+pub fn nextKind(self: *Self) ?Lexed.Kind {
+    const next_rune = self.iter.peek(1);
+    if (next_rune.len == 0) return null;
+    return self.getCurrentKind(null, next_rune, &[0]u8{}).kind;
+}
+
 pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
     var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
     errdefer acc.deinit(alloc);
diff --git a/src/root.zig b/src/root.zig
index 43a0d7d..98b0274 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -1,6 +1,5 @@
 const std = @import("std");
-pub const lexer = @import("lexer/Lexer.zig");
-pub const element = @import("dom/Element.zig");
+pub const ast = @import("ast.zig");
 
 pub fn bufferedPrint() !void {
     // Stdout is for the actual output of your application, for example if you |
