aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnhgelus Morhtuuzh <william@herges.fr>2026-04-18 18:19:30 +0200
committerAnhgelus Morhtuuzh <william@herges.fr>2026-04-18 18:19:30 +0200
commit389691f4d11bf86c8be75927a2fbc01cff9c7059 (patch)
tree42ea2d556b415daa361d73f1ffca2d521235f3fd
parentc4f41ad2502567f641652eb745707d2c2817973b (diff)
feat(ast): parse content
-rw-r--r--src/ast.zig84
-rw-r--r--src/dom/Element.zig9
-rw-r--r--src/lexer/Lexer.zig8
-rw-r--r--src/root.zig3
4 files changed, 101 insertions, 3 deletions
diff --git a/src/ast.zig b/src/ast.zig
new file mode 100644
index 0000000..94bb2c5
--- /dev/null
+++ b/src/ast.zig
@@ -0,0 +1,84 @@
+const std = @import("std");
+const Lexed = @import("lexer/Lexed.zig");
+const Lexer = @import("lexer/Lexer.zig");
+const Element = @import("dom/Element.zig");
+const Allocator = std.mem.Allocator;
+
+pub const Error = error{
+ InvalidSequence,
+ UnclosedModifier,
+ FeatureNotSupported,
+} || Lexer.Error;
+
+/// Parse `content` and return the rendered output of every top-level element,
+/// concatenated in source order.
+///
+/// Intermediate state (the element list, everything the lexer and the
+/// elements allocate, and the allocator handed to `render`) comes from an
+/// arena backed by `parent` that is released before returning. Only the
+/// returned slice is allocated directly with `parent`: the caller owns it and
+/// must free it with that same allocator.
+///
+/// Returns `Error.FeatureNotSupported` when the lexer reports a top-level
+/// token kind other than literal, bold, italic or code. Errors from the
+/// lexer, from element construction/rendering and from allocation are
+/// propagated.
+pub fn parse(parent: Allocator, content: []const u8) Error![]const u8 {
+    var arena = std.heap.ArenaAllocator.init(parent);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    var elements = try std.ArrayList(Element).initCapacity(alloc, 2);
+
+    var l = try Lexer.init(content);
+    while (l.nextKind()) |it| {
+        switch (it) {
+            .literal, .bold, .italic, .code => try elements.append(alloc, try parseContent(alloc, &l)),
+            else => return Error.FeatureNotSupported,
+        }
+    }
+
+    var res = try std.ArrayList(u8).initCapacity(parent, elements.items.len);
+    // `res` lives outside the arena, so it has to be released explicitly if
+    // rendering or appending fails below; the arena's deinit does not cover it.
+    errdefer res.deinit(parent);
+    for (elements.items) |it| {
+        // `render` is called on a mutable copy because the loop capture is const.
+        var v = it;
+        try res.appendSlice(parent, try v.render(alloc));
+    }
+    return try res.toOwnedSlice(parent);
+}
+
+/// Consume the next token from `l` and return a wrapper element (created with
+/// `Element.initEmpty`) holding the element built from that token:
+/// a literal becomes an escaped literal element, while bold, italic and code
+/// tokens open a modifier that is parsed by `parseModifier` with the tags
+/// `b`, `em` and `code` respectively.
+///
+/// Returns `Error.InvalidSequence` for any other token kind, or when the
+/// lexer has no token left to give.
+fn parseContent(alloc: Allocator, l: *Lexer) Error!Element {
+    var content = Element.initEmpty(alloc);
+    // Callers only reach this after `nextKind` reported a token, but that is
+    // not a guarantee about `next`; report an exhausted lexer as an error
+    // instead of unwrapping with `.?`, which is unchecked in ReleaseFast.
+    const v = (try l.next(alloc)) orelse return Error.InvalidSequence;
+    switch (v.kind) {
+        .literal => {
+            const el = try Element.initLitEscaped(alloc, v.content.items);
+            try content.appendContent(el);
+        },
+        .bold => try content.appendContent(try parseModifier(alloc, l, .bold, "b")),
+        .italic => try content.appendContent(try parseModifier(alloc, l, .italic, "em")),
+        .code => try content.appendContent(try parseModifier(alloc, l, .code, "code")),
+        else => return Error.InvalidSequence,
+    }
+    return content;
+}
+
+/// Parse the body of a modifier whose opening token has already been
+/// consumed, and return an element with tag `tag` containing that body.
+///
+/// Tokens are read until one of kind `knd` is seen; that closing token is
+/// consumed and discarded. Everything before it is parsed with
+/// `parseContent` and appended to the element, so modifiers can nest.
+///
+/// Returns `Error.UnclosedModifier` when a token for which `isDelimiter()`
+/// is true appears before the closing token, or when the input ends without
+/// a closing token.
+fn parseModifier(alloc: Allocator, l: *Lexer, knd: Lexed.Kind, tag: []const u8) Error!Element {
+    var el = try Element.init(alloc, .content, tag);
+    while (l.nextKind()) |it| {
+        if (it == knd) {
+            // Consume the closing token; its content is not needed.
+            var v = (try l.next(alloc)) orelse return Error.UnclosedModifier;
+            v.deinit();
+            return el;
+        }
+        if (it.isDelimiter()) return Error.UnclosedModifier;
+        try el.appendContent(try parseContent(alloc, l));
+    }
+    // The lexer ran out of input before the closing token was found.
+    return Error.UnclosedModifier;
+}
+
+/// Test helper: parse `t` with `parse` and check that the rendered output is
+/// exactly `v`. The output slice is freed with `alloc` before returning.
+fn doTest(alloc: Allocator, t: []const u8, v: []const u8) !void {
+    const g = try parse(alloc, t);
+    defer alloc.free(g);
+    // expectEqualStrings prints both the expected and the actual string on a
+    // mismatch, so no manual debug print is needed.
+    try std.testing.expectEqualStrings(v, g);
+}
+
+test "parse content" {
+    // std.testing.allocator makes the test fail on a leak, instead of only
+    // printing a message and still passing.
+    const alloc = std.testing.allocator;
+
+    try doTest(alloc, "hello world", "hello world");
+    try doTest(alloc, "*hello* world", "<b>hello</b> world");
+    try doTest(alloc, "*he_ll_o* world", "<b>he<em>ll</em>o</b> world");
+}
diff --git a/src/dom/Element.zig b/src/dom/Element.zig
index 524586c..6a9ac6e 100644
--- a/src/dom/Element.zig
+++ b/src/dom/Element.zig
@@ -34,6 +34,15 @@ pub fn init(alloc: Allocator, knd: Kind, tag: []const u8) !Self {
return v;
}
+/// Init a new element of kind `.content` without setting a tag.
+/// The `arena`, `attributes` and `class_list` fields are each initialised
+/// from `alloc`.
+pub fn initEmpty(alloc: Allocator) Self {
+    const empty: Self = .{
+        .kind = .content,
+        .arena = .init(alloc),
+        .attributes = .init(alloc),
+        .class_list = .init(alloc),
+    };
+    return empty;
+}
+
/// Init a new literal element.
/// The literal content will never be escaped, see initLitEscaped if you want to escape it.
/// It always duplicates strings.
diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig
index f492be6..7524479 100644
--- a/src/lexer/Lexer.zig
+++ b/src/lexer/Lexer.zig
@@ -13,11 +13,17 @@ pub const Error = error{
InvalidUtf8,
} || Allocator.Error;
-pub fn init(content: []const u8) Error!Self {
+/// Create a lexer that iterates over `content`.
+/// Fails with `error.InvalidUtf8` when `Utf8View.init` rejects `content`;
+/// no allocation takes place, so that is the only possible error.
+/// NOTE(review): the iterator presumably borrows `content` rather than
+/// copying it — confirm before freeing the input while the lexer is in use.
+pub fn init(content: []const u8) error{InvalidUtf8}!Self {
const view = try unicode.Utf8View.init(content);
return .{ .iter = view.iterator() };
}
+/// Report the kind of the upcoming token, or `null` when no input is left.
+/// The next codepoint is obtained with `peek(1)` on the iterator and then
+/// classified by `getCurrentKind`.
+/// NOTE(review): this looks like a pure look-ahead, but it depends on
+/// `getCurrentKind` not mutating the lexer — confirm.
+pub fn nextKind(self: *Self) ?Lexed.Kind {
+    const upcoming = self.iter.peek(1);
+    return if (upcoming.len == 0)
+        null
+    else
+        self.getCurrentKind(null, upcoming, &[0]u8{}).kind;
+}
+
pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
errdefer acc.deinit(alloc);
diff --git a/src/root.zig b/src/root.zig
index 43a0d7d..98b0274 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -1,6 +1,5 @@
const std = @import("std");
-pub const lexer = @import("lexer/Lexer.zig");
-pub const element = @import("dom/Element.zig");
+pub const ast = @import("ast.zig");
pub fn bufferedPrint() !void {
// Stdout is for the actual output of your application, for example if you