From 6df64050b1442a5f3a0f566cd816639ac1fd298f Mon Sep 17 00:00:00 2001 From: Anhgelus Morhtuuzh Date: Thu, 16 Apr 2026 22:33:44 +0200 Subject: feat(dom): element generator --- src/dom/Element.zig | 150 +++++++++++++++++++++++++++++++++++++++++ src/lexer/Lexed.zig | 49 ++++++++++++++ src/lexer/Lexer.zig | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lexer/lexed.zig | 49 -------------- src/lexer/lexer.zig | 191 ---------------------------------------------------- src/root.zig | 3 +- 6 files changed, 392 insertions(+), 241 deletions(-) create mode 100644 src/dom/Element.zig create mode 100644 src/lexer/Lexed.zig create mode 100644 src/lexer/Lexer.zig delete mode 100644 src/lexer/lexed.zig delete mode 100644 src/lexer/lexer.zig diff --git a/src/dom/Element.zig b/src/dom/Element.zig new file mode 100644 index 0000000..ff8a3d1 --- /dev/null +++ b/src/dom/Element.zig @@ -0,0 +1,150 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const eql = std.mem.eql; + +pub const Kind = enum { + void, + content, + literal, +}; + +const Self = @This(); + +kind: Kind, +tag: ?[]const u8 = null, +attributes: std.StringArrayHashMap([]const u8), +class_list: std.BufSet, +content: ?[]*Self = null, +literal: ?[]const u8 = null, + +pub fn init(alloc: Allocator, knd: Kind, tag: []const u8) Self { + return .{ + .kind = knd, + .tag = tag, + .attributes = .init(alloc), + .class_list = .init(alloc), + }; +} + +pub fn initLit(alloc: Allocator, literal: []const u8) Self { + return .{ .kind = .literal, .literal = literal, .attributes = .init(alloc), .class_list = .init(alloc) }; +} + +pub fn deinit(self: *Self) void { + self.attributes.deinit(); + self.class_list.deinit(); +} + +pub fn render(self: *Self, alloc: Allocator) !std.ArrayList(u8) { + var attr = try self.renderAttribute(alloc); + defer attr.deinit(alloc); + var acc = try std.ArrayList(u8).initCapacity(alloc, 2); + errdefer acc.deinit(alloc); + if (self.tag) |tag| { + try acc.append(alloc, '<'); + try 
acc.appendSlice(alloc, tag); + try acc.appendSlice(alloc, attr.items); + try acc.append(alloc, '>'); + } + switch (self.kind) { + .void => return acc, + .content => { + if (self.content) |content| { + for (content) |it| { + var sub = try it.render(alloc); + defer sub.deinit(alloc); + try acc.appendSlice(alloc, sub.items); + } + } + }, + .literal => try acc.appendSlice(alloc, self.literal.?), + } + if (self.tag) |tag| { + try acc.appendSlice(alloc, "</"); + try acc.appendSlice(alloc, tag); + try acc.append(alloc, '>'); + } + return acc; +} + +fn renderAttribute(self: *Self, alloc: Allocator) !std.ArrayList(u8) { + var iter = self.attributes.iterator(); + if (iter.len == 0) return .empty; + var acc = try std.ArrayList(u8).initCapacity(alloc, 2); + errdefer acc.deinit(alloc); + try acc.append(alloc, ' '); + var i: usize = 0; + while (iter.next()) |it| : (i += 1) { + try acc.appendSlice(alloc, it.key_ptr.*); + try acc.appendSlice(alloc, "=\""); + // MISSING ESCAPING!!! + try acc.appendSlice(alloc, it.value_ptr.*); + try acc.append(alloc, '"'); + if (i < iter.len - 1) try acc.append(alloc, ' '); + } + return acc; +} + +pub fn setAttribute(self: *Self, k: []const u8, v: []const u8) !void { + try self.attributes.put(k, v); +} + +pub fn removeAttribute(self: *Self, k: []const u8) void { + _ = self.attributes.orderedRemove(k); +} + +pub fn hasAttribute(self: *Self, k: []const u8) bool { + return self.attributes.contains(k); +} + +fn doTest(alloc: Allocator, el: *Self, exp: []const u8) !void { + var rendered = try el.render(alloc); + defer rendered.deinit(alloc); + std.testing.expect(eql(u8, rendered.items, exp)) catch |err| { + std.debug.print("{s}\n", .{rendered.items}); + return err; + }; +} + +test "void element" { + var arena = std.heap.DebugAllocator(.{}).init; + defer _ = arena.deinit(); + const alloc = arena.allocator(); + + var br = init(alloc, .void, "br"); + defer br.deinit(); + + try doTest(alloc, &br, "<br>"); + + var img = init(alloc, .void, "img"); + defer img.deinit(); + try img.setAttribute("src", "foo"); + try img.setAttribute("alt", "bar"); + + try doTest(alloc, &img, "<img src=\"foo\" alt=\"bar\">"); +} + +test "content element" { + var arena = std.heap.DebugAllocator(.{}).init; + defer _ = arena.deinit(); + const alloc = arena.allocator(); + + var p = init(alloc, .content, "p"); + defer p.deinit(); + + var content = initLit(alloc, "hello world"); + defer content.deinit(); + var in = [_]*Self{&content}; + p.content = &in; + + try doTest(alloc, &content, "hello world"); + try doTest(alloc, &p, "<p>hello world</p>"); + + var div = init(alloc, .content, "div"); + defer div.deinit(); + try div.setAttribute("class", "foo-bar"); + var in2 = [_]*Self{&p, &content}; + div.content = &in2; + + try doTest(alloc, &div, "<div class=\"foo-bar\"><p>hello world</p>hello world</div>"); +} diff --git a/src/lexer/Lexed.zig b/src/lexer/Lexed.zig new file mode 100644 index 0000000..b7c3b2c --- /dev/null +++ b/src/lexer/Lexed.zig @@ -0,0 +1,49 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const Kind = enum { + literal, + delimiter, + title, + quote, + code, + code_block, + math, + math_block, + image, + link, + bold, + italic, + ref, + callout, + list_ordored, + list_unordored, + tag, +}; + +allocator: Allocator, +kind: Kind, +content: std.ArrayList(u8), + +const Self = @This(); + +pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Self { + return .{ + .allocator = alloc, + .kind = kind, + .content = content, + }; +} + +pub fn deinit(self: *Self) void { + self.content.deinit(self.allocator); +} + +pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) { + return self.content.clone(alloc); +} + +pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool { + if (self.kind != kind) return false; + return std.mem.eql(u8, self.content.items, content); +} diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig new file mode 100644 index 0000000..2705347 --- /dev/null +++ b/src/lexer/Lexer.zig @@ -0,0 +1,191 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const eql = std.mem.eql; +const unicode = std.unicode; +const Lexed = @import("Lexed.zig"); + +iter: unicode.Utf8Iterator, +force_lit: bool = false, + +const Self = @This(); + +pub const Error = error{ + InvalidUtf8, +} || Allocator.Error; + +pub fn init(content: []const u8) Error!Self { + const view = try unicode.Utf8View.init(content); + return .{ .iter = view.iterator() }; +} + +pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { + var acc = try std.ArrayList(u8).initCapacity(alloc, 2); + errdefer acc.deinit(alloc); + + var current_kind: ?Lexed.Kind = null; + while (self.iter.nextCodepointSlice()) |rune| { + if (eql(u8, rune, "\r")) continue; + var override_if: ?[]const u8 = null; + 
// escape chars + if (eql(u8, rune, "\\")) { + self.force_lit = true; + current_kind = .literal; + } else { + self.force_lit = false; + const res = self.getCurrentKind(current_kind, rune, acc.items); + current_kind = res.kind; + override_if = res.override_if; + try acc.appendSlice(alloc, rune); + } + // conds here to avoid creating complex condition in while + const next_rune = self.iter.peek(1); + if (next_rune.len > 0) { + if (self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? and + (override_if == null or !eql(u8, override_if.?, next_rune))) + { + if (!requiresSpace(current_kind.?)) break; + if (eql(u8, next_rune, " ")) { + // consume next space + _ = self.iter.nextCodepoint(); + break; + } + current_kind = switch (current_kind.?) { + .title => if (acc.items.len == 1) .tag else .literal, + else => .literal, + }; + } + } + } + const kind = current_kind orelse { + acc.deinit(alloc); + return null; + }; + return .init(alloc, kind, acc); +} + +const kindRes = struct { + kind: Lexed.Kind, + override_if: ?[]const u8 = null, + + fn equals(self: @This(), v: @This()) bool { + if (self.kind != v.kind) return false; + if (self.override_if == null and v.override_if != null) return false; + if (self.override_if != null and v.override_if == null) return false; + if (self.override_if) |it| return eql(u8, it, v.override_if.?); + return true; + } +}; + +fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes { + if (self.force_lit) return .{ .kind = .literal }; + if (eql(u8, rune, "\n")) return .{ .kind = .delimiter }; + if (eql(u8, rune, "*")) return .{ .kind = .bold }; + if (eql(u8, rune, "_")) return .{ .kind = .italic }; + if (eql(u8, rune, ">")) return .{ .kind = .quote }; + if (eql(u8, rune, "-")) return .{ .kind = .list_unordored }; + if (eql(u8, rune, ".")) return .{ .kind = .list_ordored }; + if (eql(u8, rune, "!")) return .{ .kind = .image }; + if (eql(u8, rune, "<")) return .{ .kind = .ref }; + if 
(is('#', 6, rune, acc)) return .{ .kind = .title }; + if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link }; + if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it; + if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it; + if (isOneOrThree("`", rune, acc, .code, .code_block)) |it| return it; + return .{ .kind = .literal }; +} + +fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool { + if (acc.len >= maxLen) return false; + for (0..acc.len) |i| if (acc[i] != v) return false; + return eql(u8, rune, &[_]u8{v}); +} + +const links = &[_][]const u8{ "[", "](", ")" }; + +fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool { + var acc = p; + if (before) |b| { + if (now != b) acc = &[_]u8{}; + } + for (ops) |op| { + const ln = acc.len + rune.len; + if (op.len >= ln and eql(u8, acc, op[0..acc.len]) and eql(u8, rune, op[acc.len..ln])) + return true; + } + return false; +} + +fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?kindRes { + if (!eql(u8, rune, op)) return null; + var acc = p; + if (acc.len < op.len or !eql(u8, acc[0..op.len], op)) acc = &[_]u8{}; + + var iter = (unicode.Utf8View.init(acc) catch unreachable).iterator(); + var ln: usize = 1; // number of runes + while (iter.nextCodepointSlice()) |it| : (ln += 1) { + if (!eql(u8, it, op)) return null; + } + + return switch (ln) { + 1 => .{ + .kind = one, + .override_if = op, + }, + 2 => .{ + .kind = .literal, + .override_if = op, + }, + 3 => .{ .kind = three }, + else => unreachable, + }; +} + +fn requiresSpace(k: Lexed.Kind) bool { + return switch (k) { + .title => true, + .list_ordored => true, + .list_unordored => true, + else => false, + }; +} + +fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void { + var first = (try l.next(alloc)).?; + defer first.deinit(); + std.testing.expect(first.equals(k, v)) catch |err| { + 
std.debug.print("{}({s})\n", .{ first.kind, first.content.items }); + return err; + }; +} + +test "one or three" { + const expect = std.testing.expect; + + // valid + try expect(isOneOrThree(":", ":", "", .ref, .callout).?.equals(.{ .kind = .ref, .override_if = ":" })); + try expect(isOneOrThree(":", ":", ":", .ref, .callout).?.equals(.{ .kind = .literal, .override_if = ":" })); + try expect(isOneOrThree(":", ":", "::", .ref, .callout).?.equals(.{ .kind = .callout })); + try expect(isOneOrThree(":", ":", "a", .ref, .callout).?.equals(.{ .kind = .ref, .override_if = ":" })); + + // invalid + try expect(isOneOrThree(":", "a", "", .ref, .callout) == null); + try expect(isOneOrThree(":", "a", "b", .ref, .callout) == null); + try expect(isOneOrThree(":", "a", ":", .ref, .callout) == null); +} + +test "lexer common" { + const expect = std.testing.expect; + + var arena = std.heap.DebugAllocator(.{}).init; + defer _ = arena.deinit(); + const alloc = arena.allocator(); + + var l = try init("# hello world :)"); + + try doTest(alloc, &l, .title, "#"); + try doTest(alloc, &l, .literal, "hello world "); + try doTest(alloc, &l, .ref, ":"); + try doTest(alloc, &l, .link, ")"); + + try expect(try l.next(alloc) == null); +} diff --git a/src/lexer/lexed.zig b/src/lexer/lexed.zig deleted file mode 100644 index b7c3b2c..0000000 --- a/src/lexer/lexed.zig +++ /dev/null @@ -1,49 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; - -pub const Kind = enum { - literal, - delimiter, - title, - quote, - code, - code_block, - math, - math_block, - image, - link, - bold, - italic, - ref, - callout, - list_ordored, - list_unordored, - tag, -}; - -allocator: Allocator, -kind: Kind, -content: std.ArrayList(u8), - -const Self = @This(); - -pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Self { - return .{ - .allocator = alloc, - .kind = kind, - .content = content, - }; -} - -pub fn deinit(self: *Self) void { - self.content.deinit(self.allocator); -} - 
-pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) { - return self.content.clone(alloc); -} - -pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool { - if (self.kind != kind) return false; - return std.mem.eql(u8, self.content.items, content); -} diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig deleted file mode 100644 index 1144ebc..0000000 --- a/src/lexer/lexer.zig +++ /dev/null @@ -1,191 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const eql = std.mem.eql; -const unicode = std.unicode; -const Lexed = @import("lexed.zig"); - -iter: unicode.Utf8Iterator, -force_lit: bool = false, - -const Self = @This(); - -pub const Error = error{ - InvalidUtf8, -} || Allocator.Error; - -pub fn init(content: []const u8) Error!Self { - const view = try unicode.Utf8View.init(content); - return .{ .iter = view.iterator() }; -} - -pub fn next(self: *Self, alloc: Allocator) Error!?Lexed { - var acc = try std.ArrayList(u8).initCapacity(alloc, 2); - errdefer acc.deinit(alloc); - - var current_kind: ?Lexed.Kind = null; - while (self.iter.nextCodepointSlice()) |rune| { - if (eql(u8, rune, "\r")) continue; - var override_if: ?[]const u8 = null; - // escape chars - if (eql(u8, rune, "\\")) { - self.force_lit = true; - current_kind = .literal; - } else { - self.force_lit = false; - const res = self.getCurrentKind(current_kind, rune, acc.items); - current_kind = res.kind; - override_if = res.override_if; - try acc.appendSlice(alloc, rune); - } - // conds here to avoid creating complex condition in while - const next_rune = self.iter.peek(1); - if (next_rune.len > 0) { - if (self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? 
and - (override_if == null or !eql(u8, override_if.?, next_rune))) - { - if (!requiresSpace(current_kind.?)) break; - if (eql(u8, next_rune, " ")) { - // consume next space - _ = self.iter.nextCodepoint(); - break; - } - current_kind = switch (current_kind.?) { - .title => if (acc.items.len == 1) .tag else .literal, - else => .literal, - }; - } - } - } - const kind = current_kind orelse { - acc.deinit(alloc); - return null; - }; - return Lexed.init(alloc, kind, acc); -} - -const kindRes = struct { - kind: Lexed.Kind, - override_if: ?[]const u8 = null, - - fn equals(self: @This(), v: @This()) bool { - if (self.kind != v.kind) return false; - if (self.override_if == null and v.override_if != null) return false; - if (self.override_if != null and v.override_if == null) return false; - if (self.override_if) |it| return eql(u8, it, v.override_if.?); - return true; - } -}; - -fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes { - if (self.force_lit) return .{ .kind = .literal }; - if (eql(u8, rune, "\n")) return .{ .kind = .delimiter }; - if (eql(u8, rune, "*")) return .{ .kind = .bold }; - if (eql(u8, rune, "_")) return .{ .kind = .italic }; - if (eql(u8, rune, ">")) return .{ .kind = .quote }; - if (eql(u8, rune, "-")) return .{ .kind = .list_unordored }; - if (eql(u8, rune, ".")) return .{ .kind = .list_ordored }; - if (eql(u8, rune, "!")) return .{ .kind = .image }; - if (eql(u8, rune, "<")) return .{ .kind = .ref }; - if (is('#', 6, rune, acc)) return .{ .kind = .title }; - if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link }; - if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it; - if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it; - if (isOneOrThree("`", rune, acc, .code, .code_block)) |it| return it; - return .{ .kind = .literal }; -} - -fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool { - if (acc.len >= maxLen) return false; - for (0..acc.len) |i| if 
(acc[i] != v) return false; - return eql(u8, rune, &[_]u8{v}); -} - -const links = &[_][]const u8{ "[", "](", ")" }; - -fn isIn(ops: []const []const u8, rune: []const u8, p: []const u8, before: ?Lexed.Kind, now: Lexed.Kind) bool { - var acc = p; - if (before) |b| { - if (now != b) acc = &[_]u8{}; - } - for (ops) |op| { - const ln = acc.len + rune.len; - if (op.len >= ln and eql(u8, acc, op[0..acc.len]) and eql(u8, rune, op[acc.len..ln])) - return true; - } - return false; -} - -fn isOneOrThree(op: []const u8, rune: []const u8, p: []const u8, one: Lexed.Kind, three: Lexed.Kind) ?kindRes { - if (!eql(u8, rune, op)) return null; - var acc = p; - if (acc.len < op.len or !eql(u8, acc[0..op.len], op)) acc = &[_]u8{}; - - var iter = (unicode.Utf8View.init(acc) catch unreachable).iterator(); - var ln: usize = 1; // number of runes - while (iter.nextCodepointSlice()) |it| : (ln += 1) { - if (!eql(u8, it, op)) return null; - } - - return switch (ln) { - 1 => .{ - .kind = one, - .override_if = op, - }, - 2 => .{ - .kind = .literal, - .override_if = op, - }, - 3 => .{ .kind = three }, - else => unreachable, - }; -} - -fn requiresSpace(k: Lexed.Kind) bool { - return switch (k) { - .title => true, - .list_ordored => true, - .list_unordored => true, - else => false, - }; -} - -fn doTest(alloc: Allocator, l: *Self, k: Lexed.Kind, v: []const u8) !void { - var first = (try l.next(alloc)).?; - defer first.deinit(); - std.testing.expect(first.equals(k, v)) catch |err| { - std.debug.print("{}({s})\n", .{ first.kind, first.content.items }); - return err; - }; -} - -test "one or three" { - const expect = std.testing.expect; - - // valid - try expect(isOneOrThree(":", ":", "", .ref, .callout).?.equals(.{ .kind = .ref, .override_if = ":" })); - try expect(isOneOrThree(":", ":", ":", .ref, .callout).?.equals(.{ .kind = .literal, .override_if = ":" })); - try expect(isOneOrThree(":", ":", "::", .ref, .callout).?.equals(.{ .kind = .callout })); - try expect(isOneOrThree(":", ":", "a", .ref, 
.callout).?.equals(.{ .kind = .ref, .override_if = ":" })); - - // invalid - try expect(isOneOrThree(":", "a", "", .ref, .callout) == null); - try expect(isOneOrThree(":", "a", "b", .ref, .callout) == null); - try expect(isOneOrThree(":", "a", ":", .ref, .callout) == null); -} - -test "lexer common" { - const expect = std.testing.expect; - - var arena = std.heap.DebugAllocator(.{}).init; - defer _ = arena.deinit(); - const alloc = arena.allocator(); - - var l = try init("# hello world :)"); - - try doTest(alloc, &l, .title, "#"); - try doTest(alloc, &l, .literal, "hello world "); - try doTest(alloc, &l, .ref, ":"); - try doTest(alloc, &l, .link, ")"); - - try expect(try l.next(alloc) == null); -} diff --git a/src/root.zig b/src/root.zig index 2bcf565..43a0d7d 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,5 +1,6 @@ const std = @import("std"); -pub const lexer = @import("lexer/lexer.zig"); +pub const lexer = @import("lexer/Lexer.zig"); +pub const element = @import("dom/Element.zig"); pub fn bufferedPrint() !void { // Stdout is for the actual output of your application, for example if you -- cgit v1.2.3