about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Anhgelus Morhtuuzh <william@herges.fr> 2026-04-18 15:35:46 +0200
committer Anhgelus Morhtuuzh <william@herges.fr> 2026-04-18 15:35:46 +0200
commit c4f41ad2502567f641652eb745707d2c2817973b (patch)
tree e72efc430924f05334219fc8ff690f4399d56d6c /src
parent 348c06fdf2109b2953724bb50775e788fdde9356 (diff)
feat(lexer): enforce delimiter requirements
Diffstat (limited to 'src')
-rw-r--r-- src/dom/Element.zig 10
-rw-r--r-- src/dom/html.zig 2
-rw-r--r-- src/lexer/Lexed.zig 11
-rw-r--r-- src/lexer/Lexer.zig 91
4 files changed, 83 insertions, 31 deletions
diff --git a/src/dom/Element.zig b/src/dom/Element.zig
index 0cbddc4..524586c 100644
--- a/src/dom/Element.zig
+++ b/src/dom/Element.zig
@@ -21,7 +21,7 @@ literal: ?[]const u8 = null,
/// Init a new Element with the given kind.
/// The tag will never be escaped.
-/// The owernship is always taken.
+/// It always duplicates strings.
pub fn init(alloc: Allocator, knd: Kind, tag: []const u8) !Self {
var v = Self{
.kind = knd,
@@ -36,7 +36,7 @@ pub fn init(alloc: Allocator, knd: Kind, tag: []const u8) !Self {
/// Init a new literal element.
/// The literal content will never be escaped, see initLitEscaped if you want to escape it.
-/// The owernship is always taken.
+/// It always duplicates strings.
pub fn initLit(alloc: Allocator, literal: []const u8) !Self {
var v = Self{
.kind = .literal,
@@ -51,7 +51,7 @@ pub fn initLit(alloc: Allocator, literal: []const u8) !Self {
/// Init a new literal element that is escaped.
/// The literal content will be escaped, see initLit if you don't want this behavior.
-/// The owernship is always taken.
+/// It always duplicates strings.
pub fn initLitEscaped(alloc: Allocator, literal: []const u8) !Self {
const escaped = try html.escape(alloc, literal);
defer alloc.free(escaped);
@@ -198,7 +198,7 @@ fn doTest(alloc: Allocator, el: *Self, exp: []const u8) !void {
test "void element" {
var arena = std.heap.DebugAllocator(.{}).init;
- defer _ = arena.deinit();
+ defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
const alloc = arena.allocator();
var br = try init(alloc, .void, "br");
@@ -220,7 +220,7 @@ test "void element" {
test "content element" {
var arena = std.heap.DebugAllocator(.{}).init;
- defer _ = arena.deinit();
+ defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
const alloc = arena.allocator();
var p = try init(alloc, .content, "p");
diff --git a/src/dom/html.zig b/src/dom/html.zig
index 47de020..a3178f2 100644
--- a/src/dom/html.zig
+++ b/src/dom/html.zig
@@ -35,7 +35,7 @@ fn doTest(alloc: std.mem.Allocator, el: []const u8, exp: []const u8) !void {
test "escaping html" {
var arena = std.heap.DebugAllocator(.{}).init;
- defer _ = arena.deinit();
+ defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
const alloc = arena.allocator();
try doTest(alloc, "hello world", "hello world");
diff --git a/src/lexer/Lexed.zig b/src/lexer/Lexed.zig
index b7c3b2c..4101953 100644
--- a/src/lexer/Lexed.zig
+++ b/src/lexer/Lexed.zig
@@ -3,7 +3,8 @@ const Allocator = std.mem.Allocator;
pub const Kind = enum {
literal,
- delimiter,
+ weak_delimiter,
+ strong_delimiter,
title,
quote,
code,
@@ -19,6 +20,14 @@ pub const Kind = enum {
list_ordored,
list_unordored,
tag,
+
+ pub fn isDelimiter(self: @This()) bool {
+ return switch (self) {
+ .weak_delimiter => true,
+ .strong_delimiter => true,
+ else => false,
+ };
+ }
};
allocator: Allocator,
diff --git a/src/lexer/Lexer.zig b/src/lexer/Lexer.zig
index 2705347..f492be6 100644
--- a/src/lexer/Lexer.zig
+++ b/src/lexer/Lexer.zig
@@ -39,22 +39,21 @@ pub fn next(self: *Self, alloc: Allocator) Error!?Lexed {
}
// conds here to avoid creating complex condition in while
const next_rune = self.iter.peek(1);
- if (next_rune.len > 0) {
- if (self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? and
- (override_if == null or !eql(u8, override_if.?, next_rune)))
- {
- if (!requiresSpace(current_kind.?)) break;
- if (eql(u8, next_rune, " ")) {
- // consume next space
- _ = self.iter.nextCodepoint();
- break;
- }
- current_kind = switch (current_kind.?) {
- .title => if (acc.items.len == 1) .tag else .literal,
- else => .literal,
- };
+ if (requiresSpace(current_kind.?)) {
+ if (eql(u8, next_rune, " ")) {
+ // consume next space
+ _ = self.iter.nextCodepoint();
+ break;
}
+ current_kind = switch (current_kind.?) {
+ .title => if (acc.items.len == 1) .tag else .literal,
+ else => .literal,
+ };
}
+ if (next_rune.len > 0 and
+ self.getCurrentKind(current_kind, next_rune, acc.items).kind != current_kind.? and
+ (override_if == null or !eql(u8, override_if.?, next_rune)))
+ break;
}
const kind = current_kind orelse {
acc.deinit(alloc);
@@ -76,17 +75,24 @@ const kindRes = struct {
}
};
+fn requiresDelimiter(before: ?Lexed.Kind, knd: Lexed.Kind) Lexed.Kind {
+ return if (before == null or before.?.isDelimiter()) knd else .literal;
+}
+
fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []const u8) kindRes {
if (self.force_lit) return .{ .kind = .literal };
- if (eql(u8, rune, "\n")) return .{ .kind = .delimiter };
+ if (eql(u8, rune, "\n")) return .{
+ .kind = if (before == .weak_delimiter) .strong_delimiter else .weak_delimiter,
+ .override_if = rune,
+ };
if (eql(u8, rune, "*")) return .{ .kind = .bold };
if (eql(u8, rune, "_")) return .{ .kind = .italic };
- if (eql(u8, rune, ">")) return .{ .kind = .quote };
- if (eql(u8, rune, "-")) return .{ .kind = .list_unordored };
- if (eql(u8, rune, ".")) return .{ .kind = .list_ordored };
- if (eql(u8, rune, "!")) return .{ .kind = .image };
+ if (eql(u8, rune, ">")) return .{ .kind = requiresDelimiter(before, .quote) };
+ if (eql(u8, rune, ".")) return .{ .kind = requiresDelimiter(before, .list_ordored) };
+ if (eql(u8, rune, "-")) return .{ .kind = requiresDelimiter(before, .list_unordored) };
+ if (eql(u8, rune, "!")) return .{ .kind = requiresDelimiter(before, .image) };
if (eql(u8, rune, "<")) return .{ .kind = .ref };
- if (is('#', 6, rune, acc)) return .{ .kind = .title };
+ if (is('#', 6, rune, acc)) return .{ .kind = requiresDelimiter(before, .title) };
if (isIn(links, rune, acc, before, .link)) return .{ .kind = .link };
if (isOneOrThree(":", rune, acc, .ref, .callout)) |it| return it;
if (isOneOrThree("$", rune, acc, .math, .math_block)) |it| return it;
@@ -95,9 +101,9 @@ fn getCurrentKind(self: *Self, before: ?Lexed.Kind, rune: []const u8, acc: []con
}
fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool {
- if (acc.len >= maxLen) return false;
- for (0..acc.len) |i| if (acc[i] != v) return false;
- return eql(u8, rune, &[_]u8{v});
+ if (!eql(u8, rune, &[_]u8{v})) return false;
+ for (acc) |it| if (it != v) return true;
+ return acc.len < maxLen;
}
const links = &[_][]const u8{ "[", "](", ")" };
@@ -177,7 +183,7 @@ test "lexer common" {
const expect = std.testing.expect;
var arena = std.heap.DebugAllocator(.{}).init;
- defer _ = arena.deinit();
+ defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
const alloc = arena.allocator();
var l = try init("# hello world :)");
@@ -189,3 +195,40 @@ test "lexer common" {
try expect(try l.next(alloc) == null);
}
+
+test "lexer multiline" {
+ const expect = std.testing.expect;
+
+ var arena = std.heap.DebugAllocator(.{}).init;
+ defer if (arena.deinit() == .leak) std.debug.print("leaking!\n", .{});
+ const alloc = arena.allocator();
+
+ var l = try init(
+ \\# Title
+ \\
+ \\paragraph
+ \\# a title
+ \\a # in sentence
+ \\
+ \\#tag
+ \\#tag2
+ );
+
+ try doTest(alloc, &l, .title, "#");
+ try doTest(alloc, &l, .literal, "Title");
+ try doTest(alloc, &l, .strong_delimiter, "\n\n");
+ try doTest(alloc, &l, .literal, "paragraph");
+ try doTest(alloc, &l, .weak_delimiter, "\n");
+ try doTest(alloc, &l, .title, "#");
+ try doTest(alloc, &l, .literal, "a title");
+ try doTest(alloc, &l, .weak_delimiter, "\n");
+ try doTest(alloc, &l, .literal, "a # in sentence");
+ try doTest(alloc, &l, .strong_delimiter, "\n\n");
+ try doTest(alloc, &l, .tag, "#");
+ try doTest(alloc, &l, .literal, "tag");
+ try doTest(alloc, &l, .weak_delimiter, "\n");
+ try doTest(alloc, &l, .tag, "#");
+ try doTest(alloc, &l, .literal, "tag2");
+
+ try expect(try l.next(alloc) == null);
+}