1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
const std = @import("std");
const Allocator = std.mem.Allocator;
const unicode = std.unicode;
const lexed = @import("lexed.zig");
pub const Lexer = struct {
iter: unicode.Utf8Iterator,
force_lit: bool = false,
const Self = @This();
pub const Error = error{
InvalidUtf8,
} || Allocator.Error;
pub fn init(content: []const u8) Error!Lexer {
const view = try unicode.Utf8View.init(content);
return .{ .iter = view.iterator() };
}
pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed {
var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
errdefer acc.deinit(alloc);
var current_kind: ?lexed.Kind = null;
while (self.iter.nextCodepointSlice()) |rune| {
if (std.mem.eql(u8, rune, "\r")) continue;
// escape chars
if (std.mem.eql(u8, rune, "\\")) {
self.force_lit = true;
current_kind = .literal;
} else {
current_kind = self.getCurrentKind(rune, acc.items);
self.force_lit = false;
try acc.appendSlice(alloc, rune);
}
// conds here to avoid creating complex condition in while
const next_rune = self.iter.peek(1);
if (next_rune.len > 0) {
if (self.getCurrentKind(next_rune, acc.items) != current_kind.?) {
if (!requiresSpace(current_kind.?)) break;
if (std.mem.eql(u8, next_rune, " ")) {
// consume next space
_ = self.iter.nextCodepoint();
break;
}
current_kind = .literal;
}
}
}
const kind = current_kind orelse {
acc.deinit(alloc);
return null;
};
return lexed.Lexed.init(alloc, kind, acc);
}
fn getCurrentKind(self: *Self, rune: []const u8, acc: []const u8) lexed.Kind {
if (self.force_lit) return .literal;
if (std.mem.eql(u8, rune, ">")) return .quote;
if (std.mem.eql(u8, rune, "\n")) return .delimiter;
if (is('#', 6, rune, acc)) return .title;
if (is('`', 3, rune, acc)) return .code;
if (is('$', 3, rune, acc)) return .math;
return .literal;
}
};
fn is(v: u8, maxLen: usize, rune: []const u8, acc: []const u8) bool {
if (acc.len >= maxLen) return false;
for (0..acc.len) |i| if (acc[i] != v) return false;
return std.mem.eql(u8, rune, &[_]u8{v});
}
fn requiresSpace(k: lexed.Kind) bool {
return switch (k) {
.title => true,
else => false,
};
}
fn doTest(alloc: Allocator, l: *Lexer, k: lexed.Kind, v: []const u8) !void {
var first = (try l.next(alloc)).?;
defer first.deinit();
std.testing.expect(first.equals(k, v)) catch |err| {
std.debug.print("{}({s})\n", .{ first.kind, first.content.items });
return err;
};
}
test "lexer common" {
const expect = std.testing.expect;
var arena = std.heap.DebugAllocator(.{}).init;
defer _ = arena.deinit();
const alloc = arena.allocator();
var l = try Lexer.init("# hello world :)");
try doTest(alloc, &l, .title, "#");
try doTest(alloc, &l, .literal, "hello world :)");
try expect(try l.next(alloc) == null);
}
|