aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAnhgelus Morhtuuzh <william@herges.fr>2026-04-15 12:48:54 +0200
committerAnhgelus Morhtuuzh <william@herges.fr>2026-04-15 12:48:54 +0200
commitde077d10359a3bed5259b766c37c94c1d7678a2b (patch)
tree27a948b0c576480ecafa3c28d77408eae07da46c /src
parenta432d44d035508da5e0e67fde2327eaf1a41f382 (diff)
feat(lexer): simple separator
Diffstat (limited to 'src')
-rw-r--r--src/lexer/lexed.zig47
-rw-r--r--src/lexer/lexer.zig87
-rw-r--r--src/root.zig5
3 files changed, 139 insertions, 0 deletions
diff --git a/src/lexer/lexed.zig b/src/lexer/lexed.zig
new file mode 100644
index 0000000..3134705
--- /dev/null
+++ b/src/lexer/lexed.zig
@@ -0,0 +1,47 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+/// Category assigned to a lexed token.
+pub const Kind = enum {
+    literal,
+    delimiter,
+    operator,
+
+    const Self = @This();
+
+    /// Returns the lowercase name of this kind as a static string
+    /// ("literal", "delimiter" or "operator").
+    ///
+    /// Takes a const pointer so it can be called on both `const` and `var`
+    /// values; the pointer is dereferenced with `self.*`.
+    pub fn string(self: *const Self) []const u8 {
+        return switch (self.*) {
+            .literal => "literal",
+            .delimiter => "delimiter",
+            .operator => "operator",
+        };
+    }
+};
+
+/// A token: a `Kind` plus the bytes that make it up.
+///
+/// The token keeps the allocator it was created with so that `deinit`
+/// can release `content` without the caller passing it again.
+pub const Lexed = struct {
+    allocator: Allocator,
+    kind: Kind,
+    content: std.ArrayList(u8),
+
+    const Self = @This();
+
+    /// Bundles an already-filled `content` list with its `kind`.
+    /// `alloc` is stored and later used by `deinit` to free `content`.
+    pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Lexed {
+        return .{ .allocator = alloc, .kind = kind, .content = content };
+    }
+
+    /// Frees `content` using the allocator stored at `init`.
+    pub fn deinit(self: *Self) void {
+        const owner = self.allocator;
+        self.content.deinit(owner);
+    }
+
+    /// Returns a copy of `content` allocated with `alloc`.
+    /// Note that only the byte list is cloned, not the whole token.
+    pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) {
+        const duplicate = try self.content.clone(alloc);
+        return duplicate;
+    }
+
+    /// True when this token has the given `kind` and its bytes equal `content`.
+    pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool {
+        return self.kind == kind and std.mem.eql(u8, self.content.items, content);
+    }
+};
diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig
new file mode 100644
index 0000000..8488a16
--- /dev/null
+++ b/src/lexer/lexer.zig
@@ -0,0 +1,87 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const unicode = std.unicode;
+const lexed = @import("lexed.zig");
+
+// Single-character strings that `Lexer.getCurrentKind` classifies as `.operator`.
+const operators = [_][]const u8{ "*", "_", "`", "<", ">", ":", "!", "[", "]", "(", ")", "$", "-", "." };
+// Strings that `Lexer.getCurrentKind` classifies as `.delimiter`.
+const delimiters = [_][]const u8{"\n"};
+
+/// Splits UTF-8 text into tokens, where each token is a maximal run of
+/// consecutive codepoints that share the same `lexed.Kind`.
+pub const Lexer = struct {
+    /// Cursor over the codepoints of the input given to `init`.
+    iter: unicode.Utf8Iterator,
+    /// Set after an unescaped backslash: the next codepoint is classified
+    /// as a literal whatever it is.
+    force_lit: bool = false,
+
+    const Self = @This();
+
+    pub const Error = error{
+        InvalidUtf8,
+    } || Allocator.Error;
+
+    /// Creates a lexer over `content`.
+    ///
+    /// Returns `error.InvalidUtf8` when `content` is not valid UTF-8.
+    /// The lexer iterates over `content` in place, so the slice must stay
+    /// valid for as long as the lexer is used.
+    pub fn init(content: []const u8) Error!Lexer {
+        const view = try unicode.Utf8View.init(content);
+        return .{ .iter = view.iterator() };
+    }
+
+    /// Returns the next token, or `null` once the input is exhausted.
+    ///
+    /// The token's bytes are allocated with `alloc`; the caller owns the
+    /// returned `Lexed` and must call `deinit` on it.
+    ///
+    /// Rules applied while scanning:
+    /// - `"\r"` is dropped and never ends or changes the current token.
+    /// - An unescaped backslash is not emitted; it forces the following
+    ///   codepoint to be a literal. An escaped backslash (`\\`) is emitted
+    ///   as a literal backslash.
+    pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed {
+        var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
+        errdefer acc.deinit(alloc);
+
+        var current_kind: ?lexed.Kind = null;
+        while (true) {
+            // Look before consuming, so a codepoint of another kind is left
+            // in the iterator for the next call.
+            const upcoming = self.iter.peek(1);
+            if (upcoming.len == 0) break;
+
+            // Skip carriage returns *before* the boundary check: the
+            // codepoint after a "\r" must still be compared with the
+            // current kind, otherwise "a\r\n" would merge into one token.
+            if (std.mem.eql(u8, upcoming, "\r")) {
+                _ = self.iter.nextCodepointSlice();
+                continue;
+            }
+
+            const upcoming_kind = self.getCurrentKind(upcoming);
+            if (current_kind) |active| {
+                if (active != upcoming_kind) break;
+            }
+            current_kind = upcoming_kind;
+            _ = self.iter.nextCodepointSlice();
+
+            if (!self.force_lit and std.mem.eql(u8, upcoming, "\\")) {
+                // Escape introducer: not part of the output, but the token
+                // is a literal (a backslash classifies as `.literal`).
+                self.force_lit = true;
+            } else {
+                // Ordinary codepoint, or the codepoint being escaped
+                // (including an escaped backslash).
+                self.force_lit = false;
+                try acc.appendSlice(alloc, upcoming);
+            }
+        }
+
+        const kind = current_kind orelse {
+            // Nothing was consumed: release the scratch buffer.
+            acc.deinit(alloc);
+            return null;
+        };
+        return lexed.Lexed.init(alloc, kind, acc);
+    }
+
+    /// Classifies one codepoint: `.literal` when an escape is pending,
+    /// otherwise `.operator` / `.delimiter` when listed in the matching
+    /// table, and `.literal` for everything else.
+    fn getCurrentKind(self: *Self, rune: []const u8) lexed.Kind {
+        if (self.force_lit) return .literal;
+        if (isIn(&operators, rune)) return .operator;
+        if (isIn(&delimiters, rune)) return .delimiter;
+        return .literal;
+    }
+};
+
+/// Reports whether `v` is byte-for-byte equal to at least one entry of `arr`.
+fn isIn(arr: []const []const u8, v: []const u8) bool {
+    var idx: usize = 0;
+    while (idx < arr.len) : (idx += 1) {
+        if (std.mem.eql(u8, arr[idx], v)) return true;
+    }
+    return false;
+}
+
+// Checks that a run of literal codepoints and a run of operator codepoints
+// come out as two separate tokens, followed by end of input.
+test "literal" {
+    const testing = std.testing;
+    // The testing allocator makes the test fail if a token is leaked.
+    const alloc = testing.allocator;
+
+    var l = try Lexer.init("hello world :)");
+
+    var first = (try l.next(alloc)).?;
+    defer first.deinit();
+    try testing.expect(first.equals(.literal, "hello world "));
+
+    var second = (try l.next(alloc)).?;
+    defer second.deinit();
+    try testing.expect(second.equals(.operator, ":)"));
+
+    try testing.expect((try l.next(alloc)) == null);
+}
diff --git a/src/root.zig b/src/root.zig
index 5f58050..8e90eb0 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -1,4 +1,5 @@
const std = @import("std");
+const lexer = @import("lexer/lexer.zig");
pub fn bufferedPrint() !void {
// Stdout is for the actual output of your application, for example if you
@@ -20,3 +21,7 @@ pub fn add(a: i32, b: i32) i32 {
test "basic add functionality" {
try std.testing.expect(add(3, 7) == 10);
}
+
+test {
+    // `refAllDeclsRecursive` only walks public declarations, and `lexer` is
+    // a private import, so reference it explicitly to pull in its tests.
+    _ = lexer;
+    std.testing.refAllDeclsRecursive(@This());
+}