diff options
| author | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-15 12:48:54 +0200 |
|---|---|---|
| committer | Anhgelus Morhtuuzh <william@herges.fr> | 2026-04-15 12:48:54 +0200 |
| commit | de077d10359a3bed5259b766c37c94c1d7678a2b (patch) | |
| tree | 27a948b0c576480ecafa3c28d77408eae07da46c | |
| parent | a432d44d035508da5e0e67fde2327eaf1a41f382 (diff) | |
feat(lexer): simple separator
| -rw-r--r-- | src/lexer/lexed.zig | 47 | ||||
| -rw-r--r-- | src/lexer/lexer.zig | 87 | ||||
| -rw-r--r-- | src/root.zig | 5 |
3 files changed, 139 insertions, 0 deletions
diff --git a/src/lexer/lexed.zig b/src/lexer/lexed.zig
new file mode 100644
index 0000000..3134705
--- /dev/null
+++ b/src/lexer/lexed.zig
@@ -0,0 +1,63 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+/// Category assigned to a token by the lexer.
+pub const Kind = enum {
+    literal,
+    delimiter,
+    operator,
+
+    const Self = @This();
+
+    /// Returns the name of the kind as a static string.
+    pub fn string(self: *const Self) []const u8 {
+        return switch (self.*) {
+            .literal => "literal",
+            .delimiter => "delimiter",
+            .operator => "operator",
+        };
+    }
+};
+
+/// A token: a kind plus the bytes that make it up.
+/// The token owns `content`; release it with `deinit`.
+pub const Lexed = struct {
+    /// Allocator used by `deinit` to free `content`.
+    allocator: Allocator,
+    kind: Kind,
+    content: std.ArrayList(u8),
+
+    const Self = @This();
+
+    /// Wraps `content` in a token. `deinit` frees `content` with `alloc`,
+    /// so `alloc` must be the allocator `content` was allocated with.
+    pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Lexed {
+        return .{
+            .allocator = alloc,
+            .kind = kind,
+            .content = content,
+        };
+    }
+
+    /// Frees the token's content.
+    pub fn deinit(self: *Self) void {
+        self.content.deinit(self.allocator);
+    }
+
+    /// Returns a copy of the token's content allocated with `alloc`.
+    /// The caller owns the returned list.
+    pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) {
+        return self.content.clone(alloc);
+    }
+
+    /// Reports whether the token has the given kind and content.
+    pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool {
+        if (self.kind != kind) return false;
+        return std.mem.eql(u8, self.content.items, content);
+    }
+};
+
+test "Kind.string" {
+    const kind: Kind = .operator;
+    try std.testing.expectEqualStrings("operator", kind.string());
+}
diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig
new file mode 100644
index 0000000..8488a16
--- /dev/null
+++ b/src/lexer/lexer.zig
@@ -0,0 +1,136 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const unicode = std.unicode;
+const lexed = @import("lexed.zig");
+
+/// Runes lexed as `.operator`.
+const operators = [_][]const u8{ "*", "_", "`", "<", ">", ":", "!", "[", "]", "(", ")", "$", "-", "." };
+/// Runes lexed as `.delimiter`.
+const delimiters = [_][]const u8{"\n"};
+
+/// Splits UTF-8 text into tokens, each a run of consecutive runes of one kind.
+pub const Lexer = struct {
+    iter: unicode.Utf8Iterator,
+    /// Set by a backslash: the next rune is lexed as a literal.
+    force_lit: bool = false,
+
+    const Self = @This();
+
+    pub const Error = error{
+        InvalidUtf8,
+    } || Allocator.Error;
+
+    /// Creates a lexer over `content`.
+    /// Returns `error.InvalidUtf8` when `content` is not valid UTF-8.
+    pub fn init(content: []const u8) Error!Lexer {
+        const view = try unicode.Utf8View.init(content);
+        return .{ .iter = view.iterator() };
+    }
+
+    /// Returns the next token, or `null` once the input is exhausted.
+    /// The caller owns the returned token and must call `deinit` on it.
+    ///
+    /// Carriage returns are dropped and never end or re-tag a token.
+    /// A backslash is dropped and makes the rune after it a literal, so an
+    /// escaped backslash yields a literal backslash. A backslash that ends
+    /// the input has nothing to escape and produces no token.
+    pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed {
+        var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
+        errdefer acc.deinit(alloc);
+
+        var current_kind: ?lexed.Kind = null;
+        while (self.iter.nextCodepointSlice()) |rune| {
+            if (std.mem.eql(u8, rune, "\r")) {
+                // Dropped; the lookahead below must still run.
+            } else if (!self.force_lit and std.mem.eql(u8, rune, "\\")) {
+                // Escape character: dropped, forces the next rune to be a literal.
+                self.force_lit = true;
+                current_kind = .literal;
+            } else {
+                current_kind = self.getCurrentKind(rune);
+                self.force_lit = false;
+                try acc.appendSlice(alloc, rune);
+            }
+
+            // Only carriage returns so far: no token has started yet.
+            const token_kind = current_kind orelse continue;
+
+            // Stop before the first rune that belongs to another kind.
+            const next_rune = self.iter.peek(1);
+            if (next_rune.len == 0) break;
+            // A carriage return does not end the token; look past it next turn.
+            if (std.mem.eql(u8, next_rune, "\r")) continue;
+            if (self.getCurrentKind(next_rune) != token_kind) break;
+        }
+
+        // Nothing collected: the input is exhausted, or it ended on a
+        // backslash with nothing left to escape.
+        if (acc.items.len == 0) {
+            acc.deinit(alloc);
+            return null;
+        }
+        // Every append above is preceded by an assignment to `current_kind`.
+        return lexed.Lexed.init(alloc, current_kind.?, acc);
+    }
+
+    /// Classifies `rune`; everything is a literal while an escape is pending.
+    fn getCurrentKind(self: *const Self, rune: []const u8) lexed.Kind {
+        if (self.force_lit) return .literal;
+        if (isIn(&operators, rune)) return .operator;
+        if (isIn(&delimiters, rune)) return .delimiter;
+        return .literal;
+    }
+};
+
+/// Reports whether `v` equals one of the strings in `arr`.
+fn isIn(arr: []const []const u8, v: []const u8) bool {
+    for (arr) |it| if (std.mem.eql(u8, it, v)) return true;
+    return false;
+}
+
+const Expected = struct { kind: lexed.Kind, content: []const u8 };
+
+/// Lexes `input` and checks that it yields exactly the `expected` tokens.
+fn expectTokens(input: []const u8, expected: []const Expected) !void {
+    const alloc = std.testing.allocator;
+    var l = try Lexer.init(input);
+    for (expected) |want| {
+        var got = (try l.next(alloc)) orelse return error.MissingToken;
+        defer got.deinit();
+        try std.testing.expectEqual(want.kind, got.kind);
+        try std.testing.expectEqualStrings(want.content, got.content.items);
+    }
+    try std.testing.expect((try l.next(alloc)) == null);
+}
+
+test "literal" {
+    try expectTokens("hello world :)", &.{
+        .{ .kind = .literal, .content = "hello world " },
+        .{ .kind = .operator, .content = ":)" },
+    });
+}
+
+test "carriage returns are dropped without merging tokens" {
+    try expectTokens("a\r\nb", &.{
+        .{ .kind = .literal, .content = "a" },
+        .{ .kind = .delimiter, .content = "\n" },
+        .{ .kind = .literal, .content = "b" },
+    });
+}
+
+test "escapes" {
+    try expectTokens("\\*a\\\\*", &.{
+        .{ .kind = .literal, .content = "*a\\" },
+        .{ .kind = .operator, .content = "*" },
+    });
+}
+
+test "trailing backslash" {
+    try expectTokens("a\\", &.{
+        .{ .kind = .literal, .content = "a" },
+    });
+}
+
+test "empty input" {
+    try expectTokens("", &.{});
+}
diff --git a/src/root.zig b/src/root.zig
index 5f58050..8e90eb0 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -1,4 +1,5 @@
 const std = @import("std");
+const lexer = @import("lexer/lexer.zig");
 
 pub fn bufferedPrint() !void {
     // Stdout is for the actual output of your application, for example if you
@@ -20,3 +21,9 @@ pub fn add(a: i32, b: i32) i32 {
 test "basic add functionality" {
     try std.testing.expect(add(3, 7) == 10);
 }
+
+test {
+    // Reference the private import so the tests in lexer/lexer.zig are compiled.
+    _ = lexer;
+    std.testing.refAllDeclsRecursive(@This());
+}
