diff options
Diffstat (limited to 'src/lexer/lexer.zig')
| -rw-r--r-- | src/lexer/lexer.zig | 87 |
1 files changed, 87 insertions, 0 deletions
// Extracted from diff: a/src/lexer/lexer.zig -> b/src/lexer/lexer.zig
// (new file mode 100644, index 0000000..8488a16).
const std = @import("std");
const Allocator = std.mem.Allocator;
const unicode = std.unicode;
const lexed = @import("lexed.zig");

/// Codepoints (as UTF-8 slices) that `Lexer` classifies as `.operator`.
const operators = [_][]const u8{ "*", "_", "`", "<", ">", ":", "!", "[", "]", "(", ")", "$", "-", "." };
/// Codepoints (as UTF-8 slices) that `Lexer` classifies as `.delimiter`.
const delimiters = [_][]const u8{"\n"};

/// Groups consecutive codepoints of the same kind (operator, delimiter or
/// literal) into tokens, one token per call to `next`.
pub const Lexer = struct {
    /// Cursor over the input. The iterator references the slice given to
    /// `init`; the bytes are not copied.
    iter: unicode.Utf8Iterator,
    /// Set after a backslash: the next appended codepoint is classified as
    /// `.literal` regardless of its content.
    force_lit: bool = false,

    const Self = @This();

    pub const Error = error{
        InvalidUtf8,
    } || Allocator.Error;

    /// Creates a lexer over `content`.
    /// Returns `error.InvalidUtf8` when `content` is not valid UTF-8.
    pub fn init(content: []const u8) Error!Lexer {
        const view = try unicode.Utf8View.init(content);
        return .{ .iter = view.iterator() };
    }

    /// Returns the next token, or `null` once the input is exhausted.
    ///
    /// Carriage returns are dropped. A backslash is not emitted; it forces the
    /// codepoint that follows it to be treated as a literal. If the input ends
    /// directly after a backslash, the buffer passed to `lexed.Lexed.init` is
    /// empty.
    ///
    /// The token bytes are accumulated with `alloc` and handed to
    /// `lexed.Lexed.init`. NOTE(review): the returned value presumably owns
    /// that buffer (the test below releases it with `deinit`) — confirm
    /// against lexed.zig.
    pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed {
        var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
        errdefer acc.deinit(alloc);

        var current_kind: ?lexed.Kind = null;
        while (self.iter.nextCodepointSlice()) |rune| {
            const is_carriage_return = std.mem.eql(u8, rune, "\r");
            if (!is_carriage_return) {
                if (std.mem.eql(u8, rune, "\\")) {
                    // Escape: swallow the backslash, force the next rune to literal.
                    self.force_lit = true;
                    current_kind = .literal;
                } else {
                    current_kind = self.getCurrentKind(rune);
                    self.force_lit = false;
                    try acc.appendSlice(alloc, rune);
                }
            }

            // Only carriage returns seen so far: no token has started yet.
            const kind = current_kind orelse continue;

            // The boundary check must also run after a dropped "\r". Skipping
            // it would glue the rune after the CR (e.g. the "\n" of a CRLF)
            // onto the current token and overwrite the token's kind.
            const next_rune = self.iter.peek(1);
            if (next_rune.len > 0 and self.getCurrentKind(next_rune) != kind) break;
        }

        const kind = current_kind orelse {
            // Returning null is not an error, so the errdefer does not fire.
            acc.deinit(alloc);
            return null;
        };
        return lexed.Lexed.init(alloc, kind, acc);
    }

    /// Classifies a single codepoint slice, honouring a pending escape.
    fn getCurrentKind(self: *const Self, rune: []const u8) lexed.Kind {
        if (self.force_lit) return .literal;
        if (isIn(&operators, rune)) return .operator;
        if (isIn(&delimiters, rune)) return .delimiter;
        return .literal;
    }
};

/// Reports whether `v` is byte-equal to any entry of `arr`.
fn isIn(arr: []const []const u8, v: []const u8) bool {
    for (arr) |it| if (std.mem.eql(u8, it, v)) return true;
    return false;
}

test "literal" {
    const expect = std.testing.expect;
    const alloc = std.testing.allocator;

    var l = try Lexer.init("hello world :)");

    var first = (try l.next(alloc)).?;
    defer first.deinit();
    try expect(first.equals(.literal, "hello world "));

    var second = (try l.next(alloc)).?;
    defer second.deinit();
    try expect(second.equals(.operator, ":)"));

    try expect(try l.next(alloc) == null);
}

test "carriage return does not merge tokens of different kinds" {
    const expect = std.testing.expect;
    const alloc = std.testing.allocator;

    var l = try Lexer.init("ab\r\n**");

    var first = (try l.next(alloc)).?;
    defer first.deinit();
    try expect(first.equals(.literal, "ab"));

    var second = (try l.next(alloc)).?;
    defer second.deinit();
    try expect(second.equals(.delimiter, "\n"));

    var third = (try l.next(alloc)).?;
    defer third.deinit();
    try expect(third.equals(.operator, "**"));

    try expect(try l.next(alloc) == null);
}
