aboutsummaryrefslogtreecommitdiff
path: root/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer')
-rw-r--r--src/lexer/lexed.zig47
-rw-r--r--src/lexer/lexer.zig87
2 files changed, 134 insertions, 0 deletions
diff --git a/src/lexer/lexed.zig b/src/lexer/lexed.zig
new file mode 100644
index 0000000..3134705
--- /dev/null
+++ b/src/lexer/lexed.zig
@@ -0,0 +1,47 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+/// Category attached to every token (`Lexed`) in this module.
+pub const Kind = enum {
+    literal,
+    delimiter,
+    operator,
+
+    const Self = @This();
+
+    /// Returns the lowercase name of the kind as a static string.
+    ///
+    /// The receiver is a const pointer so the method can be called on
+    /// `const` values as well as on mutable ones.
+    pub fn string(self: *const Self) []const u8 {
+        // `self.*` is the dereference; a prefix `*self` is parsed as a pointer
+        // type and is rejected once the function is analyzed.
+        return switch (self.*) {
+            .literal => "literal",
+            .delimiter => "delimiter",
+            .operator => "operator",
+        };
+    }
+};
+
+/// One token: its `Kind` together with the bytes it is made of.
+///
+/// `content` is freed through `allocator` when `deinit` is called.
+pub const Lexed = struct {
+    allocator: Allocator,
+    kind: Kind,
+    content: std.ArrayList(u8),
+
+    const Self = @This();
+
+    /// Stores the three arguments unchanged; no allocation happens here.
+    pub fn init(alloc: Allocator, kind: Kind, content: std.ArrayList(u8)) Lexed {
+        const token: Lexed = .{ .allocator = alloc, .kind = kind, .content = content };
+        return token;
+    }
+
+    /// Frees `content` with the allocator stored at `init`.
+    pub fn deinit(self: *Self) void {
+        const owner = self.allocator;
+        self.content.deinit(owner);
+    }
+
+    /// Returns a copy of `content` (the byte list only, not the whole token)
+    /// allocated with `alloc`.
+    pub fn clone(self: *const Self, alloc: Allocator) Allocator.Error!std.ArrayList(u8) {
+        const copy = try self.content.clone(alloc);
+        return copy;
+    }
+
+    /// Reports whether the token has the given kind and exactly the given bytes.
+    pub fn equals(self: *const Self, kind: Kind, content: []const u8) bool {
+        return self.kind == kind and std.mem.eql(u8, self.content.items, content);
+    }
+};
diff --git a/src/lexer/lexer.zig b/src/lexer/lexer.zig
new file mode 100644
index 0000000..8488a16
--- /dev/null
+++ b/src/lexer/lexer.zig
@@ -0,0 +1,87 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const unicode = std.unicode;
+const lexed = @import("lexed.zig");
+
+// Single-codepoint strings that `Lexer.getCurrentKind` classifies as `.operator`.
+const operators = [_][]const u8{ "*", "_", "`", "<", ">", ":", "!", "[", "]", "(", ")", "$", "-", "." };
+// Strings that `Lexer.getCurrentKind` classifies as `.delimiter`; only the line feed.
+const delimiters = [_][]const u8{"\n"};
+
+/// Splits UTF-8 text into runs of consecutive codepoints that share one
+/// `lexed.Kind`.
+pub const Lexer = struct {
+    iter: unicode.Utf8Iterator,
+    /// True right after an unescaped backslash: the next codepoint is
+    /// classified as a literal whatever it is.
+    force_lit: bool = false,
+
+    const Self = @This();
+
+    pub const Error = error{
+        InvalidUtf8,
+    } || Allocator.Error;
+
+    /// Creates a lexer over `content`.
+    ///
+    /// Fails with `error.InvalidUtf8` when `content` is not valid UTF-8.
+    /// The iterator keeps a slice of `content`, so it must outlive the lexer.
+    pub fn init(content: []const u8) Error!Lexer {
+        const view = try unicode.Utf8View.init(content);
+        return .{ .iter = view.iterator() };
+    }
+
+    /// Returns the next token, or `null` once the input is exhausted.
+    ///
+    /// A token is the longest run of consecutive codepoints of one kind.
+    /// Carriage returns are dropped. A backslash is not emitted; it turns the
+    /// codepoint after it into a literal, so two backslashes in a row yield
+    /// one literal backslash. If all that remains of the input is a single
+    /// backslash, the returned literal has empty content.
+    ///
+    /// The returned token stores `alloc`; the caller must call `deinit` on it.
+    pub fn next(self: *Self, alloc: Allocator) Error!?lexed.Lexed {
+        var acc = try std.ArrayList(u8).initCapacity(alloc, 2);
+        errdefer acc.deinit(alloc);
+
+        var current_kind: ?lexed.Kind = null;
+        // Each codepoint is classified *before* it is consumed, so a codepoint
+        // of a different kind stays in the iterator for the following call.
+        while (true) {
+            const rune = self.iter.peek(1);
+            if (rune.len == 0) break;
+
+            // Dropped ahead of the boundary check so that CRLF input still
+            // splits in front of the line feed.
+            if (std.mem.eql(u8, rune, "\r")) {
+                _ = self.iter.nextCodepointSlice();
+                continue;
+            }
+
+            // A backslash starts an escape only when it is not itself escaped.
+            const is_escape = !self.force_lit and std.mem.eql(u8, rune, "\\");
+            const rune_kind: lexed.Kind = if (is_escape) .literal else self.getCurrentKind(rune);
+            if (current_kind) |running| {
+                if (running != rune_kind) break;
+            }
+            current_kind = rune_kind;
+
+            _ = self.iter.nextCodepointSlice();
+            if (is_escape) {
+                self.force_lit = true;
+            } else {
+                self.force_lit = false;
+                try acc.appendSlice(alloc, rune);
+            }
+        }
+
+        const kind = current_kind orelse {
+            // Nothing was consumed: release the scratch buffer ourselves,
+            // because `errdefer` does not run on a non-error return.
+            acc.deinit(alloc);
+            return null;
+        };
+        return lexed.Lexed.init(alloc, kind, acc);
+    }
+
+    /// Classifies one codepoint: a literal while an escape is pending,
+    /// otherwise by membership in `operators` and `delimiters`, defaulting to
+    /// a literal.
+    fn getCurrentKind(self: *const Self, rune: []const u8) lexed.Kind {
+        if (self.force_lit) return .literal;
+        if (isIn(&operators, rune)) return .operator;
+        if (isIn(&delimiters, rune)) return .delimiter;
+        return .literal;
+    }
+};
+
+/// Reports whether any string in `arr` is byte-for-byte equal to `v`.
+fn isIn(arr: []const []const u8, v: []const u8) bool {
+    return for (arr) |candidate| {
+        if (std.mem.eql(u8, candidate, v)) break true;
+    } else false;
+}
+
+test "literal" {
+    const testing = std.testing;
+    // `testing.allocator` makes the test fail if any token is leaked.
+    const alloc = testing.allocator;
+
+    var l = try Lexer.init("hello world :)");
+
+    // The space is neither an operator nor a delimiter, so it stays in the
+    // literal run.
+    var first = (try l.next(alloc)).?;
+    defer first.deinit();
+    try testing.expect(first.equals(.literal, "hello world "));
+
+    // Adjacent operators are merged into a single token.
+    var second = (try l.next(alloc)).?;
+    defer second.deinit();
+    try testing.expect(second.equals(.operator, ":)"));
+
+    // Exhausted input yields `null`.
+    try testing.expect((try l.next(alloc)) == null);
+}