From 83c7854b3f6e60d6e3c430b9c3dd59b3773c4ef2 Mon Sep 17 00:00:00 2001
From: Syndamia <kamen@syndamia.com>
Date: Tue, 29 Nov 2022 19:02:10 +0200
Subject: Added a somewhat working version of the parser

---
 src/ArzuParser.cpp           |  80 ++++++++++++++++++++++++
 src/ArzuParser.h             |  10 +++
 src/Memory.cpp               |   2 +
 src/Memory.h                 |  15 +++++
 src/MemoryData.cpp           |  24 +++++++
 src/MemoryData.h             |  46 ++++++++++++++
 src/PolishNotationParser.cpp | 145 +++++++++++++++++++++++++++++++++++++++++++
 src/PolishNotationParser.h   |  40 ++++++++++++
 src/main.cpp                 |  13 ++++
 src/test.arzu                |   2 +
 10 files changed, 377 insertions(+)
 create mode 100644 src/ArzuParser.cpp
 create mode 100644 src/ArzuParser.h
 create mode 100644 src/Memory.cpp
 create mode 100644 src/Memory.h
 create mode 100644 src/MemoryData.cpp
 create mode 100644 src/MemoryData.h
 create mode 100644 src/PolishNotationParser.cpp
 create mode 100644 src/PolishNotationParser.h
 create mode 100644 src/main.cpp
 create mode 100644 src/test.arzu
diff --git a/src/ArzuParser.cpp b/src/ArzuParser.cpp
new file mode 100644
index 0000000..0556a27
--- /dev/null
+++ b/src/ArzuParser.cpp
@@ -0,0 +1,80 @@
+#include "ArzuParser.h"
+#include "PolishNotationParser.h"
+#include "Memory.h"
+
+bool isDigit(char ch) { // True when ch is one of the ASCII characters '0' through '9'.
+	return '0' <= ch && ch <= '9';
+}
+
+bool isName(char ch) { // True when ch is '$', the character that selects the Name atom.
+	return ch == '$';
+}
+
+bool isSeparator(char ch) { // True for space, tab, newline, or the EOF sentinel after it has been narrowed to char.
+	return ch == ' ' || ch == '\t' || ch == '\n' || ch == static_cast<char>(EOF); // cast needed: where char is unsigned, ch can never equal the negative EOF
+}
+
+void _arzuIntOp(char op, Memory& mem) { // Pops the right operand and folds it into the left one, which stays on top of mem.work.
+	Int* right = static_cast<Int*>(mem.work.top()); // unchecked cast: both operands are assumed to be Int
+	mem.work.pop();
+	Int* left = static_cast<Int*>(mem.work.top());
+	switch (op) {
+		case '+': left->value += right->value; break;
+		case '-': left->value -= right->value; break;
+		case '*': left->value *= right->value; break;
+		case '/': if (right->value == 0) { delete right; throw "Division by zero"; } left->value /= right->value; break; // integer division by zero is undefined, so reject it
+		 default: delete right; throw "Bad op"; // right is already off the stack: free it before unwinding
+	}
+	delete right;
+}
+void arzu_sum(Memory& mem) { // Built-in "+": adds the two topmost work-stack operands, leaving the sum in the lower one.
+	_arzuIntOp('+', mem);
+}
+void arzu_sub(Memory& mem) { // Built-in "-": subtracts the topmost operand from the one beneath it.
+	_arzuIntOp('-', mem);
+}
+void arzu_mult(Memory& mem) { // Built-in "*": multiplies the two topmost work-stack operands.
+	_arzuIntOp('*', mem);
+}
+void arzu_div(Memory& mem) { // Built-in "/": divides the operand beneath the top by the topmost one (integer division).
+	_arzuIntOp('/', mem);
+}
+
+void arzu_devar(Memory& mem) { // Built-in "devar": declares a variable from the (name, value) pair on the work stack.
+	MemoryData* val = mem.work.top(); // the value was pushed last, so it comes off first
+	mem.work.pop();
+	Name* name = static_cast<Name*>(mem.work.top()); // unchecked cast: the entry below the value is assumed to be a Name
+	mem.work.pop();
+	// Variables are stored in mem.vars as two consecutive entries: the name, then its value.
+	mem.vars.push_back(name);
+	mem.vars.push_back(val);
+	mem.scopeVars.top()++; // one more variable belongs to the current scope
+}
+
+
+MemoryData* arzu_Int_absorb(ifstream& inFile) { // Reads a run of decimal digits from inFile into a newly allocated Int.
+	Int* num = new Int(0);
+	// Stop at the first non-digit (end of file included) so that stray characters
+	// are left in the stream instead of being folded into the value as digits.
+	while (isDigit(inFile.peek()))
+		num->value = (num->value * 10) + (inFile.get() - '0');
+	return num;
+}
+
+MemoryData* arzu_Name_absorb(ifstream& inFile) { // Reads "$name" from inFile and returns a new Name holding "name".
+	Name* name = new Name();
+	if (isName(inFile.peek())) inFile.get(); // drop the '$' sigil: it sorts below '0', so the loop below would never consume it
+	while ('0' <= inFile.peek() && inFile.peek() <= 'z') name->value.push_back(inFile.get()); // the range already covers the digits
+	return name;
+}
+
+ArzuParser::ArzuParser() : PNParser() { // Registers the Arzu built-ins and atom kinds with the underlying parser.
+	this->addInstr(Instruction("+", 2, arzu_sum)); // each entry: token, operand count, handler
+	this->addInstr(Instruction("-", 2, arzu_sub));
+	this->addInstr(Instruction("*", 2, arzu_mult));
+	this->addInstr(Instruction("/", 2, arzu_div));
+	this->addInstr(Instruction("devar", 2, arzu_devar));
+	// Atoms are chosen by their first character: a digit selects Int, '$' selects Name.
+	this->addAtom(Atom(isDigit, arzu_Int_absorb));
+	this->addAtom(Atom(isName, arzu_Name_absorb));
+}
diff --git a/src/ArzuParser.h b/src/ArzuParser.h
new file mode 100644
index 0000000..af47f0f
--- /dev/null
+++ b/src/ArzuParser.h
@@ -0,0 +1,10 @@
+#ifndef ARZU_INTERPRETER_ARZU_PARSER
+#define ARZU_INTERPRETER_ARZU_PARSER
+
+#include "PolishNotationParser.h"
+
+struct ArzuParser : public PNParser { // A PNParser specialised for the Arzu language.
+	ArzuParser(); // defined in ArzuParser.cpp
+};
+
+#endif
diff --git a/src/Memory.cpp b/src/Memory.cpp
new file mode 100644
index 0000000..4a4713f
--- /dev/null
+++ b/src/Memory.cpp
@@ -0,0 +1,2 @@
+#include "Memory.h"
+#include "MemoryData.h"
diff --git a/src/Memory.h b/src/Memory.h
new file mode 100644
index 0000000..d030b1e
--- /dev/null
+++ b/src/Memory.h
@@ -0,0 +1,15 @@
+#ifndef ARZU_INTERPRETER_MEMORY
+#define ARZU_INTERPRETER_MEMORY
+
+#include "MemoryData.h"
+#include <list>
+#include <stack>
+using namespace std;
+
+struct Memory { // Interpreter state: declared variables, the working stack and per-scope bookkeeping.
+	list<MemoryData*> vars; // raw pointers to variable entries
+	stack<MemoryData*> work; // raw pointers to the values currently being worked on
+	stack<unsigned int> scopeVars; // one counter per scope
+};
+
+#endif
diff --git a/src/MemoryData.cpp b/src/MemoryData.cpp
new file mode 100644
index 0000000..34e2877
--- /dev/null
+++ b/src/MemoryData.cpp
@@ -0,0 +1,24 @@
+#include "MemoryData.h"
+#include <cstring>
+
+MemoryData::Type MemoryData::get_type() const { // Returns the type tag stored in this object.
+	return this->type;
+}
+
+MemoryData::MemoryData() : type(TNone) {} // A bare MemoryData carries the TNone tag.
+
+Name::Name() : value() { // Creates an empty name tagged TName.
+	this->type = TName;
+}
+
+Name::Name(string& str) : value(str) { // Creates a name holding a copy of str, tagged TName.
+	this->type = TName;
+}
+
+Int::Int(int num) : value(num) { // Wraps num as a value tagged TInt.
+	this->type = TInt;
+}
+
+Function::Function(list<string> argNames, int scopeStart, int scopeEnd) : argumentNames(argNames), scopeStart(scopeStart), scopeEnd(scopeEnd) { // Stores the argument names and both scope bounds, tagged TFunc.
+	this->type = TFunc;
+}
diff --git a/src/MemoryData.h b/src/MemoryData.h
new file mode 100644
index 0000000..06f259a
--- /dev/null
+++ b/src/MemoryData.h
@@ -0,0 +1,46 @@
+#ifndef ARZU_INTERPRETER_MEMORY_DATA
+#define ARZU_INTERPRETER_MEMORY_DATA
+
+#include <list>
+#include <string>
+using namespace std;
+
+class MemoryData { // Common base of every value the interpreter stores; carries a runtime type tag.
+public:
+	enum Type {
+		TNone,
+		TName,
+		TFunc,
+		TInt,
+	};
+
+protected:
+	Type type; // assigned by the constructor of each derived type
+
+public:
+	// Reports the type tag of this value.
+	Type get_type() const;
+
+	MemoryData();
+	virtual ~MemoryData() = default; // virtual: derived objects are destroyed through MemoryData* pointers
+};
+
+struct Name : public MemoryData { // An identifier value (tag TName).
+	string value; // the identifier text
+	Name(); // empty name
+	Name(string&); // name initialised from the given string
+};
+
+struct Int : public MemoryData { // An integer value (tag TInt).
+	int value; // the wrapped number
+	Int(int); // wraps the given number
+};
+
+struct Function : public MemoryData { // A callable value (tag TFunc).
+	list<string> argumentNames; // one entry per argument
+	int scopeStart; // first scope bound; NOTE(review): meaning depends on how the parser uses it - confirm against PNParser
+	int scopeEnd; // second scope bound; NOTE(review): same caveat as scopeStart
+	Function(list<string>, int, int); // argument names, scopeStart, scopeEnd
+};
+
+#endif
diff --git a/src/PolishNotationParser.cpp b/src/PolishNotationParser.cpp
new file mode 100644
index 0000000..f99f404
--- /dev/null
+++ b/src/PolishNotationParser.cpp
@@ -0,0 +1,145 @@
+#include "PolishNotationParser.h"
+#include "MemoryData.h"
+
+#include <iostream>
+
+Instruction::Instruction(const char* str, unsigned argc, void (*exec)(Memory&)) : name(str), argc(argc), exec(exec) {} // Stores the token, operand count and handler as given.
+
+Atom::Atom(bool (*isAtomChar)(char), MemoryData* (*absorb)(ifstream&)) : isAtomChar(isAtomChar), absorb(absorb) {} // Stores the recogniser and reader callbacks as given.
+
+void PNParser::addInstr(Instruction i) { // Appends i to the instruction table.
+	this->instr.push_back(i);
+}
+
+void PNParser::addAtom(Atom a) { // Appends a to the list of atom kinds.
+	this->atoms.push_back(a);
+}
+
+bool isSpacing(char ch) { // True for the characters that delimit tokens: space, tab, newline, or the EOF sentinel narrowed to char.
+	return ch == ' ' || ch == '\t' || ch == '\n' || ch == static_cast<char>(EOF); // '\n' ends a token too; the cast keeps the EOF test working where char is unsigned
+}
+
+void skipSpacing(ifstream& inFile) { // Advances inFile past any run of spacing characters.
+	while (inFile.peek() != EOF && isSpacing(inFile.peek())) // stop at end of file: get() cannot advance there, so the loop would never end
+		inFile.get();
+}
+
+void PNParser::evaluateFunction(Memory& mem, Function* func, ifstream& inFile) { // Runs func, taking its arguments from mem.work.
+	if (func->scopeEnd < 0) { // negative scopeEnd: func stands for a built-in and scopeStart is its index in instr
+		instr[func->scopeStart].exec(mem);
+		delete func;
+		return;
+	}
+	// Otherwise bind each argument name (last one first) to a value popped from the work stack.
+	for (auto i = func->argumentNames.rbegin(); i != func->argumentNames.rend(); ++i) {
+		mem.vars.push_back(new Name(*i));
+		mem.vars.push_back(mem.work.top());
+		mem.work.pop();
+	}
+	mem.scopeVars.push(func->argumentNames.size());
+	// NOTE(review): func is not deleted on this path, and parseScope pushes and pops its own scopeVars entry, so the one pushed above appears to stay on the stack - confirm.
+	this->parseScope(inFile, mem, func->scopeStart, func->scopeEnd);
+}
+
+void PNParser::pushToWork(Memory& mem, MemoryData* data, ifstream& inFile) { // Adds one parsed value to mem.work and evaluates every call that becomes complete.
+	if (data->get_type() == MemoryData::TFunc) { // a function opens a new pending call
+		Function* func = static_cast<Function*>(data);
+		mem.work.push(data);
+		mem.work.push(new Int(func->argumentNames.size())); // number of arguments still missing
+		return;
+	}
+
+	// We keep the amount of required arguments and the function
+	// on the top of the stack at all times
+	Int* argc = static_cast<Int*>(mem.work.top()); // NOTE(review): assumes a pending call is on the stack - confirm for bare values
+	mem.work.pop();
+	Function* func = static_cast<Function*>(mem.work.top());
+	mem.work.pop();
+
+	mem.work.push(data); // the argument goes underneath the pending call
+	argc->value--;
+
+	while (argc->value == 0) { // the call is complete: run it, then feed its result to the enclosing call
+		delete argc;
+		evaluateFunction(mem, func, inFile);
+		// At most the result is left, so there is no enclosing call to feed. The stack is empty
+		// when the instruction produced no value (devar); calling top() on it would be undefined.
+		if (mem.work.size() <= 1) return;
+		data = mem.work.top();
+		mem.work.pop();
+		argc = static_cast<Int*>(mem.work.top());
+		mem.work.pop();
+		func = static_cast<Function*>(mem.work.top());
+		mem.work.pop();
+
+		mem.work.push(data);
+		argc->value--;
+	}
+
+	mem.work.push(func); // call still incomplete: put the (function, count) pair back on top
+	mem.work.push(argc);
+}
+
+void PNParser::parseScope(ifstream& inFile, Memory& mem, int start, int end) { // Parses tokens while the stream position lies strictly between start and end, then drops the variables declared here.
+	mem.scopeVars.push(0); // this scope has declared no variables yet
+	bool absorbed = false;
+	while (start < inFile.tellg() && inFile.tellg() < end && inFile.peek() != EOF) {
+		skipSpacing(inFile);
+		/* If value is an atom */
+		for (Atom& a : this->atoms) {
+			if (a.isAtomChar(inFile.peek())) {
+				pushToWork(mem, a.absorb(inFile), inFile);
+				absorbed = true;
+				break;
+			}
+		}
+		if (absorbed) { absorbed = false; continue; }
+
+		/* If value is a built-in function */
+		string name;
+		while (inFile.peek() != EOF && !isSpacing(inFile.peek())) // end of file ends the token as well
+			name.push_back(inFile.get());
+		for (int i = instr.size() - 1; i >= 0; i--) {
+			if (instr[i].name == name) {
+				list<string> temp; // placeholder names: a built-in only needs their count
+				temp.resize(instr[i].argc);
+				pushToWork(mem, new Function(temp, i, -1), inFile);
+				absorbed = true;
+				break;
+			}
+		}
+		if (absorbed) { absorbed = false; continue; }
+
+		/* If value is a variable */
+		// vars holds (name, value) entries, so walk it newest-first two entries at a time.
+		for (auto i = mem.vars.rbegin(); i != mem.vars.rend(); ++i) {
+			MemoryData* stored = *i;
+			if (++i == mem.vars.rend()) break; // unpaired entry: never step past rend()
+			if (static_cast<Name*>(*i)->value != name) continue;
+			// Copy through the concrete type; copying as MemoryData would slice off the payload.
+			MemoryData* value;
+			switch (stored->get_type()) {
+				case MemoryData::TInt:  value = new Int(*static_cast<Int*>(stored)); break;
+				case MemoryData::TName: value = new Name(*static_cast<Name*>(stored)); break;
+				case MemoryData::TFunc: value = new Function(*static_cast<Function*>(stored)); break;
+				default:                value = new MemoryData(*stored); break;
+			}
+			pushToWork(mem, value, inFile);
+			break;
+		}
+	}
+	cout << mem.vars.size();
+	// cout << static_cast<Int*>(mem.work.top())->value << endl;
+
+	for (int i = mem.scopeVars.top(); i > 0; i--) { // each variable occupies two entries: value, then name
+		delete mem.vars.back();
+		mem.vars.pop_back();
+		delete mem.vars.back();
+		mem.vars.pop_back();
+	}
+	mem.scopeVars.pop();
+}
+
+void PNParser::parse(ifstream& inFile, Memory& mem) { // Parses inFile as one top-level scope.
+	parseScope(inFile, mem, -1, 100000); // global scope; NOTE(review): the end bound stops parsing at stream offset 100000 - confirm this limit is intended
+}
diff --git a/src/PolishNotationParser.h b/src/PolishNotationParser.h
new file mode 100644
index 0000000..1de6f26
--- /dev/null
+++ b/src/PolishNotationParser.h
@@ -0,0 +1,40 @@
+#ifndef ARZU_INTERPRETER_PARSER
+#define ARZU_INTERPRETER_PARSER
+
+#include <list>
+#include <deque>
+#include <string>
+#include <fstream>
+#include "Memory.h"
+using namespace std;
+
+struct Instruction { // A built-in operation: its token, operand count and handler.
+	string name; // token text that selects the instruction
+	unsigned argc; // number of operands the instruction takes
+	void (*exec)(Memory&); // handler invoked with the interpreter memory
+
+	Instruction(const char* str, unsigned argc, void (*exec)(Memory&));
+};
+
+struct Atom { // A kind of literal: a character test plus a reader that builds the value.
+	bool (*isAtomChar)(char); // tells whether a character belongs to this kind of atom
+	MemoryData* (*absorb)(ifstream&); // reads the atom from the stream and returns it as a MemoryData
+
+	Atom(bool (*isAtomChar)(char), MemoryData* (*absorb)(ifstream&));
+};
+
+class PNParser { // Parser driven by a table of registered instructions and atom kinds.
+	deque<Instruction> instr; // registered instructions
+	list<Atom> atoms; // registered atom kinds
+
+	void evaluateFunction(Memory& mem, Function* func, ifstream& inFile); // runs func against mem
+	void pushToWork(Memory& mem, MemoryData* data, ifstream& inFile); // hands one parsed value to mem
+	void parseScope(ifstream& inFile, Memory& mem, int start, int end); // parses the part of inFile bounded by start and end
+public:
+	PNParser() = default;
+	void addInstr(Instruction); // registers an instruction
+	void addAtom(Atom); // registers an atom kind
+	void parse(ifstream&, Memory&); // parses the given stream into the given memory
+};
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..aa4a5dc
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,13 @@
+#include "ArzuParser.h"
+#include <fstream>
+#include <iostream>
+#include <string>
+using namespace std;
+
+int main() { // Runs the parser over "test.arzu", opened relative to the current working directory.
+	Memory userRAM;
+	ifstream inFile("test.arzu");
+	if (!inFile.is_open()) { cerr << "Could not open test.arzu" << endl; return 1; } // report a missing input instead of silently parsing nothing
+	ArzuParser parser;
+	parser.parse(inFile, userRAM); // inFile is closed by its destructor when main returns
+}
diff --git a/src/test.arzu b/src/test.arzu
new file mode 100644
index 0000000..cb31da8
--- /dev/null
+++ b/src/test.arzu
@@ -0,0 +1,2 @@
+devar $a 10
++ $a 1
-- 
cgit v1.2.3