From 83c7854b3f6e60d6e3c430b9c3dd59b3773c4ef2 Mon Sep 17 00:00:00 2001 From: Syndamia Date: Tue, 29 Nov 2022 19:02:10 +0200 Subject: Added a somewhat working version of the parser --- src/ArzuParser.cpp | 80 ++++++++++++++++++++++++ src/ArzuParser.h | 10 +++ src/Memory.cpp | 2 + src/Memory.h | 15 +++++ src/MemoryData.cpp | 24 +++++++ src/MemoryData.h | 46 ++++++++++++++ src/PolishNotationParser.cpp | 145 +++++++++++++++++++++++++++++++++++++++++++ src/PolishNotationParser.h | 40 ++++++++++++ src/main.cpp | 13 ++++ src/test.arzu | 2 + 10 files changed, 377 insertions(+) create mode 100644 src/ArzuParser.cpp create mode 100644 src/ArzuParser.h create mode 100644 src/Memory.cpp create mode 100644 src/Memory.h create mode 100644 src/MemoryData.cpp create mode 100644 src/MemoryData.h create mode 100644 src/PolishNotationParser.cpp create mode 100644 src/PolishNotationParser.h create mode 100644 src/main.cpp create mode 100644 src/test.arzu diff --git a/src/ArzuParser.cpp b/src/ArzuParser.cpp new file mode 100644 index 0000000..0556a27 --- /dev/null +++ b/src/ArzuParser.cpp @@ -0,0 +1,80 @@ +#include "ArzuParser.h" +#include "PolishNotationParser.h" +#include "Memory.h" +/* Returns true when ch is one of the characters '0' through '9'. */ +bool isDigit(char ch) { + return '0' <= ch && ch <= '9'; +} +/* Returns true when ch is '$'; registered below as the start-of-token test for the Name atom. */ +bool isName(char ch) { + return ch == '$'; +} +/* Returns true for space, tab, newline and EOF. NOTE(review): ch is a char but callers pass the int returned by peek(), so whether the comparison with EOF can succeed presumably depends on the signedness of char - confirm. */ +bool isSeparator(char ch) { + return ch == ' ' || ch == '\t' || ch == '\n' || ch == EOF; +} +/* Applies the integer operator op (plus, minus, times or divide) to the two topmost entries of mem.work. The top entry is popped as the right operand; the entry below stays on the stack as the left operand and receives the result in place; the right operand is then deleted. Any other op throws the string literal "Bad op". The four arzu_ arithmetic functions that follow forward to this helper. NOTE(review): entries are cast to Int pointers without checking get_type(), division has no zero-divisor guard, and on the throwing path the popped right operand is not deleted. NOTE(review): the static_cast calls show no target type here, presumably lost in extraction - confirm. */ +void _arzuIntOp(char op, Memory& mem) { + Int* right = static_cast(mem.work.top()); + mem.work.pop(); + Int* left = static_cast(mem.work.top()); + switch (op) { + case '+': left->value += right->value; break; + case '-': left->value -= right->value; break; + case '*': left->value *= right->value; break; + case '/': left->value /= right->value; break; + default: throw "Bad op"; + } + delete right; +} +void arzu_sum(Memory& mem) { + _arzuIntOp('+', mem); +} +void arzu_sub(Memory& mem) { + _arzuIntOp('-', mem); +} +void arzu_mult(Memory& mem) { + _arzuIntOp('*', mem); +} +void arzu_div(Memory& mem) { + 
_arzuIntOp('/', mem); +} +/* Implements the devar instruction: pops a value and then a name from mem.work, appends the name followed by the value to mem.vars, and increments the counter on top of mem.scopeVars. NOTE(review): the second entry is cast to a Name pointer without a type check, and mem.scopeVars is assumed to be non-empty. */ +void arzu_devar(Memory& mem) { + MemoryData* val = mem.work.top(); + mem.work.pop(); + Name* name = static_cast(mem.work.top()); + mem.work.pop(); + + mem.vars.push_back(name); + mem.vars.push_back(val); + mem.scopeVars.top()++; +} + +/* Atom reader for integers: allocates an Int starting at 0 and, until isSeparator() accepts the next character, folds each character into it as a base-10 digit and consumes it. Returns the new Int. NOTE(review): characters are not verified to be digits and overflow is not handled. */ +MemoryData* arzu_Int_absorb(ifstream& inFile) { + Int* num = new Int(0); + while (!isSeparator(inFile.peek())) { + num->value = (num->value * 10) + (inFile.peek() - '0'); + inFile.get(); + } + return num; +} +/* Atom reader for names: allocates an empty Name and appends characters while the next one lies in the range '0' to 'z' (the isDigit() test is already covered by that range). Returns the new Name. NOTE(review): '$', which isName() uses to select this reader, sorts below '0' in ASCII, so it looks as if the '$' is never consumed and the returned name stays empty - confirm against the repository. */ +MemoryData* arzu_Name_absorb(ifstream& inFile) { + Name* name = new Name(); + while (isDigit(inFile.peek()) || ('0' <= inFile.peek() && inFile.peek() <= 'z')) + name->value.push_back(inFile.get()); + return name; +} +/* Registers the four arithmetic instructions and devar, each with an argument count of 2, then the Int and Name atoms. */ +ArzuParser::ArzuParser() : PNParser() { + this->addInstr(Instruction("+", 2, arzu_sum)); + this->addInstr(Instruction("-", 2, arzu_sub)); + this->addInstr(Instruction("*", 2, arzu_mult)); + this->addInstr(Instruction("/", 2, arzu_div)); + this->addInstr(Instruction("devar", 2, arzu_devar)); + + this->addAtom(Atom(isDigit, arzu_Int_absorb)); + this->addAtom(Atom(isName, arzu_Name_absorb)); +} diff --git a/src/ArzuParser.h b/src/ArzuParser.h new file mode 100644 index 0000000..af47f0f --- /dev/null +++ b/src/ArzuParser.h @@ -0,0 +1,10 @@ +#ifndef ARZU_INTERPRETER_ARZU_PARSER +#define ARZU_INTERPRETER_ARZU_PARSER + +#include "PolishNotationParser.h" +/* PNParser subclass; only a constructor is declared here. */ +struct ArzuParser : public PNParser { + ArzuParser(); +}; + +#endif diff --git a/src/Memory.cpp b/src/Memory.cpp new file mode 100644 index 0000000..4a4713f --- /dev/null +++ b/src/Memory.cpp @@ -0,0 +1,2 @@ +#include "Memory.h" +#include "MemoryData.h" diff --git a/src/Memory.h b/src/Memory.h new file mode 100644 index 0000000..d030b1e --- /dev/null +++ b/src/Memory.h @@ -0,0 +1,15 @@ +#ifndef ARZU_INTERPRETER_MEMORY +#define ARZU_INTERPRETER_MEMORY + +#include "MemoryData.h" +#include +#include +using namespace std; +/* State passed to every instruction callback. vars receives name and value entries in pairs (see arzu_devar), work is the stack the instructions pop operands from, and scopeVars holds a per-scope counter. NOTE(review): the element types of list and stack, and the two #include targets above, are missing here - presumably angle-bracket text lost in extraction; confirm against the repository. */ +struct Memory { + list vars; + stack work; + stack scopeVars; +}; + +#endif diff --git a/src/MemoryData.cpp
b/src/MemoryData.cpp new file mode 100644 index 0000000..34e2877 --- /dev/null +++ b/src/MemoryData.cpp @@ -0,0 +1,24 @@ +#include "MemoryData.h" +#include +/* Returns the stored type tag. */ +MemoryData::Type MemoryData::get_type() const { + return this->type; +} +/* The base constructor tags the object TNone; each subclass constructor below overwrites the tag. */ +MemoryData::MemoryData() : type(TNone) {} +/* Empty name, tagged TName. */ +Name::Name() : value() { + this->type = TName; +} +/* Name copied from str, tagged TName. NOTE(review): str is only copied, so the non-const reference looks unnecessary - confirm. */ +Name::Name(string& str) : value(str) { + this->type = TName; +} +/* Integer holding num, tagged TInt. */ +Int::Int(int num) : value(num) { + this->type = TInt; +} +/* Stores the argument names and both scope bounds, tagged TFunc. */ +Function::Function(list argNames, int scopeStart, int scopeEnd) : argumentNames(argNames), scopeStart(scopeStart), scopeEnd(scopeEnd) { + this->type = TFunc; +} diff --git a/src/MemoryData.h b/src/MemoryData.h new file mode 100644 index 0000000..06f259a --- /dev/null +++ b/src/MemoryData.h @@ -0,0 +1,46 @@ +#ifndef ARZU_INTERPRETER_MEMORY_DATA +#define ARZU_INTERPRETER_MEMORY_DATA + +#include +#include +using namespace std; +/* Common base of Name, Int and Function; get_type() reports which one an object was constructed as. NOTE(review): the destructor is non-virtual while other files in this patch call delete on entries of mem.vars - if those are base pointers, derived members such as Name::value would not be destroyed; confirm. */ +class MemoryData { +public: + enum Type { + TNone, + TName, + TFunc, + TInt, + }; + +protected: + Type type; + +public: + + Type get_type() const; + + MemoryData(); + ~MemoryData() = default; +}; +/* Value holding a string. */ +struct Name : public MemoryData { + string value; + Name(); + Name(string&); +}; +/* Value holding an int. */ +struct Int : public MemoryData { + int value; + Int(int); +}; +/* Callable value. PNParser::evaluateFunction treats scopeEnd < 0 as a built-in whose scopeStart indexes the instruction table; otherwise both fields are handed to parseScope as bounds. NOTE(review): the element type of list is missing here, presumably lost in extraction. */ +struct Function : public MemoryData { + list argumentNames; + int scopeStart; + int scopeEnd; + Function(list, int, int); +}; + +#endif diff --git a/src/PolishNotationParser.cpp b/src/PolishNotationParser.cpp new file mode 100644 index 0000000..f99f404 --- /dev/null +++ b/src/PolishNotationParser.cpp @@ -0,0 +1,145 @@ +#include "PolishNotationParser.h" +#include "MemoryData.h" + +#include +/* Stores the name, argument count and callback as given. */ +Instruction::Instruction(const char* str, unsigned argc, void (*exec)(Memory&)) : name(str), argc(argc), exec(exec) {} +/* Stores the character test and the reader function as given. */ +Atom::Atom(bool (*isAtomChar)(char), MemoryData* (*absorb)(ifstream&)) : isAtomChar(isAtomChar), absorb(absorb) {} +/* Appends i to the instruction table. */ +void PNParser::addInstr(Instruction i) { + this->instr.push_back(i); +} +/* Appends a to the atom list. */ +void PNParser::addAtom(Atom a) { + 
this->atoms.push_back(a); +} +/* Returns true for space, tab and EOF. Unlike isSeparator() in ArzuParser.cpp, newline is not included. */ +bool isSpacing(char ch) { + return ch == ' ' || ch == '\t' || ch == EOF; +} +/* Consumes characters while isSpacing() accepts the next one. NOTE(review): EOF counts as spacing, so once peek() reports EOF this loop does not appear to terminate - confirm. */ +void skipSpacing(ifstream& inFile) { + while(isSpacing(inFile.peek())) + inFile.get(); +} +/* Runs func. When scopeEnd < 0 the instruction at index scopeStart is executed and func is deleted. Otherwise, for each argument name in reverse order, a new Name and the current top of mem.work are appended to mem.vars (popping work), the argument count is pushed on mem.scopeVars and parseScope() is called with the bounds stored in func. NOTE(review): func is not deleted on the second path, and parseScope() pushes and pops its own counter, so the count pushed here does not seem to be the one it cleans up - confirm. */ +void PNParser::evaluateFunction(Memory& mem, Function* func, ifstream& inFile) { + if (func->scopeEnd < 0) { + instr[func->scopeStart].exec(mem); + delete func; + return; + } + + for (auto i = func->argumentNames.rbegin(); i != func->argumentNames.rend(); ++i) { + mem.vars.push_back(new Name(*i)); + mem.vars.push_back(mem.work.top()); + mem.work.pop(); + } + mem.scopeVars.push(func->argumentNames.size()); + + this->parseScope(inFile, mem, func->scopeStart, func->scopeEnd); +} +/* Pushes data onto mem.work and evaluates every function whose arguments are thereby complete. A Function is pushed followed by a new Int holding its argument count. Any other value is inserted beneath the (Function, Int) pair on top of the stack and the count is decremented; while a count reaches 0 the function is evaluated, and unless exactly one entry remains the top entry is then handed to the next pair in the same way. NOTE(review): the non-function path calls top() without checking that the stack holds such a pair, including after an instruction that leaves no result - confirm. */ +void PNParser::pushToWork(Memory& mem, MemoryData* data, ifstream& inFile) { + if (data->get_type() == MemoryData::TFunc) { + Function* func = static_cast(data); + mem.work.push(data); + mem.work.push(new Int(func->argumentNames.size())); + return; + } + + // We keep the amount of required arguments and the function + // on the top of the stack at all times + Int* argc = static_cast(mem.work.top()); + mem.work.pop(); + Function* func = static_cast(mem.work.top()); + mem.work.pop(); + + mem.work.push(data); + argc->value--; + + while (argc->value == 0) { + delete argc; + evaluateFunction(mem, func, inFile); + + if (mem.work.size() == 1) return; + + data = mem.work.top(); + mem.work.pop(); + argc = static_cast(mem.work.top()); + mem.work.pop(); + func = static_cast(mem.work.top()); + mem.work.pop(); + + mem.work.push(data); + argc->value--; + } + + mem.work.push(func); + mem.work.push(argc); +} +/* Evaluates tokens from inFile while the stream position is strictly between start and end and the next character is not EOF. A token is tried as an atom, then as a built-in instruction name (searched from the last registered), then as a variable name in mem.vars. Pushes a counter on mem.scopeVars on entry; on exit prints mem.vars.size() to cout, deletes that many name and value pairs from the back of mem.vars and pops the counter. */ +void PNParser::parseScope(ifstream& inFile, Memory& mem, int start, int end) { + mem.scopeVars.push(0); + bool absorbed = false; + while (start < inFile.tellg() && inFile.tellg() < end && inFile.peek() != EOF) { + skipSpacing(inFile); + + /* If value is an atom */ + + for (Atom& a : this->atoms) { + if (a.isAtomChar(inFile.peek())) { + pushToWork(mem, a.absorb(inFile), inFile); + absorbed = true; + 
break; + } + } + + if (absorbed) { absorbed = false; continue; } + + /* If value is a built-in function */ + + string name; + while (!isSpacing(inFile.peek())) + name.push_back(inFile.get()); + + for (int i = instr.size() - 1; i >= 0; i--) { + if (instr[i].name == name) { + list temp; + temp.resize(instr[i].argc); + pushToWork(mem, new Function(temp, i, -1), inFile); + absorbed = true; + break; + } + } + + if (absorbed) { absorbed = false; continue; } + + /* If value is a variable */ +/* mem.vars is walked backwards two entries at a time, starting at the last name; on a match the entry stored after it (its value) is copied. NOTE(review): an empty mem.vars is not guarded against, and copying through the MemoryData constructor would keep only the base part - the cast target types are missing here, so confirm. */ + Name* cname; + for (auto i = ++mem.vars.rbegin(); i != mem.vars.rend(); ++(++i)) { + cname = static_cast(*i); + if (cname->value == name) { + MemoryData* value = new MemoryData(*static_cast(*(--i))); + pushToWork(mem, value, inFile); + break; + } + } + } + cout << mem.vars.size(); + // cout << static_cast(mem.work.top())->value << endl; +/* Each variable occupies two entries in mem.vars, hence two deletions per count. */ + for (int i = mem.scopeVars.top(); i > 0; i--) { + delete mem.vars.back(); + mem.vars.pop_back(); + delete mem.vars.back(); + mem.vars.pop_back(); + } + mem.scopeVars.pop(); +} +/* Evaluates the whole stream as one scope with bounds -1 and 100000. NOTE(review): parseScope() stops once the stream position reaches the upper bound, so longer input would be cut off - confirm whether that is intended. */ +void PNParser::parse(ifstream& inFile, Memory& mem) { + parseScope(inFile, mem, -1, 100000); // global scope +} diff --git a/src/PolishNotationParser.h b/src/PolishNotationParser.h new file mode 100644 index 0000000..1de6f26 --- /dev/null +++ b/src/PolishNotationParser.h @@ -0,0 +1,40 @@ +#ifndef ARZU_INTERPRETER_PARSER +#define ARZU_INTERPRETER_PARSER + +#include +#include +#include +#include +#include "Memory.h" +using namespace std; +/* A built-in operation: token name, argument count and the callback run on Memory. */ +struct Instruction { + string name; + unsigned argc; + void (*exec)(Memory&); + + Instruction(const char* str, unsigned argc, void (*exec)(Memory&)); +}; +/* A literal kind: isAtomChar tests the next input character and absorb reads the value from the stream. */ +struct Atom { + bool (*isAtomChar)(char); + MemoryData* (*absorb)(ifstream&); + + Atom(bool (*isAtomChar)(char), MemoryData* (*absorb)(ifstream&)); +}; +/* Evaluator configured through addInstr() and addAtom(); parse() processes a stream. NOTE(review): the element types of deque and list, and the four #include targets above, are missing here, presumably lost in extraction. */ +class PNParser { + deque instr; + list atoms; + + void evaluateFunction(Memory& mem, Function* func, ifstream& inFile); + void pushToWork(Memory& mem, MemoryData* data, ifstream& inFile); + void parseScope(ifstream& inFile, 
Memory& mem, int start, int end); +public: + PNParser() = default; + void addInstr(Instruction); + void addAtom(Atom); + void parse(ifstream&, Memory&); +}; + +#endif diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..aa4a5dc --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,13 @@ +#include "ArzuParser.h" +#include +#include +#include +using namespace std; +/* Opens test.arzu, evaluates it with an ArzuParser into a fresh Memory and closes the stream. NOTE(review): the stream is not checked for a successful open. */ +int main() { + Memory userRAM; + ifstream inFile("test.arzu"); + ArzuParser parser; + parser.parse(inFile, userRAM); + inFile.close(); +} diff --git a/src/test.arzu b/src/test.arzu new file mode 100644 index 0000000..cb31da8 --- /dev/null +++ b/src/test.arzu @@ -0,0 +1,2 @@ +devar $a 10 ++ $a 1 -- cgit v1.2.3