Suppose we have an expression like (5+5 * (x^x-5 | y && 3))
and we'd like to get some computer-understandable representation of that expression, like:
ADD Token[5] (MUL Token[5] (AND (BIT_OR (XOR Token[x] (SUB Token[x] Token[5])) Token[y]) Token[3])
In case if you don't know how to do that or are looking for the solutin right now, you should know that I'm not going to present a correct solution. This post is just a joke. You should use either a Shunting-yard algorithm or a recursive descent parser.
So if you're ready for madness... Let's go!
Let's take Don't repeat yourself principle as a justification. Moreover, let's take it to extreme "Don't repeat". Indeed, why do we need to repeat what compiler's developers already did?
Here we go
#include <iostream> | |
#include <string> | |
#include <vector> | |
class Token { | |
typedef std::vector<std::string> TokenList; | |
TokenList _tokens; | |
public: | |
Token(std::string t) { | |
t = std::string("Token[") + t + "]"; | |
_tokens.push_back(t); | |
} | |
Token(const TokenList &tl) : _tokens(tl) { | |
} | |
const TokenList& tokens() const { return _tokens; } | |
}; | |
Token op(std::string operation, const Token &a) { | |
std::vector<std::string> tokens; | |
tokens.push_back(operation); | |
tokens.insert(tokens.end(), a.tokens().begin(), a.tokens().end()); | |
return Token(tokens); | |
} | |
Token op(std::string operation, const Token &a, const Token &b) { | |
std::vector<std::string> tokens; | |
tokens.push_back(operation); | |
tokens.insert(tokens.end(), a.tokens().begin(), a.tokens().end()); | |
tokens.insert(tokens.end(), b.tokens().begin(), b.tokens().end()); | |
return Token(tokens); | |
} | |
Token brackets(const Token &a) { | |
std::vector<std::string> tokens; | |
// tokens.push_back("BRACKET_OPEN"); | |
tokens.insert(tokens.end(), a.tokens().begin(), a.tokens().end()); | |
// tokens.push_back("BRACKET_CLOSE"); | |
return Token(tokens); | |
} | |
Token operator+(const Token &a, const Token &b) { | |
return op("ADD", a, b); | |
} | |
Token operator*(const Token &a, const Token &b) { | |
return op("MUL", a, b); | |
} | |
Token operator/(const Token &a, const Token &b) { | |
return op("DIV", a, b); | |
} | |
Token operator-(const Token &a, const Token &b) { | |
return op("SUB", a, b); | |
} | |
Token operator-(const Token &a) { | |
return op("MINUS", a); | |
} | |
Token operator^(const Token &a, const Token &b) { | |
return op("XOR", a, b); | |
} | |
Token operator|(const Token &a, const Token &b) { | |
return op("BIT_OR", a, b); | |
} | |
Token operator&(const Token &a, const Token &b) { | |
return op("BIT_AND", a, b); | |
} | |
Token operator&&(const Token &a, const Token &b) { | |
return op("AND", a, b); | |
} | |
Token operator||(const Token &a, const Token &b) { | |
return op("OR", a, b); | |
} | |
std::ostream& operator<<(std::ostream &out, Token &token) { | |
const std::vector<std::string> &tl = token.tokens(); | |
for (size_t i = 0, length = tl.size(); i < length; ++i) { | |
out << tl[i] << " "; | |
} | |
return out; | |
} | |
int main() { | |
#include "expr" | |
std::cout << val << std::endl; | |
} |
import subprocess | |
import re | |
import os | |
print "Input expression and we'll parse it!" | |
expr = raw_input('--> ') | |
tokens = re.findall("(\w+)", expr) | |
expr2 = re.sub("(\w+)", "t\1", expr) | |
f = open("expr", "w") | |
content = "" | |
for i in set(tokens): | |
content += 'Token t' + i + '("' + i + '");' | |
content += "Token val = (" + expr2 + ");" | |
f.write(content) | |
f.close() | |
proc = subprocess.Popen("g++ main.cpp -o prog", shell=True, stderr=subprocess.PIPE) | |
if proc.stderr.readline() == "": | |
proc = subprocess.Popen("./prog", shell=True, stdout=subprocess.PIPE) | |
ans = proc.stdout.readline() | |
os.remove("prog") | |
print ans | |
else: | |
print "Error, check your expression" |
In case you're wondering what the heck is going on: all constants are converted to instances of Token
class, for which I overloaded all the operators. Overloading is done in a way to preserve structure of an expression. The only thing we have to do then is to extract that information.
In case you're not familiar with C++, I recommend you to read something about operator overloading.
As you can see, g++ and python are required for that "parser". Unfortunatelly, priority of a bitwise xor is too low to serve as a power operator.