Basic Expression Parsing
Click here for advanced expression parsing
When writing your own calculator it is necessary to build a converter that can transform an input mathematical expression such as ( 1 + 8 ) – ( ( 3 * 4 ) / 2 )
, into a format that is more suited for evaluation by computers.
When evaluating expressions such as the one above (known as “infix notation“), that which appears simple and intuitive to us humans, is usually not so straightforward to implement in a programming language.
The shunting-yard algorithm is a method for parsing mathematical expressions written in infix notation to Reverse Polish Notation (RPN). The RPN notation is different to infix notation in that every operator (+, -, * etc) comes after the operands (numbers) and there are no parentheses (brackets).
So ( 3 * 4 )
for example becomes 3 4 *
.
When given an input string in Reverse Polish Notation, it is then possible to employ a simple algorithm based around the use of a stack in order to evaluate the RPN expression and determine the mathematical result.
The Shunting-yard algorithm
This pseudocode shows how the shunting-yard algorithm converts an expression given in conventional infix notation into Reverse Polish Notation:
For each token { If (token is a number) { Add number to the output queue } If (token is an operator eg +,-,*...) { While (stack not empty AND stack top element is an operator) { If ((token = left associative AND precedence <= stack top element) OR (token = right associative AND precedence < stack top element)) { Pop stack onto the output queue. Exit while loop. } } Push token onto stack } If (token is left bracket '(') { Push token on to stack } If (token is right bracket ')') { While (stack not empty AND stack top element not a left bracket) { Pop the stack onto output queue } Pop the stack } } While (stack not empty) { Pop stack onto output queue }
Implementing the the shunting-yard algorithm in Java
The Java function to implement the shunting yard algorithm described above is as follows:
// Convert infix expression format into reverse Polish notation public static String[] infixToRPN(String[] inputTokens) { ArrayList<String> out = new ArrayList<String>(); Stack<String> stack = new Stack<String>(); // For each tokens for (String token : inputTokens) { // If token is an operator if (isOperator(token)) { // While stack not empty AND stack top element // is an operator while (!stack.empty() && isOperator(stack.peek())) { if ((isAssociative(token, LEFT_ASSOC) && cmpPrecedence(token, stack.peek()) <= 0) || (isAssociative(token, RIGHT_ASSOC) && cmpPrecedence(token, stack.peek()) < 0)) { out.add(stack.pop()); continue; } break; } // Push the new operator on the stack stack.push(token); } // If token is a left bracket '(' else if (token.equals("(")) { stack.push(token); // } // If token is a right bracket ')' else if (token.equals(")")) { while (!stack.empty() && !stack.peek().equals("(")) { out.add(stack.pop()); } stack.pop(); } // If token is a number else { out.add(token); } } while (!stack.empty()) { out.add(stack.pop()); } String[] output = new String[out.size()]; return out.toArray(output); }
Evaluating expressions in Reverse Polish Notation
Once we have obtained our input expression string in RPN format, we evaluate the expression using a simple algorithm, also based around the use of a stack. Pseudocode as follows:
For each token { If (token is a number) { Push value onto stack } If (token is an operator) { Pop 2 top values from the stack Evaluate operator using popped values as args Push result onto stack } }
The single value remaining on the stack is the evaluation. Java code for this is as follows:
public static double RPNtoDouble(String[] tokens) { Stack<String> stack = new Stack<String>(); // For each token for (String token : tokens) { // If the token is a number push it onto the stack if (!isOperator(token)) { stack.push(token); } else { // Pop the two top entries Double d2 = Double.valueOf( stack.pop() ); Double d1 = Double.valueOf( stack.pop() ); //Get the result Double result = token.compareTo("+") == 0 ? d1 + d2 : token.compareTo("-") == 0 ? d1 - d2 : token.compareTo("*") == 0 ? d1 * d2 : d1 / d2; // Push result onto stack stack.push( String.valueOf( result )); } } return Double.valueOf(stack.pop()); }
Example
Consider the expression
( 1 + 2 ) * ( 3 / 4 ) - ( 5 + 6 )
.
Passing this as a string argument to the infixToRPN function will yield this RPN format:
.
1 2 + 3 4 / * 5 6 + -
It is then simply a matter of evaluating the RPN format to give the desired mathematical result of -8.75
.
Putting it all together: Java Implementation of Basic Expression Parser
Full Java code listing as follows:
package expressionparser; import java.util.*; public class ExpressionParser { // Associativity constants for operators private static final int LEFT_ASSOC = 0; private static final int RIGHT_ASSOC = 1; // Operators private static final Map<String, int[]> OPERATORS = new HashMap<String, int[]>(); static { // Map<"token", []{precendence, associativity}> OPERATORS.put("+", new int[] { 0, LEFT_ASSOC }); OPERATORS.put("-", new int[] { 0, LEFT_ASSOC }); OPERATORS.put("*", new int[] { 5, LEFT_ASSOC }); OPERATORS.put("/", new int[] { 5, LEFT_ASSOC }); } // Test if token is an operator private static boolean isOperator(String token) { return OPERATORS.containsKey(token); } // Test associativity of operator token private static boolean isAssociative(String token, int type) { if (!isOperator(token)) { throw new IllegalArgumentException("Invalid token: " + token); } if (OPERATORS.get(token)[1] == type) { return true; } return false; } // Compare precedence of operators. private static final int cmpPrecedence(String token1, String token2) { if (!isOperator(token1) || !isOperator(token2)) { throw new IllegalArgumentException("Invalid tokens: " + token1 + " " + token2); } return OPERATORS.get(token1)[0] - OPERATORS.get(token2)[0]; } // Convert infix expression format into reverse Polish notation public static String[] infixToRPN(String[] inputTokens) { ArrayList<String> out = new ArrayList<String>(); Stack<String> stack = new Stack<String>(); // For each token for (String token : inputTokens) { // If token is an operator if (isOperator(token)) { // While stack not empty AND stack top element // is an operator while (!stack.empty() && isOperator(stack.peek())) { if ((isAssociative(token, LEFT_ASSOC) && cmpPrecedence(token, stack.peek()) <= 0) || (isAssociative(token, RIGHT_ASSOC) && cmpPrecedence(token, stack.peek()) < 0)) { out.add(stack.pop()); continue; } break; } // Push the new operator on the stack stack.push(token); } // If token is a left bracket '(' else if (token.equals("(")) { stack.push(token); // } // If token is a right bracket ')' else if (token.equals(")")) { while (!stack.empty() && !stack.peek().equals("(")) { out.add(stack.pop()); } stack.pop(); } // If token is a number else { out.add(token); } } while (!stack.empty()) { out.add(stack.pop()); } String[] output = new String[out.size()]; return out.toArray(output); } public static double RPNtoDouble(String[] tokens) { Stack<String> stack = new Stack<String>(); // For each token for (String token : tokens) { // If the token is a value push it onto the stack if (!isOperator(token)) { stack.push(token); } else { // Token is an operator: pop top two entries Double d2 = Double.valueOf( stack.pop() ); Double d1 = Double.valueOf( stack.pop() ); //Get the result Double result = token.compareTo("+") == 0 ? d1 + d2 : token.compareTo("-") == 0 ? d1 - d2 : token.compareTo("*") == 0 ? d1 * d2 : d1 / d2; // Push result onto stack stack.push( String.valueOf( result )); } } return Double.valueOf(stack.pop()); } public static void main(String[] args) { String[] input = "( 1 + 2 ) * ( 3 / 4 ) - ( 5 + 6 )".split(" "); String[] output = infixToRPN(input); // Build output RPN string minus the commas for (String token : output) { System.out.print(token + " "); } // Feed the RPN string to RPNtoDouble to give result Double result = RPNtoDouble( output ); } }
C++ Implementation of Basic Expression Parser
#include <iostream> #include <sstream> #include <list> #include <stack> #include <map> #include <string> #include <vector> #include <iterator> #include <stdlib.h> const int LEFT_ASSOC = 0; const int RIGHT_ASSOC = 1; // Map the different operators: +, -, *, / etc typedef std::map< std::string, std::pair< int,int >> OpMap; typedef std::vector<std::string>::const_iterator cv_iter; typedef std::string::iterator s_iter; const OpMap::value_type assocs[] = { OpMap::value_type( "+", std::make_pair<int,int>( 0, LEFT_ASSOC ) ), OpMap::value_type( "-", std::make_pair<int,int>( 0, LEFT_ASSOC ) ), OpMap::value_type( "*", std::make_pair<int,int>( 5, LEFT_ASSOC ) ), OpMap::value_type( "/", std::make_pair<int,int>( 5, LEFT_ASSOC ) ) }; const OpMap opmap( assocs, assocs + sizeof( assocs ) / sizeof( assocs[ 0 ] ) ); // Test if token is an pathensesis bool isParenthesis( const std::string& token) { return token == "(" || token == ")"; } // Test if token is an operator bool isOperator( const std::string& token) { return token == "+" || token == "-" || token == "*" || token == "/"; } // Test associativity of operator token bool isAssociative( const std::string& token, const int& type) { const std::pair<int,int> p = opmap.find( token )->second; return p.second == type; } // Compare precedence of operators. int cmpPrecedence( const std::string& token1, const std::string& token2 ) { const std::pair<int,int> p1 = opmap.find( token1 )->second; const std::pair<int,int> p2 = opmap.find( token2 )->second; return p1.first - p2.first; } // Convert infix expression format into reverse Polish notation bool infixToRPN( const std::vector<std::string>& inputTokens, const int& size, std::vector<std::string>& strArray ) { bool success = true; std::list<std::string> out; std::stack<std::string> stack; // While there are tokens to be read for ( int i = 0; i < size; i++ ) { // Read the token const std::string token = inputTokens[ i ]; // If token is an operator if ( isOperator( token ) ) { // While there is an operator token, o2, at the top of the stack AND // either o1 is left-associative AND its precedence is equal to that of o2, // OR o1 has precedence less than that of o2, const std::string o1 = token; if ( !stack.empty() ) { std::string o2 = stack.top(); while ( isOperator( o2 ) && ( ( isAssociative( o1, LEFT_ASSOC ) && cmpPrecedence( o1, o2 ) == 0 ) || ( cmpPrecedence( o1, o2 ) < 0 ) ) ) { // pop o2 off the stack, onto the output queue; stack.pop(); out.push_back( o2 ); if ( !stack.empty() ) o2 = stack.top(); else break; } } // push o1 onto the stack. stack.push( o1 ); } // If the token is a left parenthesis, then push it onto the stack. else if ( token == "(" ) { // Push token to top of the stack stack.push( token ); } // If token is a right bracket ')' else if ( token == ")" ) { // Until the token at the top of the stack is a left parenthesis, // pop operators off the stack onto the output queue. std::string topToken = stack.top(); while ( topToken != "(" ) { out.push_back(topToken ); stack.pop(); if ( stack.empty() ) break; topToken = stack.top(); } // Pop the left parenthesis from the stack, but not onto the output queue. if ( !stack.empty() ) stack.pop(); // If the stack runs out without finding a left parenthesis, // then there are mismatched parentheses. if ( topToken != "(" ) { return false; } } // If the token is a number, then add it to the output queue. else { out.push_back( token ); } } // While there are still operator tokens in the stack: while ( !stack.empty() ) { const std::string stackToken = stack.top(); // If the operator token on the top of the stack is a parenthesis, // then there are mismatched parentheses. if ( isParenthesis( stackToken ) ) { return false; } // Pop the operator onto the output queue./ out.push_back( stackToken ); stack.pop(); } strArray.assign( out.begin(), out.end() ); return success; } double RPNtoDouble( std::vector<std::string> tokens ) { std::stack<std::string> st; // For each token for ( int i = 0; i < (int) tokens.size(); ++i ) { const std::string token = tokens[ i ]; // If the token is a value push it onto the stack if ( !isOperator(token) ) { st.push(token); } else { double result = 0.0; // Token is an operator: pop top two entries const std::string val2 = st.top(); st.pop(); const double d2 = strtod( val2.c_str(), NULL ); if ( !st.empty() ) { const std::string val1 = st.top(); st.pop(); const double d1 = strtod( val1.c_str(), NULL ); //Get the result result = token == "+" ? d1 + d2 : token == "-" ? d1 - d2 : token == "*" ? d1 * d2 : d1 / d2; } else { if ( token == "-" ) result = d2 * -1; else result = d2; } // Push result onto stack std::ostringstream s; s << result; st.push( s.str() ); } } return strtod( st.top().c_str(), NULL ); } std::vector<std::string> getExpressionTokens( const std::string& expression ) { std::vector<std::string> tokens; std::string str = ""; for ( int i = 0; i < (int) expression.length(); ++i ) { const std::string token( 1, expression[ i ] ); if ( isOperator( token ) || isParenthesis( token ) ) { if ( !str.empty() ) { tokens.push_back( str ) ; } str = ""; tokens.push_back( token ); } else { // Append the numbers if ( !token.empty() && token != " " ) { str.append( token ); } else { if ( str != "" ) { tokens.push_back( str ); str = ""; } } } } return tokens; } // Print iterators in a generic way template<typename T, typename InputIterator> void Print( const std::string& message, const InputIterator& itbegin, const InputIterator& itend, const std::string& delimiter) { std::cout << message << std::endl; std::copy(itbegin, itend, std::ostream_iterator<T>(std::cout, delimiter.c_str())); std::cout << std::endl; } int main() { std::string s = "( 1 + 2) * ( 3 / 4 )-(5+6)"; Print<char, s_iter>( "Input expression:", s.begin(), s.end(), "" ); // Tokenize input expression std::vector<std::string> tokens = getExpressionTokens( s ); // Evaluate feasible expressions std::vector<std::string> rpn; if ( infixToRPN( tokens, tokens.size(), rpn ) ) { double d = RPNtoDouble( rpn ); Print<std::string, cv_iter>( "RPN tokens: ", rpn.begin(), rpn.end(), " " ); std::cout << "Result = " << d << std::endl; } else { std::cout << "Mis-match in parentheses" << std::endl; } return 0; }
So when running the above code as a simple Windows Console application using an example input expression of ( 1 + 2) * ( 3 / 4 )-(5+6)
we get the following RPN tokens and mathematical result:
If an inappropriate input expression is used, one with a mis-match in the number of left/right parentheses such as ( 1 + 2 * ( 3 / 4 )-(5+6), then we get an error:
And if we insert an additional minus sign ‘-‘ so that the original expression used is now -( 1 + 2) * ( 3 / 4 )-(5+6)
, the algorithm recognizes that this should get treated as a unary minus operator, resulting in the following output:
UPDATE: 13 April 2013
Extending the algorithm to handle further mathematical expressions
So far our implementation has been applied to handle basic expressions based on standard mathematical operators +, -, *, / etc. The following downloadable C++ code makes a number of further improvements to handle additional mathematical operators sin, cos, tan, log, exp etc, as well as much more complicated subexpressions.
Additional sanity checks are included to make sure there are no mismatched numbers of parentheses, as well as some additional work on the tokenization of RPM strings to handle unary minus operators occuring at positions where an expression is expected eg -8 + 5 = -3 or 11 ^ -7 = 5.13158e-08.
All examples in this code have been verified using Google’s online calculator. To use Google’s built-in calculator simply enter your mathematical expression into the search box. For example:
Some examples are shown in the following table:
Expression (infix) | RPN (postfix) | Result |
---|---|---|
exp( 1.11 ) | 1.11 exp | 3.034358 |
sin( cos( 90 * pi / 180 ) ) | 90 pi * 180 / cos sin | 0.000001 |
34.5*(23+1.5)/2 | 34.5 23 1.5 + * 2 / | 422.625000 |
5 + ((1 + 2) * 4) – 3 | 5 1 2 + 4 * + 3 – | 14 |
( 1 + 2 ) * ( 3 / 4 ) ^ ( 5 + 6 ) | 1 2 + 3 4 / 5 6 + ^ * | 0.126705 |
3/2 + 4*(12+3) | 3 2 / 4 12 3 + * + | 61.5 |
PI*pow(9/2,2) | PI 9 2 / 2 pow * | 63.617197 |
((2*(6-1))/2)*4 | 2 6 1 – * 2 / 4 * | 20 |
ln(2)+3^5 | 2 ln 3 5 ^ + | 243.693147 |
11 ^ -7 | 11 -7 ^ | 5.13158e-08 |
cos ( ( 1.3 + 1 ) ^ ( 1 / 3 ) ) – log ( -2 * 3 / -14 ) | 1.3 1 + 1 3 / ^ cos -2 3 * -14 / log – | 0.616143 |
1 * -sin( Pi / 2) | 1 Pi 2 / -sin * | -1 |
-8 + 5 | -8 5 + | -3 |
1 – (-2^2) – 1 | 1 1 -2 2 ^ * – 1 – | 4 |
The file main.cpp
is an example usage of applying the Tokenize
method of the ExpressionParser
class to first prime the input expression string into a suitable format, which is subsequently converted to Reverse Polish Notation using the InfixToRPN
method.
If successful, the reverse Polish Notation is then evaluated to give the mathematical result.
Example usage shown here:
#include "ExpressionParser.h" // Print iterators in a generic way template<typename T, typename InputIterator> void Print( const std::string& message, const InputIterator& itbegin, const InputIterator& itend, const std::string& delimiter) { std::cout << message << std::endl; std::copy(itbegin, itend, std::ostream_iterator<T>(std::cout, delimiter.c_str())); std::cout << std::endl; } int main(int argc, char** argv) { std::string result; //std::string originalInput = "-11 ^ -7"; //std::string originalInput = "-1*- sin( Pi / -2)"; //std::string originalInput = "-8 + 5"; //std::string originalInput = "5 + (-1 + 2 )"; //std::string originalInput = "cos ( ( -1.3 + -1 ) ^ ( 1 / -3 ) ) - log ( -2 * 3 / -14 )"; //std::string originalInput = "cos ( ( 1.3 + 1 ) ^ ( 1 / 3 ) )"; //std::string originalInput = "( 1.3 + -1 ) ^ ( 1 / 3 )"; //std::string originalInput = "cos(- 1.32001)"; //std::string originalInput = "-8+3"; //std::string originalInput = "ln(2)+3^5"; //std::string originalInput = "((2*(6-1))/2)*4"; //std::string originalInput = "PI*pow(9/2,2)"; //std::string originalInput = "PI*pow(9/-2,2)"; //std::string originalInput = "pow(2, 3)"; //std::string originalInput = "3/2 + 4*(12+3)"; //std::string originalInput = "( 1 + 2 ) * ( 3 / 4 ) ^ ( 5 + 6 )"; //std::string originalInput = "5 + ((1 + 2) * -4) - 3"; //std::string originalInput = "34.5*(23+1.5)/2"; //std::string originalInput = "sin( cos( 90 * pi / 180 ) )"; std::string originalInput = "exp( 1.11 )"; //std::string originalInput = "5 + ((1 + 2) * 4) - 3 "; //std::string originalInput = "3 + 4 * 2 / ( 1 - 5 ) ^ 2 ^ 3"; //std::string originalInput = "2.5^3"; //std::string originalInput = "-cos(1/-20)+sin(1/30)+cos(1/50)"; //std::string originalInput = "--cos(1/-20)+-sin(1/30)"; //std::string originalInput = "--cos(1/-20)--sin(1/30)"; //std::string originalInput = "--cos(1/--20)"; //std::string originalInput = "SQRT(4 )"; //std::string originalInput = "5 + (-1 + 2 )"; //std::string originalInput = "-(2+3)*(4*-10-1)+100"; //std::string originalInput = "(2+3)*-(4*10-1)+100"; //std::string originalInput = "1 - (-2^2) - 1"; //std::string originalInput = "(4*-10-1)"; //std::string originalInput = "-(2+3)*(4*-10-1)"; //std::string originalInput = "-3/2 + 4*-( 12+3)"; //std::string originalInput = "-3/2 + 4*(-12-3)"; //std::string originalInput = "-3/2 + -4*(-12-3)"; Print<char, std::string::iterator>( "Input expression:", originalInput.begin(), originalInput.end(), "" ); ExpressionParser parser( originalInput ); if ( !parser.MatchingParetheses() ) { std::cout << "Error: mismatched parentheses or empty input" << std::endl; return 1; } std::vector<std::string> RPN; if ( parser.InfixToRPN( RPN ) ) { Print<std::string, std::vector<std::string>::const_iterator>( "RPN tokens: ", RPN.begin(), RPN.end(), " " ); if ( parser.Evaluate( RPN, result ) ) { std::cout << std::endl; std::cout << "Result = " << result << std::endl; } } else { std::cout << "Error: mismatched parentheses" << std::endl; } return 0; }
Example outputs:
Download below. Please contact me if you need any help in using the software.