Boolean expression (grammar) parser in c++

Either use a parser generator as Oli Charlesworth already mentioned (yacc, bison, antlr; the latter is in my experience better suited for C++ than the other two although it is a while that I looked at any of them) or create a simple recursive descent parser: for a language as simple as yours this may be the easier approach.


Here is an implementation based on Boost Spirit.

Because Boost Spirit generates recursive descent parsers based on expression templates, honouring the 'idiosyncratic' (sic) precedence rules (as mentioned by others) is quite tedious. Therefore the grammar lacks a certain elegance.

Abstract Data Type

I defined a tree data structure using Boost Variant's recursive variant support, note the definition of expr:

struct op_or  {}; // tag
struct op_and {}; // tag
struct op_xor {}; // tag
struct op_not {}; // tag

typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;

typedef boost::variant<var, 
        boost::recursive_wrapper<unop <op_not> >, 
        boost::recursive_wrapper<binop<op_and> >,
        boost::recursive_wrapper<binop<op_xor> >,
        boost::recursive_wrapper<binop<op_or> >
        > expr;

(full source below)

Grammar Rules

The following is the (slightly tedious) grammar definition, as mentioned.

Although I don't consider this grammar optimal, it is quite readable, and we have ourselves a statically compiled parser with strongly typed AST datatype in roughly 50 lines of code. Things could be considerably worse.

template <typename It, typename Skipper = qi::space_type>
    struct parser : qi::grammar<It, expr(), Skipper>
{
    parser() : parser::base_type(expr_)
    {
        using namespace qi;
        expr_  = or_.alias();

        not_ = ("not" > simple       ) [ _val = phx::construct<unop <op_not>>(_1)     ] | simple [ _val = _1 ];
#ifdef RIGHT_ASSOCIATIVE
        or_  = (xor_ >> "or"  >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2) ] | xor_   [ _val = _1 ];
        xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2) ] | and_   [ _val = _1 ];
        and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2) ] | not_   [ _val = _1 ];
#else
        or_  = xor_ [ _val = _1 ] >> *("or"  >> xor_ [ _val = phx::construct<binop<op_or>> (_val, _1) ]);
        xor_ = and_ [ _val = _1 ] >> *("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);
        and_ = not_ [ _val = _1 ] >> *("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);
#endif

        simple = (('(' > expr_ > ')') | var_);
        var_ = qi::lexeme[ +alpha ];
    }

  private:
    qi::rule<It, var() , Skipper> var_;
    qi::rule<It, expr(), Skipper> not_, and_, xor_, or_, simple, expr_;
};

Note: I've left the old rule definitions that would result in right-associativity in the binary operators for reference, but left-associativity is the more natural, and therefore taken by default

Operating on the syntax tree

Obviously, you'd want to evaluate the expressions. For now, I decided to stop at just printing, so I don't have to do the lookup table for named variables :)

Traversing a recursive variant may look cryptic at first, but the boost::static_visitor<> is surprisingly simple once you get the hang of it:

struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    //
    void operator()(const var& v) const { _os << v; }

    void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2); }
    void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        _os << "(";
            boost::apply_visitor(*this, l);
            _os << op;
            boost::apply_visitor(*this, r);
        _os << ")";
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << "(";
            _os << "!";
            boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }

Test output:

For the test cases in the code, the following is output, demonstrating correct handling of the precedence rules by adding (redundant) parentheses:

Live On Coliru

result: ((a & b) ^ ((c & d) | (a & b)))
result: ((a & b) ^ ((c & d) | (a & b)))
result: (a & b)
result: (a | b)
result: (a ^ b)
result: (!a)
result: ((!a) & b)
result: (!(a & b))
result: ((a | b) | c)

Note, compare to Live On Coliru, with -DRIGHT_ASSOCIATIVE

Full Code:

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;

struct op_or  {};
struct op_and {};
struct op_xor {};
struct op_not {};

typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;

typedef boost::variant<var, 
        boost::recursive_wrapper<unop <op_not> >, 
        boost::recursive_wrapper<binop<op_and> >,
        boost::recursive_wrapper<binop<op_xor> >,
        boost::recursive_wrapper<binop<op_or> >
        > expr;

template <typename tag> struct binop 
{ 
    explicit binop(const expr& l, const expr& r) : oper1(l), oper2(r) { }
    expr oper1, oper2; 
};

template <typename tag> struct unop  
{ 
    explicit unop(const expr& o) : oper1(o) { }
    expr oper1; 
};

struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    //
    void operator()(const var& v) const { _os << v; }

    void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2); }
    void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        _os << "(";
            boost::apply_visitor(*this, l);
            _os << op;
            boost::apply_visitor(*this, r);
        _os << ")";
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << "(";
            _os << "!";
            boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }

template <typename It, typename Skipper = qi::space_type>
    struct parser : qi::grammar<It, expr(), Skipper>
{
    parser() : parser::base_type(expr_)
    {
        using namespace qi;

        expr_  = or_.alias();

        not_ = ("not" > simple       ) [ _val = phx::construct<unop <op_not>>(_1)     ] | simple [ _val = _1 ];
#ifdef RIGHT_ASSOCIATIVE
        or_  = (xor_ >> "or"  >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2) ] | xor_   [ _val = _1 ];
        xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2) ] | and_   [ _val = _1 ];
        and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2) ] | not_   [ _val = _1 ];
#else
        or_  = xor_ [ _val = _1 ] >> *("or"  >> xor_ [ _val = phx::construct<binop<op_or>> (_val, _1) ]);
        xor_ = and_ [ _val = _1 ] >> *("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);
        and_ = not_ [ _val = _1 ] >> *("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);
#endif

        simple = (('(' > expr_ > ')') | var_);
        var_ = qi::lexeme[ +alpha ];

        BOOST_SPIRIT_DEBUG_NODE(expr_);
        BOOST_SPIRIT_DEBUG_NODE(or_);
        BOOST_SPIRIT_DEBUG_NODE(xor_);
        BOOST_SPIRIT_DEBUG_NODE(and_);
        BOOST_SPIRIT_DEBUG_NODE(not_);
        BOOST_SPIRIT_DEBUG_NODE(simple);
        BOOST_SPIRIT_DEBUG_NODE(var_);
    }

  private:
    qi::rule<It, var() , Skipper> var_;
    qi::rule<It, expr(), Skipper> not_, and_, xor_, or_, simple, expr_;
};

int main()
{
    for (auto& input : std::list<std::string> {
            // From the OP:
            "(a and b) xor ((c and d) or (a and b));",
            "a and b xor (c and d or a and b);",

            /// Simpler tests:
            "a and b;",
            "a or b;",
            "a xor b;",
            "not a;",
            "not a and b;",
            "not (a and b);",
            "a or b or c;",
            })
    {
        auto f(std::begin(input)), l(std::end(input));
        parser<decltype(f)> p;

        try
        {
            expr result;
            bool ok = qi::phrase_parse(f,l,p > ';',qi::space,result);

            if (!ok)
                std::cerr << "invalid input\n";
            else
                std::cout << "result: " << result << "\n";

        } catch (const qi::expectation_failure<decltype(f)>& e)
        {
            std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
        }

        if (f!=l) std::cerr << "unparsed: '" << std::string(f,l) << "'\n";
    }

    return 0;
}

Bonus:

For bonus points, to get a tree exactly like shown in the OP:

Live On Coliru

static const char indentstep[] = "    ";

struct tree_print : boost::static_visitor<void>
{
    tree_print(std::ostream& os, const std::string& indent=indentstep) : _os(os), _indent(indent) {}
    std::ostream& _os;
    std::string _indent;

    void operator()(const var& v) const { _os << _indent << v << std::endl; }

    void operator()(const binop<op_and>& b) const { print("and ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print("or  ", b.oper2, b.oper1); }
    void operator()(const binop<op_xor>& b) const { print("xor ", b.oper2, b.oper1); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        boost::apply_visitor(tree_print(_os, _indent+indentstep), l);
        _os << _indent << op << std::endl;
        boost::apply_visitor(tree_print(_os, _indent+indentstep), r);
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << _indent << "!";
        boost::apply_visitor(tree_print(_os, _indent+indentstep), u.oper1);
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ 
    boost::apply_visitor(tree_print(os), e); return os; 
}

result:

            a
        and 
            b
    or  
            c
        and 
            d
xor 
        a
    and 
        b