解析由其他命名集合组成的多个命名集合

2024-03-09

我想用 boost::spirit::qi 编写一个……嗯……不那么简单的解析器。我只了解 Boost Spirit 最基本的内容,而且是在过去几个小时里才第一次接触它。

基本上我需要解析这个:

# comment

# other comment

set "Myset A"
{
    figure "AF 1"
    {
        i 0 0 0
        i 1 2 5
        i 1 1 1
        f 3.1 45.11 5.3
        i 3 1 5
        f 1.1 2.33 5.166
    }

    figure "AF 2"
    {
        i 25 5 1
        i 3 1 3
    }
}

# comment

set "Myset B"
{
    figure "BF 1"
    {
        f 23.1 4.3 5.11
    }
}

set "Myset C"
{
    include "Myset A" # includes all figures from Myset A

    figure "CF"
    {
        i 1 1 1
        f 3.11 5.33 3
    }
}

解析成下面这样的数据结构:

// Point with integer coordinates, matching an `i x y z` line of the input.
struct int_point
{
    int x;
    int y;
    int z;
};
// Point with floating-point coordinates, matching an `f x y z` line of the input.
struct float_point
{
    float x;
    float y;
    float z;
};

// One `figure "..." { ... }` entry of the input: a name plus its point lines,
// with integer and floating-point points kept in separate containers.
struct figure
{
    string name;                       // quoted text following the `figure` keyword
    vector<int_point> int_points;      // points taken from `i` lines
    vector<float_point> float_points;  // points taken from `f` lines
};

// One `set "..." { ... }` entry of the input: a name plus the figures it contains.
struct figure_set
{
    string name;             // quoted text following the `set` keyword
    vector<figure> figures;  // figures declared inside the set's braces
};

vector<figure_set> figure_sets; // fill with the data of the input

当然,让别人替我把它写出来显然太过分了,但您能否给一些提示:我应该阅读哪些资料,以及该如何为这个任务组织语法和解析器?

另外……也许 boost::spirit 并不是完成这项任务的最佳库。如果是这样,那么哪个库更合适?

编辑:这是我目前取得的进展,但我还不确定该如何继续:http://liveworkspace.org/code/212c31dfc0b6fbdf6c462d8d931c0e9f

我能够读取单个数字,但是我还不知道如何解析一组数字。


这是我的看法

我认为卡住你的是下面这条规则:

// Rule for one `figure "name" { ... }` entry. The semantic actions store the
// parsed name in member 0 of the rule's attribute and append every `i` line
// to member 1 and every `f` line to member 2, so the two kinds of point line
// end up in separate containers.
figure  = eps >> "figure" 
    >> name         [ at_c<0>(_val) = _1 ] >> '{' >> 
    *(
            ipoints [ push_back(at_c<1>(_val), _1) ]
          | fpoints [ push_back(at_c<2>(_val), _1) ]
     ) >> '}';

这其实只是表象,根源在于你把混合出现的 i 行和 f 行分别解析到了两个独立的容器中。

请参阅下文了解替代方案。

这是我的完整代码:test.cpp https://gist.github.com/3886058/5bc593804e6d4ed3406f49695673f1adcab2482f#file_test.cpp

//#define BOOST_SPIRIT_DEBUG // before including Spirit
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <fstream>

namespace Format
{
    // Point with integer coordinates (an `i x y z` line).
    struct int_point
    {
        int x;
        int y;
        int z;
    };

    // Point with floating-point coordinates (an `f x y z` line).
    struct float_point
    {
        float x;
        float y;
        float z;
    };

    // One `figure "name" { ... }` entry. Integer and floating-point points
    // are stored in two separate lists.
    struct figure
    {
        std::string name;
        std::vector<int_point> int_points;
        std::vector<float_point> float_points;

        // Stream output; defined further down in the file.
        friend std::ostream& operator<<(std::ostream& out, const figure& fig);
    };

    // One `set "name" { ... }` entry: the names of the sets it includes and
    // the figures it declares itself.
    struct figure_set
    {
        std::string name;
        std::set<std::string> includes;
        std::vector<figure> figures;

        // Stream output; defined further down in the file.
        friend std::ostream& operator<<(std::ostream& out, const figure_set& fs);
    };

    // Everything parsed from one input file: the figure sets in file order.
    using file_data = std::vector<figure_set>;
}

// Adapt the Format structs as Boost.Fusion sequences so the Spirit rules and
// generators in this file can use them as attributes. The member order
// listed here is the order that at_c<N> indexes in the parser's semantic
// actions.
BOOST_FUSION_ADAPT_STRUCT(Format::int_point,   
        (int, x)(int, y)(int, z))
BOOST_FUSION_ADAPT_STRUCT(Format::float_point, 
        (float, x)(float, y)(float, z))
BOOST_FUSION_ADAPT_STRUCT(Format::figure,      
        (std::string, name)
        (std::vector<Format::int_point>, int_points)
        (std::vector<Format::float_point>, float_points))
BOOST_FUSION_ADAPT_STRUCT(Format::figure_set,  
        (std::string, name)
        (std::set<std::string>, includes)
        (std::vector<Format::figure>, figures))

namespace Format
{
    // Writes one figure with a Karma generator, using ' ' as the delimiter
    // between generated elements: the quoted name, then every int point as an
    // `i` line, then every float point as an `f` line. All `i` lines are
    // therefore emitted before any `f` line, whatever their order in the input.
    std::ostream& operator<<(std::ostream& os, figure const& o)
    {
        using namespace boost::spirit::karma;
        return os << format_delimited(
                // no_delimit keeps the quotes tight around the name
                "\n    figure" << no_delimit [ '"' << string << '"' ] << "\n    {"
                << *("\n       i" << int_ << int_ << int_)
                << *("\n       f" << float_ << float_ << float_)
                << "\n    }"
                , ' ', o);
    }

    // Writes one figure set with a Karma generator, using ' ' as the
    // delimiter: the quoted set name, one `include "..."` line per included
    // set name, then each figure, then the closing brace.
    std::ostream& operator<<(std::ostream& os, figure_set const& o)
    {
        using namespace boost::spirit::karma;
        return os << format_delimited(
                "\nset" << no_delimit [ '"' << string << '"' ] << "\n{"
                << *("\n    include " << no_delimit [ '"' << string << '"' ])
                // `stream` emits each figure through its own operator<<
                << *stream
                << "\n}"
                , ' ', o);
    }
}

// File-local Qi grammars: the skipper (comments and whitespace) and the
// parser that fills a Format::file_data.
namespace /*anon*/
{
    namespace phx=boost::phoenix;
    namespace qi =boost::spirit::qi;

    // Skip grammar: matches either a `#` comment or a whitespace character.
    template <typename Iterator> struct skipper
        : public qi::grammar<Iterator>
    {
        skipper() : skipper::base_type(start, "skipper")
        {
            using namespace qi;

            // A comment runs from '#' to the end of the line, or to the end
            // of the input when the last line has no newline.
            comment = '#' >> *(char_ - eol) >> (eol|eoi);
            start   = comment | qi::space;

            BOOST_SPIRIT_DEBUG_NODE(start);
            BOOST_SPIRIT_DEBUG_NODE(comment);
        }

      private:
        qi::rule<Iterator> start, comment;
    };

    // Main grammar: parses zero or more `set` entries into a
    // Format::file_data, skipping input matched by skipper<Iterator>.
    template <typename Iterator> struct parser
        : public qi::grammar<Iterator, Format::file_data(), skipper<Iterator> >
    {
        parser() : parser::base_type(start, "parser")
        {
            using namespace qi;
            using phx::push_back;
            using phx::at_c;

            // NOTE(review): every rule starts with `eps`; presumably this works
            // around attribute-handling quirks of short sequences. Confirm
            // before removing it.

            // Quoted name. lexeme[] turns the skipper off inside the quotes,
            // so spaces (and '#') in a name are kept.
            name    = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];

            include = eps >> "include" >> name;
            ipoints = eps >> "i"       >> int_         >> int_   >> int_;
            fpoints = eps >> "f"       >> float_       >> float_ >> float_;

            // The semantic actions fill the figure attribute by position:
            // member 0 is the name, member 1 the int points, member 2 the
            // float points. `i` and `f` lines may be interleaved in the input
            // but are collected into separate vectors.
            figure  = eps >> "figure" 
                >> name         [ at_c<0>(_val) = _1 ] >> '{' >> 
                *(
                        ipoints [ push_back(at_c<1>(_val), _1) ]
                      | fpoints [ push_back(at_c<2>(_val), _1) ]
                 ) >> '}';
            // Inside a set, all `include` lines must come before the first figure.
            set     = eps >> "set" >> name >> '{' >> *include >> *figure >> '}';
            start   = *set;
        }

      private:
        qi::rule<Iterator, std::string()        , skipper<Iterator> > name, include;
        qi::rule<Iterator, Format::int_point()  , skipper<Iterator> > ipoints;
        qi::rule<Iterator, Format::float_point(), skipper<Iterator> > fpoints;
        qi::rule<Iterator, Format::figure()     , skipper<Iterator> > figure;
        qi::rule<Iterator, Format::figure_set() , skipper<Iterator> > set;
        qi::rule<Iterator, Format::file_data()  , skipper<Iterator> > start;
    };
}

namespace Parser {

    bool parsefile(const std::string& spec, Format::file_data& data)
    {
        std::ifstream in(spec.c_str());
        in.unsetf(std::ios::skipws);

        std::string v;
        v.reserve(4096);
        v.insert(v.end(), std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());

        if (!in) 
            return false;

        typedef char const * iterator_type;
        iterator_type first = &v[0];
        iterator_type last = first+v.size();

        try
        {
            parser<iterator_type>  p;
            skipper<iterator_type> s;
            bool r = qi::phrase_parse(first, last, p, s, data);

            r = r && (first == last);

            if (!r)
                std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
            return r;
        }
        catch (const qi::expectation_failure<char const *>& e)
        {
            std::cerr << "FIXME: expected " << e.what_ << ", got '" << std::string(e.first, e.last) << "'" << std::endl;
            return false;
        }
    }
}

int main()
{
    Format::file_data data;
    bool ok = Parser::parsefile("input.txt", data);

    std::cerr << "Parse " << (ok?"success":"failed") << std::endl;
    std::cout << "# figure sets exported automatically by karma\n\n";

    for (auto& set : data)
        std::cout << set;
}

它会把解析得到的数据重新输出以供验证:output.txt https://gist.github.com/3886058/5bc593804e6d4ed3406f49695673f1adcab2482f#file_output.txt

Parse success
# figure sets exported automatically by karma


set "Myset A"
{ 
    figure "AF 1"
    { 
       i 0 0 0 
       i 1 2 5 
       i 1 1 1 
       i 3 1 5 
       f 3.1 45.11 5.3 
       f 1.1 2.33 5.166 
    }  
    figure "AF 2"
    { 
       i 25 5 1 
       i 3 1 3 
    }  
} 
set "Myset B"
{ 
    figure "BF 1"
    { 
       f 23.1 4.3 5.11 
    }  
} 
set "Myset C"
{ 
    include  "Myset A"
    figure "CF"
    { 
       i 1 1 1 
       f 3.11 5.33 3.0 
    }  
}

你会注意到

  • 点行的顺序发生了变化(所有 int_points 都排在所有 float_points 之前)
  • 此外,输出中会补上原本没有的小数位,例如最后一行用 3.0 代替了 3,以表明该值是浮点类型。
  • 你在问题中“忘记”(?)了 include(包含)这一部分

替代方案

可以换一种数据结构,让各个点行保持原始顺序:

// A point line of either kind. Keeping both kinds in one variant lets a
// single container hold them in input order.
typedef boost::variant<int_point, float_point> if_point;

// Figure whose `i` and `f` lines share one container instead of two.
struct figure
{
    std::string            name;       // quoted text following the `figure` keyword
    std::vector<if_point>  if_points;  // all point lines, in the order parsed
};

现在规则变得简单:

// Quoted name; lexeme[] turns the skipper off inside the quotes.
name    = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];

// One rule per line kind: a keyword followed by its operands.
include = eps >> "include" >> name;
ipoints = eps >> "i"       >> int_         >> int_   >> int_;
fpoints = eps >> "f"       >> float_       >> float_ >> float_;

// No semantic actions here: the figure rule is written as a plain sequence,
// with `i` and `f` lines accepted in any order by a single alternative.
figure  = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';
set     = eps >> "set"    >> name >> '{' >> *include >> *figure  >> '}';
start   = *set;

注意下面这条规则的简洁与优雅:

figure  = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';

并且输出与输入的顺序完全一致:output.txt https://gist.github.com/3886058/39a925610419759cbe8640364c5785a796ac0d27#file_output.txt

再次,完整的演示代码(仅在 github 上):test.cpp https://gist.github.com/3886058/39a925610419759cbe8640364c5785a796ac0d27#file_test.cpp

额外更新

最后,我写出了自己第一个像样的 Karma 语法来输出结果:

// Karma (output) rules; their shape mirrors the input format.
// no_delimit keeps the quotes tight around the name.
name    = no_delimit ['"' << string << '"'];
include = "include" << name;
// Each point line starts on its own line.
ipoints = "\n        i" << int_   << int_   << int_;
fpoints = "\n        f" << float_ << float_ << float_;

// Points go through one alternative, so an `i` or `f` line is produced for
// each stored point in container order.
figure  = "figure" << name << "\n    {" << *(ipoints | fpoints) << "\n    }";
set     = "set"    << name << "\n{" 
            << *("\n   " << include)
            << *("\n   " << figure)  << "\n}";

// Header comment, then the sets separated by end-of-line.
start   = "# figure sets exported automatically by karma\n\n" 
            << set % eol;

这实际上比我预期的要顺手得多。完整更新后的 gist 请见最新版本 https://gist.github.com/3886058/f58cad691ad692302b42a9ef579d7cf6ed05dbfd:test.hpp https://gist.github.com/3886058/f58cad691ad692302b42a9ef579d7cf6ed05dbfd#file_test.cpp

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

解析其他命名集的多个命名集 的相关文章

随机推荐