split adaptor

リストの分割は

  1. 検索:デリミタを探す
  2. 分割:トークンのリストを作る(空のトークンを除去する、デリミタを残すなど)

という2つの独立した操作に分解できると思ったので、2つの操作をポリシーでカスタマイズできるようなsplit iteratorを作り、それをベースにしたrange adaptorを書くことにしました。

namespace oven = pstade::oven;

// Usage example: three ways of splitting a range with the vitro adaptors.
// Each case pairs a "search" step (how delimiters are located) with a
// "split" step (how tokens are produced).
int main()
{
    using namespace boost::lambda;

    // 1. Search: cut after every fixed number of elements
    // 2. Split: build one token per segment
    std::cout << (
        oven::counting('a', (char)('z' + 1))
          | vitro::split_every<std::string>(3)
          | oven::identities
    ) << '\n';
    // {abc,def,ghi,jkl,mno,pqr,stu,vwx,yz}

    // 1. Search: cut at every element that satisfies the predicate
    // 2. Split: build one token per segment
    std::cout << (
        oven::initial_values(1, 3, -4, 5, 7, -9, 0, 2)
          | vitro::split_when<vitro::use_default>(oven::regular(_1 < 0))
          | oven::transformed(oven::make_range)
    ) << '\n';
    // {{1,3},{5,7},{0,2}}

    // 1. Search: cut at every occurrence of the given delimiter sequence
    // 2. Split: drop empty tokens
    std::cout << (
        "a..b...c....d.."
          | oven::as_literal
          | vitro::split_on<std::string>(
                ".." | oven::as_literal,
                vitro::split_options::remove_empty_entries
            )
          | oven::identities
    ) << '\n';
    // {a,b,.c,d}
    // (".." is matched left to right, so "..." leaves one '.' in front of 'c')
}

Finder Conceptを満たす型を自前で定義して、

rng | vitro::split<vitro::use_default>(f, options)

のように書くこともできます。
最初は「2. 分割」の挙動を侵入的に設定できるようにしようと考えていたのですが、面倒だったのと、あまりメリットが思いつかなかったのでやめました。上のコードにもsplit_options::remove_empty_entriesなるものが出てきますが、タグディスパッチに使っているだけで、これに処理を委譲しているわけではありません。
そのうちに、PStade.Ovenのstring_splitとその他のアダプタを組み合わせた方が柔軟な気がしてきたので飽きました。
一応実装を置いておきます。Boost.Rangeのみ使っています。(Forward iteratorがDefaultConstructibleでなければならない理由がわかりません)

#ifndef VITRO_SPLIT_HPP
#define VITRO_SPLIT_HPP

#include <algorithm>
#include <iterator>
#include <boost/iterator/iterator_categories.hpp>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/iterator/iterator_traits.hpp>
#include <boost/mpl/if.hpp>
#include <boost/range/begin.hpp>
#include <boost/range/concepts.hpp>
#include <boost/range/distance.hpp>
#include <boost/range/end.hpp>
#include <boost/range/iterator.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/assert.hpp>
#include <boost/next_prior.hpp>

namespace vitro {
// Placeholder for the SubRange template argument.  When it is given,
// split_impl uses boost::iterator_range over the source range's iterator
// type as the token type.
struct use_default {};

// Tag types (and ready-made constant instances) used purely for overload
// selection inside split_iterator; they carry no state.
namespace split_options {
// No post-processing: every increment performs exactly one search step.
struct none_t {} const none = {};
// Keep stepping past tokens that are empty.
struct remove_empty_entries_t {} const remove_empty_entries = {};
}

namespace detail {
// Forward iterator that walks [first, last) and yields one token per
// position.  A token is a SubRange constructed from a pair of Iterators.
//
// Template parameters:
//   Iterator  - iterator type of the underlying sequence.
//   SubRange  - token type; must be constructible from (Iterator, Iterator).
//   Separator - callable as sep(Iterator, Iterator) returning a
//               boost::iterator_range<Iterator> that marks the next delimiter
//               inside the given span.  The separators in this header return
//               the empty range (last, last) when no delimiter is left.
//   Options   - tag type from split_options selecting the increment policy.
//
// The past-the-end state is "exon == intron == last".  A token that is empty
// and starts at `last` therefore compares equal to the end iterator and is
// never yielded (e.g. the empty token after a trailing delimiter).
template <class Iterator, class SubRange, class Separator, class Options>
class split_iterator
  : public boost::iterator_facade<
        split_iterator<Iterator, SubRange, Separator, Options>,
        SubRange,
        boost::forward_traversal_tag,
        SubRange
    >
{
public:
    // Value-initialize the positions so that a default-constructed iterator
    // never holds indeterminate values (matters when Iterator is a pointer or
    // another scalar type).  `splice` is default-initialized exactly as
    // before, so the requirements on Separator are unchanged.
    split_iterator() : exon(), intron(), next_exon(), terminal() {}

    // Positions the iterator on the first token of [first, last).
    split_iterator(Iterator first, Iterator last, Separator sep)
      : exon(first), intron(first), next_exon(first), terminal(last),
        splice(sep)
    {
        // The first increment searches for the first delimiter and thereby
        // establishes the first token.
        increment();
    }

private:
    friend class boost::iterator_core_access;

    // Current token: everything from its start up to the next delimiter.
    SubRange dereference() const
    {
        return SubRange(exon, intron);
    }

    // Two iterators are equal when they designate the same token span.
    bool equal(split_iterator const &other) const
    {
        return exon == other.exon && intron == other.intron;
    }

    // Dispatches on the Options tag to the matching increment policy.
    void increment()
    {
        increment_impl(Options());
    }

    // Fallback policy (any tag not convertible to remove_empty_entries_t):
    // advance by exactly one token.
    void increment_impl(...)
    {
        step();
    }

    // remove_empty_entries policy: keep advancing while the current token is
    // empty, stopping at the end of the sequence.
    void increment_impl(split_options::remove_empty_entries_t)
    {
        do
            step();
        while (exon == intron && exon != terminal);
    }

    // Moves to the next token: it starts where the previous delimiter ended
    // and runs up to the start of the next delimiter found by `splice`.
    void step()
    {
        boost::iterator_range<Iterator> const next_intron =
           splice(next_exon, terminal);
        exon = next_exon;
        intron = next_intron.begin();
        next_exon = next_intron.end();
    }

    // exon      - start of the current token
    // intron    - end of the current token (= start of the delimiter after it)
    // next_exon - end of that delimiter (= start of the following token)
    // terminal  - end of the underlying sequence
    Iterator exon, intron, next_exon, terminal;
    // Delimiter finder.
    Separator splice;
};

template <class Range, class SubRange, class Separator, class Options>
struct split_impl
{
#if BOOST_RANGE_ENABLE_CONCEPT_ASSERT
    BOOST_RANGE_CONCEPT_ASSERT((boost::ForwardRangeConcept<Range>));
#endif

    typedef split_iterator<
        typename boost::range_iterator<Range>::type,
        typename boost::mpl::if_<
            boost::is_same<SubRange, use_default>,
            boost::iterator_range<typename boost::range_iterator<Range>::type>,
            SubRange
        >::type,
        Separator,
        Options
    > iterator_type;

    typedef boost::iterator_range<iterator_type> result_type;

    result_type operator()(Range &r, Separator sep) const
    {
        return result_type(iterator_type(boost::begin(r), boost::end(r), sep),
                           iterator_type(boost::end(r), boost::end(r), sep));
    }
};

// Right-hand operand of `range | split...(...)`: carries the separator as a
// value and the token type / option tag as template parameters.
template <class SubRange, class Separator, class Options>
struct split_holder
{
    // Not explicit, so a Separator converts implicitly to a holder.
    split_holder(Separator separator)
      : sep_(separator)
    {
    }

    // The stored separator; read by operator|.
    Separator sep_;
};

// Pipe operator for ranges bound as const: applies the split described by
// `h` to `r` and returns the resulting range of tokens.
template <class Range, class SubRange, class Separator, class Options>
inline
typename split_impl<Range const, SubRange, Separator, Options>::result_type
operator|(Range const &r, split_holder<SubRange, Separator, Options> const &h)
{
    typedef split_impl<Range const, SubRange, Separator, Options> impl_type;
    return impl_type()(r, h.sep_);
}

// Pipe operator for non-const ranges: applies the split described by `h` to
// `r` and returns the resulting range of tokens.
template <class Range, class SubRange, class Separator, class Options>
inline
typename split_impl<Range, SubRange, Separator, Options>::result_type
operator|(Range &r, split_holder<SubRange, Separator, Options> const &h)
{
    typedef split_impl<Range, SubRange, Separator, Options> impl_type;
    return impl_type()(r, h.sep_);
}
} // namespace detail

using detail::split_holder;

// Generic entry point: splits with a user-supplied separator object.
//
//   SubRange - token type (or use_default); must be given explicitly.
//   sep      - separator object stored in the returned holder.
//   Options  - tag value; only its type is used.
template <class SubRange, class Separator, class Options>
inline split_holder<SubRange, Separator, Options> split(Separator sep, Options)
{
    typedef split_holder<SubRange, Separator, Options> holder_type;
    return holder_type(sep);
}

namespace detail {
// Overload selected for iterator categories convertible to
// std::input_iterator_tag (other than random access): advances `it` by up to
// `n` single steps, stopping early when `last` is reached.
template <class Iterator, class Distance>
inline Iterator next_safe_impl(Iterator it, Iterator last, Distance n,
                               std::input_iterator_tag)
{
    if (n <= 0) {
        // Zero or negative offsets are handed to boost::next unchanged;
        // `last` is not consulted on this path.
        return boost::next(it, n);
    }

    for (; it != last && n > 0; --n)
        ++it;
    return it;
}

// Random-access overload: returns `it + n`, or `last` when fewer than `n`
// elements remain between `it` and `last`.
template <class Iterator, class Distance>
inline Iterator next_safe_impl(Iterator it, Iterator last, Distance n,
                               std::random_access_iterator_tag)
{
    if (last - it < n)
        return last;
    return it + n;
}

template <class Iterator, class Distance>
inline Iterator next_safe(Iterator it, Iterator last, Distance n)
{
    return next_safe_impl(it, last, n,
                          typename boost::iterator_category<Iterator>::type());
}
} // namespace detail

using detail::next_safe;

// Separator that places a zero-width delimiter `n` elements after the start
// of the searched span (or at its end when fewer elements remain).
template <class Distance>
class distance_separator
{
public:
    explicit distance_separator(Distance n)
      : n_(n)
    {
    }

    // Returns the empty range located at the cut position within
    // [begin, end).
    template <class Iterator>
    boost::iterator_range<Iterator>
    operator()(Iterator begin, Iterator end) const
    {
        Iterator const cut = next_safe(begin, end, n_);
        return boost::iterator_range<Iterator>(cut, cut);
    }

private:
    // Number of elements per token.
    Distance n_;
};

// Adaptor factory: splits a range into consecutive tokens of `n` elements
// each (the last token may be shorter).
//
//   SubRange - token type (or use_default); must be given explicitly.
//   n        - elements per token; asserted to be positive.
template <class SubRange, class Distance>
inline
split_holder<SubRange, distance_separator<Distance>, split_options::none_t>
split_every(Distance n)
{
    typedef split_holder<
        SubRange, distance_separator<Distance>, split_options::none_t
    > holder_type;

    BOOST_ASSERT(n > 0);
    return holder_type(distance_separator<Distance>(n));
}

// Separator that treats every element satisfying a predicate as a
// one-element delimiter.
template <class Predicate>
class predicate_separator
{
public:
    explicit predicate_separator(Predicate pred)
      : pred_(pred)
    {
    }

    // Returns the one-element range at the first match in [begin, end), or
    // the empty range (end, end) when nothing matches.
    template <class Iterator>
    boost::iterator_range<Iterator>
    operator()(Iterator begin, Iterator end) const
    {
        Iterator const hit = std::find_if(begin, end, pred_);
        if (hit != end)
            return boost::make_iterator_range(hit, boost::next(hit));
        return boost::make_iterator_range(end, end);
    }

private:
    Predicate pred_;
};

// Adaptor factory: splits a range at every element for which `pred` holds,
// using the default (none) option.
//
//   SubRange - token type (or use_default); must be given explicitly.
template <class SubRange, class Predicate>
inline
split_holder<SubRange, predicate_separator<Predicate>, split_options::none_t>
split_when(Predicate pred)
{
    typedef split_holder<
        SubRange, predicate_separator<Predicate>, split_options::none_t
    > holder_type;

    return holder_type(predicate_separator<Predicate>(pred));
}

// Adaptor factory: splits a range at every element for which `pred` holds.
// The second argument is an option tag; only its type is used.
//
//   SubRange - token type (or use_default); must be given explicitly.
template <class SubRange, class Predicate, class Options>
inline
split_holder<SubRange, predicate_separator<Predicate>, Options>
split_when(Predicate pred, Options)
{
    typedef split_holder<
        SubRange, predicate_separator<Predicate>, Options
    > holder_type;

    return holder_type(predicate_separator<Predicate>(pred));
}

// Separator that looks for occurrences of a delimiter sequence.
//
// NOTE: only a reference to the delimiter is stored, so the delimiter object
// must stay alive for as long as this separator (and any split range built
// from it) is used.
template <class Delimiter>
class delimiter_separator
{
#if BOOST_RANGE_ENABLE_CONCEPT_ASSERT
    BOOST_RANGE_CONCEPT_ASSERT((boost::ForwardRangeConcept<Delimiter>));
#endif

public:
    explicit delimiter_separator(Delimiter const &delim) : delim_(delim)
    {}

    // Returns the span of the first occurrence of the delimiter inside
    // [begin, end), or the empty range (end, end) when there is none.
    template <class Iterator>
    boost::iterator_range<Iterator>
    operator()(Iterator begin, Iterator end) const
    {
        // std::search reports a match at `begin` for an empty pattern.  That
        // zero-width match would never let the split iterator advance, so an
        // empty delimiter is treated as "no delimiter found" instead.
        if (boost::begin(delim_) == boost::end(delim_))
            return boost::make_iterator_range(end, end);

        Iterator it = std::search(begin, end,
                                  boost::begin(delim_), boost::end(delim_));
        return it != end ?
            boost::make_iterator_range(
                it,
                // The match is exactly as long as the delimiter.
                boost::next(it, boost::distance(delim_))
            ) :
            boost::make_iterator_range(end, end);
    }

private:
    // Referenced, not copied (this also allows array delimiters).
    Delimiter const &delim_;
};

// Adaptor factory: splits a range at every occurrence of the sequence
// `delim`, using the default (none) option.  The returned holder refers to
// `delim` rather than copying it.
//
//   SubRange - token type (or use_default); must be given explicitly.
template <class SubRange, class Delimiter>
inline
split_holder<SubRange, delimiter_separator<Delimiter>, split_options::none_t>
split_on(Delimiter const &delim)
{
    typedef split_holder<
        SubRange, delimiter_separator<Delimiter>, split_options::none_t
    > holder_type;

    return holder_type(delimiter_separator<Delimiter>(delim));
}

// Adaptor factory: splits a range at every occurrence of the sequence
// `delim`.  The second argument is an option tag; only its type is used.
// The returned holder refers to `delim` rather than copying it.
//
//   SubRange - token type (or use_default); must be given explicitly.
template <class SubRange, class Delimiter, class Options>
inline
split_holder<SubRange, delimiter_separator<Delimiter>, Options>
split_on(Delimiter const &delim, Options)
{
    typedef split_holder<
        SubRange, delimiter_separator<Delimiter>, Options
    > holder_type;

    return holder_type(delimiter_separator<Delimiter>(delim));
}
} // namespace vitro
#endif // VITRO_SPLIT_HPP