symbols.hpp 6.23 KB
/*=============================================================================
    Copyright (c) 2001-2003 Joel de Guzman
    Copyright (c) 2011 Daniel James
    http://spirit.sourceforge.net/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#ifndef QUICKBOOK_SYMBOLS_IPP
#define QUICKBOOK_SYMBOLS_IPP

///////////////////////////////////////////////////////////////////////////////

#include <boost/intrusive_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/spirit/home/classic/symbols.hpp>

///////////////////////////////////////////////////////////////////////////////
namespace quickbook
{

    ///////////////////////////////////////////////////////////////////////////////
    //
    //  tst class
    //
    //      This it the Ternary Search Tree from
    //      <boost/spirit/home/classic/symbols/impl/tst.ipp> adapted to be cheap
    //      to copy.
    //
    //      Ternary Search Tree implementation. The data structure is faster
    //      than
    //      hashing for many typical search problems especially when the search
    //      interface is iterator based. Searching for a string of length k in a
    //      ternary search tree with n strings will require at most O(log n+k)
    //      character comparisons. TSTs are many times faster than hash tables
    //      for unsuccessful searches since mismatches are discovered earlier
    //      after examining only a few characters. Hash tables always examine an
    //      entire key when searching.
    //
    //      For details see http://www.cs.princeton.edu/~rs/strings/.
    //
    //      *** This is a low level class and is
    //          not meant for public consumption ***
    //
    ///////////////////////////////////////////////////////////////////////////////

    template <typename T, typename CharT> struct tst_node
    {
        tst_node(CharT value_)
            : reference_count(0)
            , left()
            , middle()
            , right()
            , data()
            , value(value_)
        {
        }

        tst_node(tst_node const& other)
            : reference_count(0)
            , left(other.left)
            , middle(other.middle)
            , right(other.right)
            , data(other.data ? new T(*other.data) : 0)
            , value(other.value)
        {
        }

        // If you fancy a slight improvement in memory use,
        // reference_count + value could probably be packed
        // in the space for a single int.
        int reference_count;
        boost::intrusive_ptr<tst_node> left;
        boost::intrusive_ptr<tst_node> middle;
        boost::intrusive_ptr<tst_node> right;
        boost::scoped_ptr<T> data;
        CharT value;

      private:
        tst_node& operator=(tst_node const&);
    };

    template <typename T, typename CharT>
    void intrusive_ptr_add_ref(tst_node<T, CharT>* ptr)
    {
        ++ptr->reference_count;
    }

    template <typename T, typename CharT>
    void intrusive_ptr_release(tst_node<T, CharT>* ptr)
    {
        if (--ptr->reference_count == 0) delete ptr;
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename T, typename CharT> class tst
    {
        typedef tst_node<T, CharT> node_t;
        typedef boost::intrusive_ptr<node_t> node_ptr;
        node_ptr root;

      public:
        struct search_info
        {
            T* data;
            std::size_t length;
        };

        void swap(tst& other) { root.swap(other.root); }

        // Adds symbol to ternary search tree.
        // If it already exists, then replace it with new value.
        //
        // pre: first != last
        template <typename IteratorT>
        T* add(IteratorT first, IteratorT const& last, T const& data)
        {
            assert(first != last);

            node_ptr* np = &root;
            CharT ch = *first;

            for (;;) {
                if (!*np) {
                    *np = new node_t(ch);
                }
                else if ((*np)->reference_count > 1) {
                    *np = new node_t(**np);
                }

                if (ch < (*np)->value) {
                    np = &(*np)->left;
                }
                else if (ch == (*np)->value) {
                    ++first;
                    if (first == last) break;
                    ch = *first;
                    np = &(*np)->middle;
                }
                else {
                    np = &(*np)->right;
                }
            }

            boost::scoped_ptr<T> new_data(new T(data));
            boost::swap((*np)->data, new_data);
            return (*np)->data.get();
        }

        template <typename ScannerT>
        search_info find(ScannerT const& scan) const
        {
            search_info result = {0, 0};
            if (scan.at_end()) {
                return result;
            }

            typedef typename ScannerT::iterator_t iterator_t;
            node_ptr np = root;
            CharT ch = *scan;
            iterator_t latest = scan.first;
            std::size_t length = 0;

            while (np) {
                if (ch < np->value) // => go left!
                {
                    np = np->left;
                }
                else if (ch == np->value) // => go middle!
                {
                    ++scan;
                    ++length;

                    // Found a potential match.
                    if (np->data.get()) {
                        result.data = np->data.get();
                        result.length = length;
                        latest = scan.first;
                    }

                    if (scan.at_end()) break;
                    ch = *scan;
                    np = np->middle;
                }
                else // (ch > np->value) => go right!
                {
                    np = np->right;
                }
            }

            scan.first = latest;
            return result;
        }
    };

    typedef boost::spirit::classic::
        symbols<std::string, char, quickbook::tst<std::string, char> >
            string_symbols;
} // namespace quickbook

#endif