// Current File : //proc/self/root/usr/include/boost/spirit/home/support/detail/lexer/file_input.hpp
// file_input.hpp
// Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_LEXER_FILE_INPUT
#define BOOST_LEXER_FILE_INPUT

#include "char_traits.hpp"
// memcpy
#include <cstring>
#include <fstream>
#include "size_t.hpp"
#include "state_machine.hpp"

namespace boost
{
namespace lexer
{
// Tokenises the contents of a stream using the DFA tables held by a
// basic_state_machine. Characters are pulled from the stream in chunks into
// an internal buffer which is refilled (and enlarged when a single token
// does not fit) on demand. Tokens are exposed through a forward-only
// iterator whose value (iterator::data) carries the token id, unique id,
// lexer state and a [start, end) pointer pair into the internal buffer.
//
// The pointers handed out in iterator::data are only valid until the
// iterator is next incremented, because a reload may move or reallocate the
// buffer.
//
// Neither the state machine nor the stream is owned or copied; both must
// outlive this object.
template<typename CharT, typename Traits = char_traits<CharT> >
class basic_file_input
{
public:
    // Forward-only token iterator. Incrementing it scans the next token
    // from the owning basic_file_input.
    class iterator
    {
    public:
#if defined _MSC_VER && _MSC_VER <= 1200
        friend basic_file_input;
#else
        friend class basic_file_input;
#endif

        // The token currently referenced by the iterator.
        struct data
        {
            // Token id: 0 means end of input, npos means "no rule matched"
            // (a single character was skipped).
            std::size_t id;
            // Unique id of the matching rule (npos if none).
            std::size_t unique_id;
            // Half-open range [start, end) of the matched text. Both are
            // null in the end() state.
            const CharT *start;
            const CharT *end;
            // Current lexer state (npos in the end() state).
            std::size_t state;

            // Construct in end() state.
            // start and end are explicitly nulled so that a default
            // constructed iterator really does compare equal to end()
            // instead of comparing indeterminate pointers.
            data () :
                id (0),
                unique_id (npos),
                start (0),
                end (0),
                state (npos)
            {
            }

            // Member-wise equality.
            bool operator == (const data &rhs_) const
            {
                return id == rhs_.id && unique_id == rhs_.unique_id &&
                    start == rhs_.start && end == rhs_.end &&
                    state == rhs_.state;
            }
        };

        // Creates a detached iterator in the end() state.
        iterator () :
            _input (0)
        {
        }

        // Iterators compare by token data only (the owning input is not
        // part of the comparison).
        bool operator == (const iterator &rhs_) const
        {
            return _data == rhs_._data;
        }

        bool operator != (const iterator &rhs_) const
        {
            return !(*this == rhs_);
        }

        data &operator * ()
        {
            return _data;
        }

        data *operator -> ()
        {
            return &_data;
        }

        // Let compiler generate operator = ().

        // prefix version
        iterator &operator ++ ()
        {
            next_token ();
            return *this;
        }

        // postfix version
        iterator operator ++ (int)
        {
            iterator iter_ = *this;

            next_token ();
            return iter_;
        }

        // Scans the next token from the owning input into _data.
        // Must not be called on a detached (default constructed) iterator
        // because _input is dereferenced unconditionally.
        void next_token ()
        {
            const detail::internals &internals_ =
                _input->_state_machine->data ();

            _data.start = _data.end;

            if (internals_._dfa->size () == 1)
            {
                // Only one lexer state: use the overload that does not
                // have to track state changes.
                _data.id = _input->next (&internals_._lookup->front ()->
                    front (), internals_._dfa_alphabet.front (),
                    &internals_._dfa->front ()->front (), _data.start,
                    _data.end, _data.unique_id);
            }
            else
            {
                _data.id = _input->next (internals_, _data.state, _data.start,
                    _data.end, _data.unique_id);
            }

            if (_data.id == 0)
            {
                _data.start = 0;
                _data.end = 0;
                // Ensure current state matches that returned by end().
                _data.state = npos;
            }
        }

    private:
        // Not owner (obviously!)
        basic_file_input *_input;
        data _data;
    };

#if defined _MSC_VER && _MSC_VER <= 1200
    friend iterator;
#else
    friend class iterator;
#endif

    // Make it explicit that we are NOT taking a copy of state_machine_!
    //
    // state_machine_    - DFA tables to lex with (not owned).
    // is_               - stream to read characters from (not owned).
    // buffer_size_      - initial size of the internal buffer, in
    //                     characters. Must be greater than zero because the
    //                     address of the first buffer element is taken.
    // buffer_increment_ - number of characters the buffer grows by when a
    //                     single token does not fit.
    //
    // The initialiser list follows the member declaration order, which is
    // the order in which the members are actually initialised.
    basic_file_input (const basic_state_machine<CharT> *state_machine_,
        std::basic_ifstream<CharT> *is_,
        const std::streamsize buffer_size_ = 4096,
        const std::streamsize buffer_increment_ = 1024) :
        _state_machine (state_machine_),
        _buffer_size (buffer_size_),
        _buffer_increment (buffer_increment_),
        _buffer (_buffer_size, '!'),
        _start_buffer (0),
        _stream (is_),
        _start_token (0),
        _end_token (0),
        _end_buffer (0)
    {
        _start_buffer = &_buffer.front ();
        _end_buffer = _start_buffer + _buffer.size ();
        // Start with the token range at the very end of the buffer so that
        // the first call to next() triggers a read from the stream.
        _start_token = _end_buffer;
        _end_token = _end_buffer;
    }

    // Returns an iterator positioned on the first token. Note that this
    // reads from the stream.
    iterator begin ()
    {
        iterator iter_;

        iter_._input = this;
        // Over-ride default of 0 (EOF)
        iter_._data.id = npos;
        iter_._data.start = 0;
        iter_._data.end = 0;
        iter_._data.state = 0;
        ++iter_;
        return iter_;
    }

    // Returns the past-the-end iterator (id 0, null pointers, state npos).
    iterator end ()
    {
        iterator iter_;

        iter_._input = this;
        iter_._data.start = 0;
        iter_._data.end = 0;
        return iter_;
    }

    // Discards whatever is left in the buffer and refills it from the
    // stream.
    void flush ()
    {
        // This temporary is mandatory, otherwise the
        // pointer calculations won't work!
        const CharT *temp_ = _end_buffer;

        _start_token = _end_token = _end_buffer;
        reload_buffer (temp_, true, _end_token);
    }

private:
    typedef std::basic_istream<CharT> istream;
    typedef std::vector<CharT> buffer;

    const basic_state_machine<CharT> *_state_machine;
    const std::streamsize _buffer_size;
    const std::streamsize _buffer_increment;

    buffer _buffer;
    // First element of _buffer.
    CharT *_start_buffer;
    istream *_stream;
    // [_start_token, _end_token) is the most recently matched token.
    const CharT *_start_token;
    const CharT *_end_token;
    // One past the last valid character in _buffer.
    CharT *_end_buffer;

    // Scans the next token when the state machine has several lexer states.
    //
    // internals_   - DFA tables, one set per lexer state.
    // start_state_ - in: state to lex in; out: state selected by the
    //                matching rule.
    // start_, end_ - out: [start_, end_) of the matched text.
    // unique_id_   - out: unique id of the matching rule, or npos.
    //
    // Returns the rule id, npos if nothing matched (one character is then
    // skipped) or 0 at end of input.
    std::size_t next (const detail::internals &internals_,
        std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
        std::size_t &unique_id_)
    {
again:
        // Every scan starts where the previous match finished. This has to
        // sit below the label: a match with id 0 only switches state, and
        // the text it matched must be consumed rather than scanned again
        // in the new state.
        _start_token = _end_token;

        const std::size_t * lookup_ = &internals_._lookup[start_state_]->
            front ();
        std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
        const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
        const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
        const CharT *curr_ = _start_token;
        bool end_state_ = *ptr_ != 0;
        std::size_t id_ = *(ptr_ + id_index);
        std::size_t uid_ = *(ptr_ + unique_id_index);
        const CharT *end_token_ = curr_;
        // Set once the stream has nothing more to give.
        bool eof_ = false;

        for (;;)
        {
            if (curr_ >= _end_buffer)
            {
                if (!reload_buffer (curr_, end_state_, end_token_))
                {
                    // EOF
                    eof_ = true;
                    break;
                }
            }

            const std::size_t BOL_state_ = ptr_[bol_index];
            const std::size_t EOL_state_ = ptr_[eol_index];

            if (BOL_state_ && (_start_token == _start_buffer ||
                *(_start_token - 1) == '\n'))
            {
                // Beginning-of-line transition: consumes no input.
                ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
            }
            else if (EOL_state_ && *curr_ == '\n')
            {
                // End-of-line transition: the newline is not consumed.
                ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
            }
            else
            {
                const std::size_t state_ =
                    ptr_[lookup_[static_cast<typename Traits::index_type>
                        (*curr_++)]];

                if (state_ == 0)
                {
                    // No transition on this character.
                    break;
                }

                ptr_ = &dfa_[state_ * dfa_alphabet_];
            }

            if (*ptr_)
            {
                // Accepting state: remember the longest match so far.
                end_state_ = true;
                id_ = *(ptr_ + id_index);
                uid_ = *(ptr_ + unique_id_index);
                start_state_ = *(ptr_ + state_index);
                end_token_ = curr_;
            }
        }

        if (_start_token >= _end_buffer)
        {
            // No more tokens...
            unique_id_ = npos;
            return 0;
        }

        const std::size_t EOL_state_ = ptr_[eol_index];

        // End of input also counts as end of line. Use the explicit EOF
        // flag here: end_ is an output parameter that still holds the
        // previous token's end, so it says nothing about the end of input.
        if (EOL_state_ && eof_)
        {
            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];

            if (*ptr_)
            {
                end_state_ = true;
                id_ = *(ptr_ + id_index);
                uid_ = *(ptr_ + unique_id_index);
                start_state_ = *(ptr_ + state_index);
                end_token_ = curr_;
            }
        }

        if (end_state_)
        {
            // return longest match
            _end_token = end_token_;

            // id 0 marks a state-change-only rule: carry on with the next
            // token in the new state.
            if (id_ == 0) goto again;
        }
        else
        {
            // No match causes char to be skipped
            _end_token = _start_token + 1;
            id_ = npos;
            uid_ = npos;
        }

        start_ = _start_token;
        end_ = _end_token;
        unique_id_ = uid_;
        return id_;
    }

    // Scans the next token when the state machine has a single lexer state.
    //
    // lookup_       - character to DFA column lookup table.
    // dfa_alphabet_ - number of columns per DFA row.
    // dfa_          - the DFA transition table.
    // start_, end_  - out: [start_, end_) of the matched text.
    // unique_id_    - out: unique id of the matching rule, or npos.
    //
    // Returns the rule id, npos if nothing matched (one character is then
    // skipped) or 0 at end of input.
    std::size_t next (const std::size_t * const lookup_,
        const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
        const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
    {
        _start_token = _end_token;

        const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
        const CharT *curr_ = _start_token;
        bool end_state_ = *ptr_ != 0;
        std::size_t id_ = *(ptr_ + id_index);
        std::size_t uid_ = *(ptr_ + unique_id_index);
        const CharT *end_token_ = curr_;
        // Set once the stream has nothing more to give.
        bool eof_ = false;

        for (;;)
        {
            if (curr_ >= _end_buffer)
            {
                if (!reload_buffer (curr_, end_state_, end_token_))
                {
                    // EOF
                    eof_ = true;
                    break;
                }
            }

            const std::size_t BOL_state_ = ptr_[bol_index];
            const std::size_t EOL_state_ = ptr_[eol_index];

            if (BOL_state_ && (_start_token == _start_buffer ||
                *(_start_token - 1) == '\n'))
            {
                // Beginning-of-line transition: consumes no input.
                ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
            }
            else if (EOL_state_ && *curr_ == '\n')
            {
                // End-of-line transition: the newline is not consumed.
                ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
            }
            else
            {
                const std::size_t state_ =
                    ptr_[lookup_[static_cast<typename Traits::index_type>
                        (*curr_++)]];

                if (state_ == 0)
                {
                    // No transition on this character.
                    break;
                }

                ptr_ = &dfa_[state_ * dfa_alphabet_];
            }

            if (*ptr_)
            {
                // Accepting state: remember the longest match so far.
                end_state_ = true;
                id_ = *(ptr_ + id_index);
                uid_ = *(ptr_ + unique_id_index);
                end_token_ = curr_;
            }
        }

        if (_start_token >= _end_buffer)
        {
            // No more tokens...
            unique_id_ = npos;
            return 0;
        }

        const std::size_t EOL_state_ = ptr_[eol_index];

        // End of input also counts as end of line. Use the explicit EOF
        // flag here: end_ is an output parameter that still holds the
        // previous token's end, so it says nothing about the end of input.
        if (EOL_state_ && eof_)
        {
            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];

            if (*ptr_)
            {
                end_state_ = true;
                id_ = *(ptr_ + id_index);
                uid_ = *(ptr_ + unique_id_index);
                end_token_ = curr_;
            }
        }

        if (end_state_)
        {
            // return longest match
            _end_token = end_token_;
        }
        else
        {
            // No match causes char to be skipped
            _end_token = _start_token + 1;
            id_ = npos;
            uid_ = npos;
        }

        start_ = _start_token;
        end_ = _end_token;
        unique_id_ = uid_;
        return id_;
    }

    // Pulls more characters from the stream into the buffer while keeping
    // the token that is currently being scanned intact.
    //
    // curr_      - in/out: scan position; re-based onto the new buffer
    //              contents.
    // end_state_ - whether end_token_ currently holds a match and therefore
    //              needs re-basing as well.
    // end_token_ - in/out: end of the longest match so far.
    //
    // Returns true only if at least one new character was obtained. A read
    // that delivers nothing is reported as failure so that callers never
    // look at *curr_ when curr_ == _end_buffer.
    //
    // NOTE(review): when a token boundary coincides with the end of the
    // buffer the character before the token is not retained, so the
    // beginning-of-line test in next() sees _start_token == _start_buffer
    // for the following token.
    bool reload_buffer (const CharT * &curr_, const bool end_state_,
        const CharT * &end_token_)
    {
        bool success_ = !_stream->eof ();

        if (success_)
        {
            // Capture everything as offsets up front: the buffer may be
            // reallocated below, after which the old pointers must not be
            // used any more (not even for pointer arithmetic).
            const std::size_t token_offset_ = static_cast<std::size_t>
                (_start_token - _start_buffer);
            const std::size_t curr_offset_ = static_cast<std::size_t>
                (curr_ - _start_token);
            const std::size_t end_token_offset_ = static_cast<std::size_t>
                (end_token_ - _start_token);
            const bool pending_ = _start_token < _end_buffer;
            const std::size_t old_size_ = _buffer.size ();
            std::size_t count_ = 0;

            if (token_offset_ == 1 || (token_offset_ == 0 && pending_))
            {
                // Run out of buffer space, so increase.
                // The unfinished token already sits at the front of the
                // buffer (with at most one look-behind character before
                // it), so shifting cannot free any room.
                _buffer.resize (old_size_ + _buffer_increment, '!');
                _start_buffer = &_buffer.front ();
                _start_token = _start_buffer + token_offset_;
                _stream->read (_start_buffer + old_size_,
                    _buffer_increment);
                count_ = _stream->gcount ();
                _end_buffer = _start_buffer + old_size_ + count_;
            }
            else if (pending_)
            {
                const std::size_t len_ = _end_buffer - _start_token;
                // Some systems have memmove in namespace std.
                using namespace std;

                // Move the unfinished token, plus the one character before
                // it (needed for the beginning-of-line test), to the front
                // of the buffer. Source and destination may overlap, so
                // memmove is required here (memcpy would be undefined).
                memmove (_start_buffer, _start_token - 1, (len_ + 1) *
                    sizeof (CharT));
                _stream->read (_start_buffer + len_ + 1,
                    static_cast<std::streamsize> (_buffer.size () - len_ - 1));
                count_ = _stream->gcount ();
                _start_token = _start_buffer + 1;
                _end_buffer = _start_buffer + len_ + 1 + count_;
            }
            else
            {
                // Nothing worth keeping: refill the whole buffer.
                _stream->read (_start_buffer, static_cast<std::streamsize>
                    (_buffer.size ()));
                count_ = _stream->gcount ();
                _start_token = _start_buffer;
                _end_buffer = _start_buffer + count_;
            }

            if (end_state_)
            {
                end_token_ = _start_token + end_token_offset_;
            }

            curr_ = _start_token + curr_offset_;
            // The stream may have hit EOF (or failed) without delivering
            // any characters at all.
            success_ = count_ != 0;
        }

        return success_;
    }

    // Disallow copying of buffer
    basic_file_input (const basic_file_input &);
    const basic_file_input &operator = (const basic_file_input &);
};

// Convenience name for basic_file_input instantiated with char.
typedef basic_file_input<char> file_input;
// Convenience name for basic_file_input instantiated with wchar_t.
typedef basic_file_input<wchar_t> wfile_input;
}
}

#endif