Pegmatite
/Users/theraven/Documents/Work/Teaching/Compilers ACS Module/Coursework/Resources/Pegmatite/parser.hh
1 /*-
2  * Copyright (c) 2012, Achilleas Margaritis
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * * Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  */
27 #ifndef PEGMATITE_PARSER_HPP
28 #define PEGMATITE_PARSER_HPP
29 
30 
#include <cstddef>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
#include <string>
#include <utility>
#include <vector>
36 
37 
38 namespace pegmatite {
39 
40 
41 class Expr;
42 class Context;
43 class Rule;
44 
45 
/**
 * Abstract superclass for indexing into a buffer with arbitrary storage.
 *
 * Characters are read through operator[], which answers from a cached window
 * (`buffer`, covering indexes [buffer_start, buffer_end)) and hands every
 * other request to slowCharacterLookup().  Subclasses provide the data by
 * implementing fillBuffer() and size().
 */
class Input
{
	protected:
	/**
	 * Size for the static buffer.
	 */
	static const std::size_t static_buffer_size = 512;
	public:
	/**
	 * The type of indexes into the buffer.
	 */
	typedef std::size_t Index;
	/**
	 * Largest Index value; the index held by a default-constructed iterator.
	 */
	static const Index npos = static_cast<Index>(-1);
	/**
	 * Iterator, refers back into the input stream.
	 *
	 * It stores a pointer to the owning Input plus an index, so dereferencing
	 * goes through Input::operator[].
	 */
	class iterator : public std::iterator<std::bidirectional_iterator_tag, char32_t>
	{
		friend Input;
		/**
		 * The input that this iterator refers to.
		 */
		Input *buffer;
		/**
		 * The index into the input.
		 */
		Index idx;
		/**
		 * Constructs an iterator at index `i` of input `b`.  Private: only
		 * Input (a friend) creates positioned iterators.
		 */
		inline iterator(Input *b, Index i) : buffer(b), idx(i) {}
		public:
		/**
		 * Default constructor, constructs an invalid iterator into no buffer.
		 */
		inline iterator() : buffer(nullptr), idx(npos) {}
		/**
		 * Filename given by the Input this iterator is derived from.
		 * Declared here, defined out of line.
		 */
		const std::string& filename() const;
		/**
		 * Dereference operator, returns the character represented by this
		 * index.  Must not be used on a default-constructed iterator, which
		 * has no buffer.
		 */
		inline char32_t operator*() const { return (*buffer)[idx]; }
		/**
		 * Move the iterator on to the next location.
		 */
		inline iterator &operator++()
		{
			idx++;
			return *this;
		}
		/**
		 * Post-increment: advances the iterator and returns its old value.
		 */
		inline iterator operator++(int /* dummy */)
		{
			iterator copy = *this;
			idx++;
			return copy;
		}
		/**
		 * Move the iterator forward by the specified amount.
		 */
		inline iterator &operator+=(std::size_t amount)
		{
			idx += amount;
			return *this;
		}
		/**
		 * Move the iterator to the previous location.
		 */
		inline iterator &operator--()
		{
			idx--;
			return *this;
		}
		/**
		 * Compares iterators for equality: same input and same index.
		 */
		inline bool operator==(const iterator &other) const
		{
			return (buffer == other.buffer) && (idx == other.idx);
		}
		/**
		 * Compares iterators for inequality.
		 */
		inline bool operator!=(const iterator &other) const
		{
			return !(*this == other);
		}
		/**
		 * Compares locations of iterators in the input.  Only the indexes
		 * are compared; the owning inputs are not checked.
		 */
		inline bool operator>(const iterator &other) const
		{
			return (idx > other.idx);
		}
		/**
		 * Compares locations of iterators in the input.  Only the indexes
		 * are compared; the owning inputs are not checked.
		 */
		inline bool operator<(const iterator &other) const
		{
			return (idx < other.idx);
		}
		/**
		 * Subtracts the other iterator's index from this one's.  The result
		 * is unsigned, so it wraps if `other` is ahead of this iterator.
		 */
		inline Index operator-(const iterator &other) const
		{
			return idx-other.idx;
		}
		/**
		 * Returns the index into the input stream.
		 */
		Index index() const { return idx; }
	};
	/**
	 * Returns an iterator for the start of the input.
	 */
	inline iterator begin()
	{
		return iterator(this, 0);
	}
	/**
	 * Returns an iterator for the end of the input.
	 */
	inline iterator end()
	{
		return iterator(this, size());
	}
	/**
	 * Returns a user-meaningful name (typically a filename).
	 */
	const std::string& name() const
	{
		return user_name;
	}
	/**
	 * Fetch the character at the specified index.
	 */
	inline char32_t operator[](Index n)
	{
		// If the local buffer can satisfy the request, fetch the value
		if ((n >= buffer_start) && (n < buffer_end))
		{
			return buffer[n - buffer_start];
		}
		return slowCharacterLookup(n);
	}
	/**
	 * Stores the name and sets the buffer start to be after the buffer end,
	 * so the window is empty and the first request goes through
	 * slowCharacterLookup().
	 */
	Input(const std::string& name)
		: user_name(name), buffer(nullptr), buffer_start(1), buffer_end(0) {}
	private:
	/**
	 * The name returned by name().
	 */
	const std::string user_name;
	/**
	 * Pointer to the characters of the current window.
	 */
	char32_t *buffer;
	/**
	 * Index of the first character in the current window.
	 */
	Index buffer_start;
	/**
	 * Index one past the last character in the current window.
	 */
	Index buffer_end;
	/**
	 * Storage of static_buffer_size characters owned by this object.
	 * NOTE(review): presumably handed to fillBuffer() as the default
	 * destination -- confirm in the implementation file.
	 */
	char32_t local_buffer[static_buffer_size];
	/**
	 * Handles lookups that miss the current window.  Defined out of line.
	 */
	char32_t slowCharacterLookup(Index n);
	protected:
	/**
	 * Fill in the buffer with the next range.
	 *
	 * NOTE(review): judging by IteratorInput's override, `start` is the index
	 * of the first wanted character, `length` carries the capacity in and the
	 * number of characters provided out, and the return value is false when
	 * `start` lies beyond the end of the input -- confirm against the
	 * implementation file.
	 */
	virtual bool fillBuffer(Index start, Index &length, char32_t *&b) = 0;
	/**
	 * Returns the size of the buffer.
	 */
	virtual Index size() const = 0;
	/**
	 * Virtual destructor.
	 */
	virtual ~Input();
	/**
	 * Copy constructor, accessible only to subclasses.  Defined out of line.
	 */
	Input(const Input&);
};
275 
279 class UnicodeVectorInput : public Input
280 {
284  const std::vector<char32_t> vector;
285  public:
291  const std::vector<char32_t> &getVector() { return vector; }
296  UnicodeVectorInput(std::vector<char32_t> &&v, const std::string& name = "")
297  : Input(name), vector(v) {}
301  bool fillBuffer(Index start, Index &length, char32_t *&b) override;
305  Index size() const override;
306 };
307 
314 struct AsciiFileInput : public Input
315 {
319  AsciiFileInput(int file, const std::string& name = "");
320  bool fillBuffer(Index start, Index &length, char32_t *&b) override;
321  Index size() const override;
322  private:
326  int fd;
330  size_t file_size;
331 };
332 
334 struct StreamInput : public Input
335 {
336  public:
345  static StreamInput Create(const std::string& name, std::istream&);
346 
347  bool fillBuffer(Index start, Index &length, char32_t*&) override;
348  Index size() const override;
349 
350  private:
351  StreamInput(const std::string& name, std::istream&, size_t len);
352 
353  const size_t length;
354  std::istream& stream;
355 };
356 
361 class StringInput : public Input
362 {
366  const std::string str;
367  public:
373  const std::string &getString() { return str; }
378  StringInput(std::string &&s, const std::string& name = "")
379  : Input(name), str(s) {}
384  StringInput(const std::string& s, const std::string& name = "")
385  : Input(name), str(s) {}
389  bool fillBuffer(Index start, Index &length, char32_t *&b) override;
393  Index size() const override;
394 };
395 
396 template<class T>
397 class IteratorInput : public Input
398 {
402  T begin;
406  T end;
407  public:
411  IteratorInput(T b, T e, const std::string& name = "")
412  : Input(name), begin(b), end(e) {}
416  bool fillBuffer(Index start, Index &length, char32_t *&b) override
417  {
418  if (start > std::distance(begin, end))
419  {
420  length = 0;
421  return false;
422  }
423  Index copied = 0;
424  for (T i=std::advance(begin, start) ; (i != end) && (copied < length) ; ++i)
425  {
426  b[copied++] = static_cast<char32_t>(*i);
427  }
428  length = copied;
429  return true;
430  }
434  Index size() const override { return std::distance(begin, end); }
435 };
436 
437 
440 {
443 
445  const std::string& filename() const { return it.filename(); }
446 
448  int line;
449 
451  int col;
452 
455 
459  ParserPosition(Input &i);
460 };
461 
467 {
468 public:
471 
474 
477 
482  InputRange(const ParserPosition &b, const ParserPosition &e);
486  Input::iterator begin() const { return start.it; }
490  Input::iterator end() const { return finish.it; }
494  std::string str() const;
495 };
496 
506 typedef std::function<bool(const InputRange&, void*)> parse_proc;
507 
508 
510 typedef std::function<void (const InputRange&, const std::string&)> ErrorReporter;
511 
/**
 * Character expression type; only forward-declared in this header.
 */
class CharacterExpr;
/**
 * Shared pointer to a character expression.
 */
using CharacterExprPtr = std::shared_ptr<CharacterExpr>;
517 
522 struct ExprPtr : public std::shared_ptr<Expr>
523 {
527  ExprPtr(Expr *e) : std::shared_ptr<Expr>(e) {}
531  ExprPtr(const Rule &e);
535  ExprPtr(const CharacterExprPtr &e);
540  ExprPtr(const char *);
545  ExprPtr(const char32_t);
546 };
547 
548 
554 class Rule
555 {
556 public:
561  Rule(const ExprPtr e);
565  Rule(const CharacterExprPtr e) : Rule(ExprPtr(e)) {}
569  Rule(const Rule &r) : Rule(ExprPtr(r)) {}
574  Rule(const Rule &&r);
579  Rule& operator=(Rule &&);
580 private:
584  ExprPtr expr;
585 
590  friend class Context;
591 };
592 
596 class Expr
597 {
598 public:
607  virtual ~Expr();
608 
614  virtual bool parse_non_term(Context &con) const = 0;
615 
620  virtual bool parse_term(Context &con) const = 0;
621 
625  virtual void dump() const = 0;
626 
627 };
631 ExprPtr operator *(const ExprPtr &e);
632 inline ExprPtr operator *(const Rule &r)
633 {
634  return *ExprPtr(r);
635 }
636 
640 ExprPtr operator +(const ExprPtr &e);
641 inline ExprPtr operator +(const Rule &r)
642 {
643  return +ExprPtr(r);
644 }
645 
649 ExprPtr operator -(const ExprPtr &e);
650 inline ExprPtr operator -(const Rule &r)
651 {
652  return -ExprPtr(r);
653 }
654 
658 ExprPtr operator &(const ExprPtr &e);
659 inline ExprPtr operator &(const Rule &r)
660 {
661  return &ExprPtr(r);
662 }
663 
667 ExprPtr operator !(const ExprPtr &e);
668 inline ExprPtr operator !(const Rule &r)
669 {
670  return !ExprPtr(r);
671 }
672 
673 
674 CharacterExprPtr operator "" _E(const char x);
675 CharacterExprPtr operator "" _E(const char32_t x);
679 ExprPtr operator "" _S(const char *x, std::size_t len);
683 ExprPtr operator "" _E(const char *x, std::size_t len);
687 ExprPtr operator "" _R(const char *x, std::size_t len);
688 ExprPtr operator-(const CharacterExprPtr &left, const CharacterExprPtr &right);
689 ExprPtr operator-(const CharacterExprPtr &left, char32_t right);
690 
691 
696 #ifdef DEBUG_PARSING
697 ExprPtr trace_debug(const char *msg, const ExprPtr e);
698 inline ExprPtr trace(const char *msg, const ExprPtr e)
699 {
700  return trace_debug(msg, e);
701 }
702 #else
703 inline ExprPtr trace(const char *, const ExprPtr e)
704 {
705  return e;
706 }
707 #endif
708 
709 
715 ExprPtr operator >> (const ExprPtr &left, const ExprPtr &right);
716 
722 ExprPtr operator | (const ExprPtr &left, const ExprPtr &right);
723 
728 ExprPtr term(const ExprPtr &e);
729 
730 
735 ExprPtr set(const char *s);
736 
740 ExprPtr regex(const char *s);
741 
745 ExprPtr regex(const wchar_t *s);
746 
747 
752 ExprPtr set(const wchar_t *s);
753 
754 
760 ExprPtr range(char32_t min, char32_t max);
761 
762 
769 ExprPtr nl(const ExprPtr &e);
770 
771 
775 ExprPtr eof();
776 
777 
781 ExprPtr any();
782 
787 ExprPtr debug(std::function<void()> fn);
788 
794 {
798  virtual parse_proc get_parse_proc(const Rule &) const = 0;
802  virtual ~ParserDelegate();
803 };
804 
815 bool parse(Input &i, const Rule &g, const Rule &ws, ErrorReporter &err,
816  const ParserDelegate &delegate, void *d);
817 
818 
824 template <class T> T &operator << (T &stream, const InputRange &ir)
825 {
826  for(auto c : ir)
827  {
828  stream << static_cast<typename T::char_type>(c);
829  }
830  return stream;
831 }
832 
833 
834 
835 } //namespace pegmatite
836 
837 
838 #endif //PEGMATITE_PARSER_HPP
ParserPosition finish
end position.
Definition: parser.hh:473
Input::iterator end() const
Iterator to the end of the input range.
Definition: parser.hh:490
IteratorInput(T b, T e, const std::string &name="")
Construct an input that reads from between the two iterators specified.
Definition: parser.hh:411
size_t Index
The type of indexes into the buffer.
Definition: parser.hh:64
Parser delegate abstract class.
Definition: parser.hh:793
Rule(const Rule &r)
Copy constructor: delegates to Rule(ExprPtr) with an ExprPtr built from r, rather than copying r's expression member directly.
Definition: parser.hh:569
char32_t operator[](Index n)
Fetch the character at the specified index.
Definition: parser.hh:201
static const std::size_t static_buffer_size
Size for the static buffer.
Definition: parser.hh:59
iterator end()
Returns an iterator for the end of the input.
Definition: parser.hh:185
A concrete input class that wraps a vector of 32-bit characters.
Definition: parser.hh:279
InputRange()
empty constructor.
Definition: parser.hh:476
Iterator, refers back into the input stream.
Definition: parser.hh:69
A shared pointer to an expression.
Definition: parser.hh:522
bool operator<(const iterator &other) const
Compares locations of iterators in the input.
Definition: parser.hh:159
const std::string & filename() const
user-meaningful filename.
Definition: parser.hh:445
StringInput(std::string &&s, const std::string &name="")
Constructs the wrapper from a string (s).
Definition: parser.hh:378
Input(const std::string &name)
Constructor: stores the name and sets the buffer start to be after the buffer end, so that the first request will miss the cached window and go through slowCharacterLookup().
Definition: parser.hh:217
STL namespace.
iterator & operator++()
Move the iterator on to the next location.
Definition: parser.hh:106
Input::iterator begin() const
Iterator to the start of the input range.
Definition: parser.hh:486
iterator()
Default constructor, constructs an invalid iterator into no buffer.
Definition: parser.hh:88
Abstract base class for expressions.
Definition: parser.hh:596
iterator begin()
Returns an iterator for the start of the input.
Definition: parser.hh:178
int col
column.
Definition: parser.hh:451
Rule(const CharacterExprPtr e)
Constructor for creating rules from character expressions.
Definition: parser.hh:565
Index index() const
Returns the index into the input stream.
Definition: parser.hh:173
virtual Index size() const =0
Returns the size of the buffer.
Rule class, which represents a rule in a grammar.
Definition: parser.hh:554
iterator & operator+=(size_t amount)
Move the iterator forward by the specified amount.
Definition: parser.hh:120
ExprPtr(Expr *e)
Construct an expression pointer wrapping an expression.
Definition: parser.hh:527
A range within input.
Definition: parser.hh:466
char32_t operator*() const
Dereference operator, returns the character represented by this index.
Definition: parser.hh:99
An Input that wraps a std::istream.
Definition: parser.hh:334
position into the input.
Definition: parser.hh:439
UnicodeVectorInput(std::vector< char32_t > &&v, const std::string &name="")
Constructs the wrapper from a vector.
Definition: parser.hh:296
const std::vector< char32_t > & getVector()
Returns an immutable reference to the vector.
Definition: parser.hh:291
const std::string & name() const
Returns a user-meaningful name (typically a filename).
Definition: parser.hh:192
Index size() const override
Returns the number of elements between the begin and end iterators.
Definition: parser.hh:434
bool operator!=(const iterator &other) const
Compares iterators for inequality.
Definition: parser.hh:145
virtual ~Input()
Virtual destructor.
const std::string & getString()
Returns an immutable reference to the string.
Definition: parser.hh:373
Abstract superclass for indexing into a buffer with arbitrary storage.
Definition: parser.hh:52
bool operator>(const iterator &other) const
Compares locations of iterators in the input.
Definition: parser.hh:152
Input::iterator it
iterator into the input.
Definition: parser.hh:442
StringInput(const std::string &s, const std::string &name="")
Constructs the wrapper from a string (s).
Definition: parser.hh:384
virtual bool fillBuffer(Index start, Index &length, char32_t *&b)=0
Fill in the buffer with the next range.
const std::string & filename() const
Filename given by Input this iterator is derived from.
ParserPosition()
null constructor.
Definition: parser.hh:454
iterator & operator--()
Move the iterator to the previous location.
Definition: parser.hh:129
Definition: parser.hh:397
Index operator-(const iterator &other) const
Subtracts the other iterator's index from this iterator's index.
Definition: parser.hh:166
Definition: ast.hh:41
A concrete Input class that wraps a file.
Definition: parser.hh:314
bool fillBuffer(Index start, Index &length, char32_t *&b) override
Copy the data into the buffer.
Definition: parser.hh:416
int line
line.
Definition: parser.hh:448
A concrete Input subclass that wraps a std::string, providing access to the underlying characters...
Definition: parser.hh:361
ParserPosition start
begin position.
Definition: parser.hh:470
bool operator==(const iterator &other) const
Compares iterators for equality.
Definition: parser.hh:138