Back

1     //------------------------------------------------------------------------------
2     // Module Language.cpp                                                        //
3     //                                                                            //
4     //    Class which encapsulates a programming language's definition            //
5     //                                                                            //
6     //    Copyright (c) 2004 by Lars Haendel                                      //
7     //    Home: http://www.newty.de                                               //
8     //                                                                            //
9     //    This program is free software; you can redistribute it and/or modify    //
10    //    it under the terms of the GNU General Public License as published by    //
11    //    the Free Software Foundation as version 2 of the License.               //
12    //                                                                            //
13    //    This program is distributed in the hope that it will be useful,         //
14    //    but WITHOUT ANY WARRANTY; without even the implied warranty of          //
15    //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           //
16    //    GNU General Public License for more details.                            //
17    //                                                                            //
18    //    You should have received a copy of the GNU General Public License       //
19    //    along with this program; if not, write to the Free Software             //
20    //    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.               //
21    //                                                                            //
22    //------------------------------------------------------------------------------
23
24
25
26    #ifndef LanguageH
27    #define LanguageH
28
29    #include <iostream>           // due to:  ifstream
30    #include <fstream>
31
32    #include "WordList.h"         //          TWordList
33    #include "NameUtil.h"         //          SizeOfString()
34    #include "defines.h"          //          general defines
35    #include "StdList.h"          //          TStdList
36    #include "Regex.h"            //          TRegex
37    #include "ItemStyle.h"        //          TItemType::ItemType
38
39
//----------------------------------------------------------------------------------------------------------------------
// defines
#define MAX_SEQUENCE_LEN            8        // maximum length of a comment start or stop sequence
#define MAX_SYMBOL_LEN              124      // maximum length of a symbol string
#define MAX_NAME_LEN                32       // size of the buffer holding a language definition's name (TLanguage::szName)

// fixed-size character buffer holding one comment start or end sequence
typedef char ComSeq[MAX_SEQUENCE_LEN];
47
48
49    //----------------------------------------------------------------------------------------------------------------------
50    // class encapsulates a programming language's definition
51    class TLanguage
52    {
53    public:
54
55       // struct with type and identifier type exchange info
56       struct TInfo
57       {
58          TItemStyle::ItemType type1;         // type of element that starts the identifier type exchange
59          TItemStyle::ItemType type2;         // type to exchange identifier with
60          char szEndSeq[MAX_SEQUENCE_LEN];    // end sequence
61       };
62
63
64       // constructor/destructor
65       TLanguage();
66       ~TLanguage();
67
68       //-------------------------------------------------------------------------------------------------------------------
69       // Load and Save
70       void Load(ifstream& file);
71       void Save(ofstream& file) const;
72
73
74       //-------------------------------------------------------------------------------------------------------------------
75       // misc.
76
77       // returns true if passed string starts with a (single line) comment sequence
78       static int SequenceStartsString(const char*const& szString, const char*const& szSequence, const int& nSeq);
79
80
81       // return true if passed character is symbol
82       bool IsSymbol(const char& cChar) const { return IsWithinString(cChar, szSymbols, _nSymbols); };
83
84       // determine if a character is a number ending symbol, i.e. a symbol except '+', '-' or '.'
85       bool SymbolEndsNumber(const char& cChar) const;
86
87       // note: returns '\0' if string is empty
88       char RandomSymbol() const { return szSymbols[random(_nSymbols)]; };
89
90       // get language definition's name
91       const char* Name() const { return szName; };
92
93       // get string with typical file extensions
94       const char* Extensions() const { return szExtensions; };
95
96       // returns true if passed symbol is allowed to be within strings/names
97       bool SymbolMayBeWithinString(const char& cChar) const { return IsWithinString(cChar, szCharsWithinString, _nCharsWithinString); };
98
99       // returns true if passed string is started with a sequence that matches one of the regexes
100      int StartsWithRegex(const char*const& szString) const;
101
102      // return struct with regex type and identifier type exchange info
103      const TInfo* RegexInfo() const { return &regexInfo.Get(lastRegexId); };
104
105
106      //-------------------------------------------------------------------------------------------------------------------
107      // strings
108      char String() const { return cString; };
109      char Character() const { return cCharacter; };
110      char EscapeChar() const { return cEscapeChar; };
111      bool SillyStringHandling() const { return f_SillyStringHandling; };
112
113
114      //-------------------------------------------------------------------------------------------------------------------
115      // comments
116
117      // returns true if passed string is started with a sequence that starts a single line comment
118      int StartsSingleLineComment(const char*const& szString) const;
119
120      // returns true if passed string is started with a sequence that starts a multi line comment
121      int StartsMultiLineComment(const char*const& szString) const;
122
123      // returns true if passed string is started with a sequence that ends a multi line comment
124      int EndsMultiLineComment(const char*const& szString) const;
125
126      const char* SingleLineComment() const { return szSingleLineComment; };
127      const char* MultiLineComment() const { return szMultiLineComment; };
128
129
130      //-------------------------------------------------------------------------------------------------------------------
131      // word lists
132      bool IsKeyword(const char*const& szWord) const { return keywords.IsInList(szWord); };
133      int StartsPreproc(const char*const& szWord, const int& nChar) const;
134      bool IsUserWord(const char*const& szWord) const;
135
136      // return struct with regex type and identifier type exchange info
137      const TInfo* WordListInfo() const { return &wordListInfo.Get(lastWordId); };
138
139   //   const char* RandomKeyword() const { return keywords.GetRandom(); };
140   //   const char* RandomPreproc() const { return preproc.GetRandom(); };
141   //   const char* RandomCustom1() const { return custom1.GetRandom(); };
142   //   const char* RandomCustom2() const { return custom2.GetRandom(); };
143
144
145   private:
146
147      // returns true if passed character is within passed string
148      static bool IsWithinString(const char& cChar, const char*const& szString, const int& len);
149
150      // parse single/multi line comment strings
151      void TLanguage::ParseCommentStrings();
152
153      // misc.
154      char szName[MAX_NAME_LEN];
155      char szSymbols[MAX_SYMBOL_LEN];                       // string with symbols
156      char szExtensions[STS];                               // common file extensions
157      char szCharsWithinString[MAX_SYMBOL_LEN];             // chars that may be within strings
158      bool f_CaseSensitive;                                 // ignore case when searching in word lists
159      bool f_AllowWhiteAfterFirst;
160      int _nSymbols, _nCharsWithinString;                   // number of characters in the above strings
161
162      TStdList<TRegex> regex;                               // regex list
163      TStdList<TInfo> regexInfo;                            // regex type and identifier type exchange info
164      mutable int lastRegexId, lastWordId;                  // last regex/word Id identified
165
166      // strings
167      char cString;                                         // character that starts/ends a string
168      char cCharacter;                                      // character that starts/ends a character/string
169      char cEscapeChar;                                     // escape character in strings
170      bool f_SillyStringHandling;                           // the string is extended to the next line(s) until 
171                                                            // the closing character is found
172
173      // comments
174      char szSingleLineComment[STS];                        // multiline comment string
175      char szMultiLineComment[STS];                         // single line comment string
176      int nSingleLineComments, nMultiLineComments;          // number of single/multi line comments
177
178      ComSeq* szSingleLineCommentStart;         // start sequences for single line comments
179      ComSeq* szMultiLineCommentStart;          // start sequences for multi line comments
180      ComSeq* szMultiLineCommentEnd;            // end sequences for multi line comments
181      int* nSingleLineCommentStart;             // number of charcters in corresponding start/end sequence
182      int* nMultiLineCommentStart;
183      int* nMultiLineCommentEnd;
184
185      // word lists
186      TWordList keywords;                       // keyword list
187      TWordList preproc;                        // list with words that start a preprocessor directive
188      TStdList<TWordList> wordLists;            // list with user word lists
189      TStdList<TInfo> wordListInfo;             // type and identifier type exchange info for user word lists
190   };
191   #endif

Top