//------------------------------------------------------------------------------
//  Module Language.cpp                                                       //
//                                                                            //
//  Class which encapsulates a programming language's definition              //
//                                                                            //
//  Copyright (c) 2004 by Lars Haendel                                        //
//  Home: http://www.newty.de                                                 //
//                                                                            //
//  This program is free software; you can redistribute it and/or modify      //
//  it under the terms of the GNU General Public License as published by      //
//  the Free Software Foundation as version 2 of the License.                 //
//                                                                            //
//  This program is distributed in the hope that it will be useful,           //
//  but WITHOUT ANY WARRANTY; without even the implied warranty of            //
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              //
//  GNU General Public License for more details.                              //
//                                                                            //
//  You should have received a copy of the GNU General Public License         //
//  along with this program; if not, write to the Free Software               //
//  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.                 //
//                                                                            //
//------------------------------------------------------------------------------
23
24
25
#ifndef LanguageH
#define LanguageH

#include <iostream>        // due to: ifstream
#include <fstream>

#include "WordList.h"      // TWordList
#include "NameUtil.h"      // SizeOfString()
#include "defines.h"       // general defines
#include "StdList.h"       // TStdList
#include "Regex.h"         // TRegex
#include "ItemStyle.h"     // TItemType::ItemType
38
39
//----------------------------------------------------------------------------------------------------------------------
// defines
#define MAX_SEQUENCE_LEN 8     // maximum length of a comment start or stop sequence
#define MAX_SYMBOL_LEN   124   // maximum length of a symbol string
#define MAX_NAME_LEN     32    // size of the language name buffer (TLanguage::szName)

// fixed-size character buffer that holds one comment start/stop sequence
typedef char ComSeq[MAX_SEQUENCE_LEN];
47
48
49 //----------------------------------------------------------------------------------------------------------------------
50 // class encapsulates a programming language's definition
51 class TLanguage
52 {
53 public:
54
55 // struct with type and identifier type exchange info
56 struct TInfo
57 {
58 TItemStyle::ItemType type1; // type of element that starts the identifier type exchange
59 TItemStyle::ItemType type2; // type to exchange identifier with
60 char szEndSeq[MAX_SEQUENCE_LEN]; // end sequence
61 };
62
63
64 // constructor/destructor
65 TLanguage();
66 ~TLanguage();
67
68 //-------------------------------------------------------------------------------------------------------------------
69 // Load and Save
70 void Load(ifstream& file);
71 void Save(ofstream& file) const;
72
73
74 //-------------------------------------------------------------------------------------------------------------------
75 // misc.
76
77 // returns true if passed string starts with a (single line) comment sequence
78 static int SequenceStartsString(const char*const& szString, const char*const& szSequence, const int& nSeq);
79
80
81 // return true if passed character is symbol
82 bool IsSymbol(const char& cChar) const { return IsWithinString(cChar, szSymbols, _nSymbols); };
83
84 // determine if a character is a number ending symbol, i.e. a symbol except '+', '-' or '.'
85 bool SymbolEndsNumber(const char& cChar) const;
86
87 // note: returns '\0' if string is empty
88 char RandomSymbol() const { return szSymbols[random(_nSymbols)]; };
89
90 // get language definition's name
91 const char* Name() const { return szName; };
92
93 // get string with typical file extensions
94 const char* Extensions() const { return szExtensions; };
95
96 // returns true if passed symbol is allowed to be within strings/names
97 bool SymbolMayBeWithinString(const char& cChar) const { return IsWithinString(cChar, szCharsWithinString, _nCharsWithinString); };
98
99 // returns true if passed string is started with a sequence that matches one of the regexes
100 int StartsWithRegex(const char*const& szString) const;
101
102 // return struct with regex type and identifier type exchange info
103 const TInfo* RegexInfo() const { return ®exInfo.Get(lastRegexId); };
104
105
106 //-------------------------------------------------------------------------------------------------------------------
107 // strings
108 char String() const { return cString; };
109 char Character() const { return cCharacter; };
110 char EscapeChar() const { return cEscapeChar; };
111 bool SillyStringHandling() const { return f_SillyStringHandling; };
112
113
114 //-------------------------------------------------------------------------------------------------------------------
115 // comments
116
117 // returns true if passed string is started with a sequence that starts a single line comment
118 int StartsSingleLineComment(const char*const& szString) const;
119
120 // returns true if passed string is started with a sequence that starts a multi line comment
121 int StartsMultiLineComment(const char*const& szString) const;
122
123 // returns true if passed string is started with a sequence that ends a multi line comment
124 int EndsMultiLineComment(const char*const& szString) const;
125
126 const char* SingleLineComment() const { return szSingleLineComment; };
127 const char* MultiLineComment() const { return szMultiLineComment; };
128
129
130 //-------------------------------------------------------------------------------------------------------------------
131 // word lists
132 bool IsKeyword(const char*const& szWord) const { return keywords.IsInList(szWord); };
133 int StartsPreproc(const char*const& szWord, const int& nChar) const;
134 bool IsUserWord(const char*const& szWord) const;
135
136 // return struct with regex type and identifier type exchange info
137 const TInfo* WordListInfo() const { return &wordListInfo.Get(lastWordId); };
138
139 // const char* RandomKeyword() const { return keywords.GetRandom(); };
140 // const char* RandomPreproc() const { return preproc.GetRandom(); };
141 // const char* RandomCustom1() const { return custom1.GetRandom(); };
142 // const char* RandomCustom2() const { return custom2.GetRandom(); };
143
144
145 private:
146
147 // returns true if passed character is within passed string
148 static bool IsWithinString(const char& cChar, const char*const& szString, const int& len);
149
150 // parse single/multi line comment strings
151 void TLanguage::ParseCommentStrings();
152
153 // misc.
154 char szName[MAX_NAME_LEN];
155 char szSymbols[MAX_SYMBOL_LEN]; // string with symbols
156 char szExtensions[STS]; // common file extensions
157 char szCharsWithinString[MAX_SYMBOL_LEN]; // chars that may be within strings
158 bool f_CaseSensitive; // ignore case when searching in word lists
159 bool f_AllowWhiteAfterFirst;
160 int _nSymbols, _nCharsWithinString; // number of characters in the above strings
161
162 TStdList<TRegex> regex; // regex list
163 TStdList<TInfo> regexInfo; // regex type and identifier type exchange info
164 mutable int lastRegexId, lastWordId; // last regex/word Id identified
165
166 // strings
167 char cString; // character that starts/ends a string
168 char cCharacter; // character that starts/ends a character/string
169 char cEscapeChar; // escape character in strings
170 bool f_SillyStringHandling; // the string is extended to the next line(s) until
171 // the closing character is found
172
173 // comments
174 char szSingleLineComment[STS]; // multiline comment string
175 char szMultiLineComment[STS]; // single line comment string
176 int nSingleLineComments, nMultiLineComments; // number of single/multi line comments
177
178 ComSeq* szSingleLineCommentStart; // start sequences for single line comments
179 ComSeq* szMultiLineCommentStart; // start sequences for multi line comments
180 ComSeq* szMultiLineCommentEnd; // end sequences for multi line comments
181 int* nSingleLineCommentStart; // number of charcters in corresponding start/end sequence
182 int* nMultiLineCommentStart;
183 int* nMultiLineCommentEnd;
184
185 // word lists
186 TWordList keywords; // keyword list
187 TWordList preproc; // list with words that start a preprocessor directive
188 TStdList<TWordList> wordLists; // list with user word lists
189 TStdList<TInfo> wordListInfo; // type and identifier type exchange info for user word lists
190 };
#endif