/*
Copyright (c) 2011-2014 Timur Gafarov

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

module lexer;

import std.string;
import std.ascii;
import std.conv;
import std.algorithm;

/**
 * Default delimiter set used by Lexer.addDelimiters when no explicit list
 * is given. Multi-character delimiters are listed first for readability
 * only: the lexer re-sorts its delimiter list by length after every
 * addition.
 */
static string[] stddelimiters =
[
    "==","!=","<=",">=","+=","-=","*=","/=",
    "++","--","||","&&","<<",">>","<>",
    "//","/*","*/","\\\\","\\\"","\\\'",
    "+","-","*","/","%","=","|","^","~","<",">","!",
    "(",")","{","}","[","]",
    ";",":",",","@","#","$","&",
    "\\","\"","\'"
];

/*
 * Helper functions
 */

/**
 * Returns true if `input` compares equal to at least one element of `list`.
 */
bool matches(T)(T input, T[] list)
{
    foreach (v; list)
        if (input == to!T(v)) return true;
    return false;
}

/**
 * Returns true if `input` is non-empty and its first character is one of
 * the characters in std.ascii.whitespace. Only the first character is
 * inspected.
 */
bool isWhitespace(string input)
{
    return input.length != 0 && matches(input[0], std.ascii.whitespace);
}

/*
 * General-purpose lexical analyzer
 */

/**
 * Splits a source string into lexemes using a configurable delimiter list.
 *
 * Comments (single-line and multi-line) and whitespace are skipped; a
 * quoted string literal is returned as a single lexeme including its
 * quotes. No delimiters are registered by default: call addDelimiter or
 * addDelimiters before reading, otherwise comment markers and quotes are
 * never recognised as separate lexemes.
 */
final class Lexer
{
    private:
    string source;

    // Index of the next unread character in `source`. Kept as size_t
    // because it is compared against and incremented by array lengths.
    size_t pos = 0;

    // Registered delimiters, kept sorted longest-first so that the longest
    // delimiter at a given position wins (e.g. "==" before "=").
    string[] delimiters;

    public:
    string singleLineComment = "//";
    string multiLineCommentBegin = "/*";
    string multiLineCommentEnd = "*/";
    string[] stringLiteralQuote = ["\"","\'"];

    /// Lexeme produced by the most recent call to readNext.
    string current;

    /// Current line number: starts at 1 and is incremented for every
    /// newline consumed.
    uint line = 1;

    /// Line number recorded when the first lexeme after a newline was
    /// returned by getLexeme.
    uint lastTokenLine = 0;

    /// True until getLexeme returns a lexeme following the last newline.
    bool noTokensOnCurrentLine = true;

    /// Creates a lexer that reads from `src`.
    this(string src)
    {
        source = src;
    }

    /// Registers one delimiter and re-sorts the list longest-first.
    void addDelimiter(string op)
    {
        delimiters ~= op;
        sort!("a.length > b.length")(delimiters);
    }

    /// Registers a list of delimiters (stddelimiters by default) and
    /// re-sorts the list longest-first.
    void addDelimiters(string[] op = stddelimiters)
    {
        delimiters ~= op;
        sort!("a.length > b.length")(delimiters);
    }

    /// Reads the next lexeme into `current`.
    void readNext()
    {
        current = getLexeme();
    }

    /**
     * Returns the next meaningful lexeme, or an empty string when the
     * source is exhausted.
     *
     * Whitespace and comments are skipped. A string literal is returned as
     * one lexeme, quotes included; everything between the quotes is kept
     * verbatim, including newlines and text that looks like a comment
     * marker. A literal is closed only by the same quote that opened it.
     * A literal that is still open at end of input is discarded and an
     * empty string is returned.
     */
    string getLexeme()
    {
        string result;
        bool commentSingleLine = false;
        bool commentMultiLine = false;
        bool stringLiteral = false;
        string tempStringLiteral = "";

        // At most one of commentSingleLine, commentMultiLine and
        // stringLiteral is true at any time.
        while (true)
        {
            string lexeme = getLexemeUnfiltered();

            // An empty lexeme means the source is exhausted.
            if (lexeme.length == 0)
                break;

            if (lexeme == "\n")
            {
                if (stringLiteral)
                    tempStringLiteral ~= lexeme;
                else if (!commentMultiLine)
                    commentSingleLine = false; // a newline ends a single-line comment
                line++;
                noTokensOnCurrentLine = true;
            }
            else if (stringLiteral)
            {
                // Inside a literal everything is content, including
                // lexemes that would otherwise start or end a comment.
                tempStringLiteral ~= lexeme;
                if (matches(lexeme, stringLiteralQuote)
                    && lexeme[0] == tempStringLiteral[0])
                {
                    result = tempStringLiteral;
                    break;
                }
            }
            else if (commentSingleLine)
            {
                // Skipped until the next newline.
            }
            else if (commentMultiLine)
            {
                // The end marker is tested first so that a comment style
                // whose begin and end markers are identical can be closed.
                if (lexeme == multiLineCommentEnd)
                    commentMultiLine = false;
            }
            else if (lexeme == singleLineComment)
            {
                commentSingleLine = true;
            }
            else if (lexeme == multiLineCommentBegin)
            {
                commentMultiLine = true;
            }
            else if (lexeme == multiLineCommentEnd)
            {
                // A stray end-of-comment marker outside a comment is dropped.
            }
            else if (matches(lexeme, stringLiteralQuote))
            {
                tempStringLiteral = lexeme;
                stringLiteral = true;
            }
            else if (!lexeme.isWhitespace)
            {
                result = lexeme;
                break;
            }
        }

        if (result != "" && noTokensOnCurrentLine)
        {
            lastTokenLine = line;
            noTokensOnCurrentLine = false;
        }

        return result;
    }

    private:
    /**
     * Returns the next raw lexeme without interpreting comments or string
     * literals: a single whitespace character, a single delimiter, or a
     * maximal run of characters containing neither. Returns an empty
     * string at end of input.
     */
    string getLexemeUnfiltered()
    {
        string temp;
        while (pos < source.length)
        {
            // Newline is one of the characters in std.ascii.whitespace, so
            // this branch also yields "\n" as its own lexeme.
            if (matches(source[pos], std.ascii.whitespace))
            {
                if (temp.length == 0)
                {
                    temp ~= source[pos];
                    pos++;
                }
                break;
            }

            // Only look for a delimiter once whitespace has been ruled out.
            string forw = matchForward(pos, delimiters);
            if (forw.length > 0)
            {
                // A delimiter ends the run collected so far; it is only
                // consumed when nothing has been collected yet.
                if (temp.length == 0)
                {
                    temp = forw;
                    pos += forw.length;
                }
                break;
            }

            temp ~= source[pos];
            pos++;
        }
        return temp;
    }

    /**
     * Returns the first element of `list` that occurs in the source at
     * index `start`, or an empty string if none does.
     */
    string matchForward(size_t start, string[] list)
    {
        foreach (v; list)
        {
            string forward = getForward(start, v.length);
            if (forward == v) return forward;
        }
        return "";
    }

    /**
     * Returns up to `num` characters of the source starting at `position`,
     * truncated at the end of the source.
     */
    string getForward(size_t position, size_t num)
    {
        size_t end = position + num;
        if (end > source.length)
            end = source.length;
        return source[position..end];
    }
}