00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00035 #include "OW_config.h"
00036 #include "OW_XMLParserCore.hpp"
00037 #include "OW_Format.hpp"
00038 #include "OW_XMLParseException.hpp"
00039
00040 #include <cctype>
00041
00042 namespace OW_NAMESPACE
00043 {
00044
00045
00046
00047
00048
00050
00051
00052
00054 bool XMLParserCore::next(XMLToken& entry)
00055 {
00056 IstreamBufIterator iterEOF;
00057 if (m_current == iterEOF || *m_current == 0)
00058 {
00059 if (!m_stack.empty())
00060 {
00061 OW_THROWXMLLINE(XMLParseException::UNCLOSED_TAGS, m_line);
00062 }
00063 return false;
00064 }
00065
00066
00067
00068 if (m_tagIsEmpty)
00069 {
00070 m_tagIsEmpty = false;
00071 entry.type = XMLToken::END_TAG;
00072 entry.attributeCount = 0;
00073 return true;
00074 }
00075
00076 if (*m_current == '<')
00077 {
00078
00079 skipWhitespace();
00080 m_current++;
00081 getElement(entry);
00082 if (entry.type == XMLToken::START_TAG)
00083 {
00084 if (m_stack.empty() && m_foundRoot)
00085 {
00086 OW_THROWXMLLINE(XMLParseException::MULTIPLE_ROOTS, m_line);
00087 }
00088 m_foundRoot = true;
00089 if (!m_tagIsEmpty)
00090 {
00091 m_stack.push(entry.text.toString());
00092 }
00093 }
00094 else if (entry.type == XMLToken::END_TAG)
00095 {
00096 if (m_stack.empty())
00097 {
00098 OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00099 }
00100 if (m_stack.top() != entry.text.toString())
00101 {
00102 OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00103 }
00104 m_stack.pop();
00105 }
00106 return true;
00107 }
00108 else
00109 {
00110 entry.type = XMLToken::CONTENT;
00111
00112
00113 getContent(entry);
00114
00115
00116
00117
00118
00119 return true;
00120 }
00121 }
00122
00123
00124
00125
00126
00127
00128 void XMLParserCore::skipWhitespace()
00129 {
00130 while (isspace(*m_current))
00131 {
00132 if (*m_current == '\n')
00133 {
00134 ++m_line;
00135 }
00136 ++m_current;
00137 }
00138 }
00139
// Returns true if `c` may appear inside an element or attribute name as
// recognized by this parser: an alphanumeric character (per isalnum) or one
// of '_', '-', ':', '.'.
inline bool isNameChar(char c)
{
	// isalnum is only defined for values representable as unsigned char (or
	// EOF); a negative char (byte >= 0x80 on signed-char platforms) must be
	// converted first.
	return isalnum(static_cast<unsigned char>(c)) != 0
		|| c == '_' || c == '-' || c == ':' || c == '.';
}
00145
00146 bool XMLParserCore::getElementName(XMLToken& entry)
00147 {
00148 if (!isalpha(*m_current) && *m_current != '_')
00149 {
00150 OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00151 }
00152 entry.text.reset();
00153 while (isNameChar(*m_current))
00154 {
00155 entry.text += *m_current++;
00156 }
00157
00158 skipWhitespace();
00159
00160 if (*m_current == '>')
00161 {
00162 ++m_current;
00163 return true;
00164 }
00165 return false;
00166 }
00167
00168 bool XMLParserCore::getOpenElementName(XMLToken& entry, bool& openCloseElement)
00169 {
00170 openCloseElement = false;
00171 if (getElementName(entry))
00172 {
00173 return true;
00174 }
00175 if (*m_current == '/')
00176 {
00177 ++m_current;
00178 if (*m_current == '>')
00179 {
00180 openCloseElement = true;
00181 ++m_current;
00182 return true;
00183 }
00184 }
00185 return false;
00186 }
00187 void XMLParserCore::getAttributeNameAndEqual(XMLToken::Attribute& att)
00188 {
00189 if (!isalpha(*m_current) && *m_current != '_')
00190 {
00191 OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00192 m_line, Format("Expected alpha or _; got %1", *m_current).c_str());
00193 }
00194 att.name.reset();
00195 while (isalnum(*m_current) || *m_current == '_' || *m_current == '-' ||
00196 *m_current == ':' || *m_current == '.')
00197 {
00198 att.name += *m_current++;
00199 }
00200 skipWhitespace();
00201 if (*m_current != '=')
00202 {
00203 OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00204 m_line, Format("Expected =; got %1", *m_current).c_str());
00205 }
00206 m_current++;
00207 skipWhitespace();
00208 }
00209 void XMLParserCore::getAttributeValue(XMLToken::Attribute& att)
00210 {
00211
00212 if (*m_current != '"' && *m_current != '\'')
00213 {
00214 OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00215 m_line, Format("Expecting \" or '; got %1", *m_current).c_str());
00216 }
00217 char startChar = *m_current++;
00218 att.value.reset();
00219 while (*m_current && *m_current != startChar)
00220 {
00221 att.value += *m_current++;
00222 }
00223
00224 if (*m_current != startChar)
00225 {
00226 OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00227 m_line, Format("Expecting %1; Got %2", startChar, static_cast<int>(*m_current)).c_str());
00228 }
00229 ++m_current;
00230 }
00231 void XMLParserCore::getComment()
00232 {
00233
00234 for (; *m_current; m_current++)
00235 {
00236 if (*m_current == '-')
00237 {
00238 ++m_current;
00239 if (*m_current == '-')
00240 {
00241 ++m_current;
00242 if (*m_current == '>')
00243 {
00244 ++m_current;
00245 return;
00246 }
00247 else
00248 {
00249 OW_THROWXMLLINE(
00250 XMLParseException::MINUS_MINUS_IN_COMMENT, m_line);
00251 }
00252 }
00253 }
00254 }
00255
00256 OW_THROWXMLLINE(XMLParseException::UNTERMINATED_COMMENT, m_line);
00257 }
00258 void XMLParserCore::getCData(XMLToken& entry)
00259 {
00260
00261 entry.text.reset();
00262 for (; *m_current; m_current++)
00263 {
00264 if (*m_current == ']')
00265 {
00266 ++m_current;
00267 if (*m_current == ']')
00268 {
00269 ++m_current;
00270 if (*m_current == '>')
00271 {
00272 ++m_current;
00273 return;
00274 }
00275 else
00276 {
00277 entry.text += ']';
00278 entry.text += ']';
00279 }
00280 }
00281 else
00282 {
00283 entry.text += ']';
00284 }
00285 }
00286 if (*m_current == '\n')
00287 {
00288 ++m_line;
00289 }
00290 entry.text += *m_current;
00291 }
00292
00293 OW_THROWXMLLINE(XMLParseException::UNTERMINATED_CDATA, m_line);
00294 }
00295 void XMLParserCore::getDocType()
00296 {
00297
00298 for (; *m_current && *m_current != '>'; ++m_current)
00299 {
00300 if (*m_current == '\n')
00301 {
00302 ++m_line;
00303 }
00304 }
00305 if (*m_current != '>')
00306 {
00307 OW_THROWXMLLINE(XMLParseException::UNTERMINATED_DOCTYPE, m_line);
00308 }
00309 m_current++;
00310 }
00311
00312 void XMLParserCore::getContent(XMLToken& entry)
00313 {
00314 entry.text.reset();
00315
00316 while (*m_current && *m_current != '<')
00317 {
00318 if (*m_current == '\n')
00319 {
00320 ++m_line;
00321 }
00322
00323
00324
00325
00326
00327 entry.text += *m_current++;
00328 }
00329 }
00330 void XMLParserCore::getElement(XMLToken& entry)
00331 {
00332 entry.attributeCount = 0;
00333 entry.text.reset();
00334
00335
00336
00337 if (*m_current == '?')
00338 {
00339 entry.type = XMLToken::XML_DECLARATION;
00340 ++m_current;
00341 if (getElementName(entry))
00342 {
00343 return;
00344 }
00345 }
00346 else if (*m_current == '!')
00347 {
00348 m_current++;
00349
00350 if (*m_current == '-')
00351 {
00352 ++m_current;
00353 if (*m_current == '-')
00354 {
00355 ++m_current;
00356 entry.type = XMLToken::COMMENT;
00357 getComment();
00358 return;
00359 }
00360 }
00361 else if (*m_current == '[')
00362 {
00363 char string[] = "CDATA[";
00364 char *curChar = string;
00365 m_current++;
00366 while (*curChar)
00367 {
00368 if (*curChar++ != *m_current++)
00369 {
00370 OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00371 }
00372 }
00373 entry.type = XMLToken::CDATA;
00374 getCData(entry);
00375 return;
00376 }
00377 else if (*m_current == 'D')
00378 {
00379 char string[] = "OCTYPE";
00380 char *curChar = string;
00381 m_current++;
00382 while (*curChar)
00383 {
00384 if (*curChar++ != *m_current++)
00385 {
00386 OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00387 }
00388 }
00389 entry.type = XMLToken::DOCTYPE;
00390 getDocType();
00391 return;
00392 }
00393 OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00394 }
00395 else if (*m_current == '/')
00396 {
00397 entry.type = XMLToken::END_TAG;
00398 ++m_current;
00399 if (!getElementName(entry))
00400 {
00401 OW_THROWXMLLINE(XMLParseException::BAD_END_TAG, m_line);
00402 }
00403 return;
00404 }
00405 else if (isalpha(*m_current) || *m_current == '_')
00406 {
00407 entry.type = XMLToken::START_TAG;
00408 bool openCloseElement;
00409 if (getOpenElementName(entry, openCloseElement))
00410 {
00411 if (openCloseElement)
00412 {
00413 entry.type = XMLToken::START_TAG;
00414 m_tagIsEmpty = true;
00415 }
00416 return;
00417 }
00418 }
00419 else
00420 OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00421
00422
00423
00424 for (;;)
00425 {
00426 skipWhitespace();
00427 if (entry.type == XMLToken::XML_DECLARATION)
00428 {
00429 if (*m_current == '?')
00430 {
00431 ++m_current;
00432 if (*m_current == '>')
00433 {
00434 ++m_current;
00435 return;
00436 }
00437 else
00438 {
00439 OW_THROWXMLLINEMSG(
00440 XMLParseException::BAD_ATTRIBUTE_VALUE, m_line,
00441 Format("Expecting >; Got %1", *m_current).c_str());
00442 }
00443 }
00444 }
00445 else if (entry.type == XMLToken::START_TAG && *m_current == '/')
00446 {
00447 ++m_current;
00448 if (*m_current =='>')
00449 {
00450 entry.type = XMLToken::START_TAG;
00451 m_tagIsEmpty = true;
00452 ++m_current;
00453 return;
00454 }
00455 else
00456 {
00457 OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00458 m_line, Format("Expecting >; Got %1", *m_current).c_str());
00459 }
00460 }
00461 else if (*m_current == '>')
00462 {
00463 ++m_current;
00464 return;
00465 }
00466 ++entry.attributeCount;
00467 XMLToken::Attribute& attr = entry.attributes[entry.attributeCount - 1];
00468 getAttributeNameAndEqual(attr);
00469 getAttributeValue(attr);
00470 if (entry.attributeCount == XMLToken::MAX_ATTRIBUTES)
00471 {
00472 OW_THROWXMLLINE(XMLParseException::TOO_MANY_ATTRIBUTES, m_line);
00473 }
00474 }
00475 }
00476
00477 }
00478