osgEarth 2.1.1
|
00001 /* 00002 www.sourceforge.net/projects/tinyxml 00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) 00004 00005 This software is provided 'as-is', without any express or implied 00006 warranty. In no event will the authors be held liable for any 00007 damages arising from the use of this software. 00008 00009 Permission is granted to anyone to use this software for any 00010 purpose, including commercial applications, and to alter it and 00011 redistribute it freely, subject to the following restrictions: 00012 00013 1. The origin of this software must not be misrepresented; you must 00014 not claim that you wrote the original software. If you use this 00015 software in a product, an acknowledgment in the product documentation 00016 would be appreciated but is not required. 00017 00018 2. Altered source versions must be plainly marked as such, and 00019 must not be misrepresented as being the original software. 00020 00021 3. This notice may not be removed or altered from any source 00022 distribution. 00023 */ 00024 00025 #include <ctype.h> 00026 #include <stddef.h> 00027 00028 #include "tinyxml.h" 00029 00030 //#define DEBUG_PARSER 00031 #if defined( DEBUG_PARSER ) 00032 # if defined( DEBUG ) && defined( _MSC_VER ) 00033 # include <windows.h> 00034 # define TIXML_LOG OutputDebugString 00035 # else 00036 # define TIXML_LOG printf 00037 # endif 00038 #endif 00039 00040 // Note tha "PutString" hardcodes the same list. This 00041 // is less flexible than it appears. Changing the entries 00042 // or order will break putstring. 00043 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 00044 { 00045 { "&", 5, '&' }, 00046 { "<", 4, '<' }, 00047 { ">", 4, '>' }, 00048 { """, 6, '\"' }, 00049 { "'", 6, '\'' } 00050 }; 00051 00052 // Bunch of unicode info at: 00053 // http://www.unicode.org/faq/utf_bom.html 00054 // Including the basic of this table, which determines the #bytes in the 00055 // sequence from the lead byte. 1 placed for invalid sequences -- 00056 // although the result will be junk, pass it through as much as possible. 00057 // Beware of the non-characters in UTF-8: 00058 // ef bb bf (Microsoft "lead bytes") 00059 // ef bf be 00060 // ef bf bf 00061 00062 const unsigned char TIXML_UTF_LEAD_0 = 0xefU; 00063 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; 00064 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; 00065 00066 const int TiXmlBase::utf8ByteTable[256] = 00067 { 00068 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 00070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 00071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 00072 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 00073 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 00074 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 00075 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 00076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range 00077 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid 00078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 00079 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 00080 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 00081 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte 00082 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 00083 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte 00084 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid 00085 }; 00086 00087 00088 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) 00089 { 00090 const unsigned long BYTE_MASK = 0xBF; 00091 const unsigned long BYTE_MARK = 0x80; 00092 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 00093 00094 if (input < 0x80) 00095 *length = 1; 00096 else if ( input < 0x800 ) 00097 *length = 2; 00098 else if ( input < 0x10000 ) 00099 *length = 3; 00100 else if ( input < 0x200000 ) 00101 *length = 4; 00102 else 00103 { *length = 0; return; } // This code won't covert this correctly anyway. 00104 00105 output += *length; 00106 00107 // Scary scary fall throughs. 00108 switch (*length) 00109 { 00110 case 4: 00111 --output; 00112 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00113 input >>= 6; 00114 case 3: 00115 --output; 00116 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00117 input >>= 6; 00118 case 2: 00119 --output; 00120 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00121 input >>= 6; 00122 case 1: 00123 --output; 00124 *output = (char)(input | FIRST_BYTE_MARK[*length]); 00125 } 00126 } 00127 00128 00129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 00130 { 00131 // This will only work for low-ascii, everything else is assumed to be a valid 00132 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00133 // to figure out alhabetical vs. not across encoding. So take a very 00134 // conservative approach. 00135 00136 // if ( encoding == TIXML_ENCODING_UTF8 ) 00137 // { 00138 if ( anyByte < 127 ) 00139 return isalpha( anyByte ); 00140 else 00141 return 1; // What else to do? The unicode set is huge...get the english ones right. 00142 // } 00143 // else 00144 // { 00145 // return isalpha( anyByte ); 00146 // } 00147 } 00148 00149 00150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 00151 { 00152 // This will only work for low-ascii, everything else is assumed to be a valid 00153 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00154 // to figure out alhabetical vs. not across encoding. So take a very 00155 // conservative approach. 00156 00157 // if ( encoding == TIXML_ENCODING_UTF8 ) 00158 // { 00159 if ( anyByte < 127 ) 00160 return isalnum( anyByte ); 00161 else 00162 return 1; // What else to do? The unicode set is huge...get the english ones right. 00163 // } 00164 // else 00165 // { 00166 // return isalnum( anyByte ); 00167 // } 00168 } 00169 00170 00171 class TiXmlParsingData 00172 { 00173 friend class TiXmlDocument; 00174 public: 00175 void Stamp( const char* now, TiXmlEncoding encoding ); 00176 00177 const TiXmlCursor& Cursor() { return cursor; } 00178 00179 private: 00180 // Only used by the document! 00181 TiXmlParsingData( const char* start, int _tabsize, int row, int col ) 00182 { 00183 assert( start ); 00184 stamp = start; 00185 tabsize = _tabsize; 00186 cursor.row = row; 00187 cursor.col = col; 00188 } 00189 00190 TiXmlCursor cursor; 00191 const char* stamp; 00192 int tabsize; 00193 }; 00194 00195 00196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) 00197 { 00198 assert( now ); 00199 00200 // Do nothing if the tabsize is 0. 00201 if ( tabsize < 1 ) 00202 { 00203 return; 00204 } 00205 00206 // Get the current row, column. 00207 int row = cursor.row; 00208 int col = cursor.col; 00209 const char* p = stamp; 00210 assert( p ); 00211 00212 while ( p < now ) 00213 { 00214 // Treat p as unsigned, so we have a happy compiler. 00215 const unsigned char* pU = (const unsigned char*)p; 00216 00217 // Code contributed by Fletcher Dunn: (modified by lee) 00218 switch (*pU) { 00219 case 0: 00220 // We *should* never get here, but in case we do, don't 00221 // advance past the terminating null character, ever 00222 return; 00223 00224 case '\r': 00225 // bump down to the next line 00226 ++row; 00227 col = 0; 00228 // Eat the character 00229 ++p; 00230 00231 // Check for \r\n sequence, and treat this as a single character 00232 if (*p == '\n') { 00233 ++p; 00234 } 00235 break; 00236 00237 case '\n': 00238 // bump down to the next line 00239 ++row; 00240 col = 0; 00241 00242 // Eat the character 00243 ++p; 00244 00245 // Check for \n\r sequence, and treat this as a single 00246 // character. (Yes, this bizarre thing does occur still 00247 // on some arcane platforms...) 00248 if (*p == '\r') { 00249 ++p; 00250 } 00251 break; 00252 00253 case '\t': 00254 // Eat the character 00255 ++p; 00256 00257 // Skip to next tab stop 00258 col = (col / tabsize + 1) * tabsize; 00259 break; 00260 00261 case TIXML_UTF_LEAD_0: 00262 if ( encoding == TIXML_ENCODING_UTF8 ) 00263 { 00264 if ( *(p+1) && *(p+2) ) 00265 { 00266 // In these cases, don't advance the column. These are 00267 // 0-width spaces. 00268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 ) 00269 p += 3; 00270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU ) 00271 p += 3; 00272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU ) 00273 p += 3; 00274 else 00275 { p +=3; ++col; } // A normal character. 00276 } 00277 } 00278 else 00279 { 00280 ++p; 00281 ++col; 00282 } 00283 break; 00284 00285 default: 00286 if ( encoding == TIXML_ENCODING_UTF8 ) 00287 { 00288 // Eat the 1 to 4 byte utf8 character. 00289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)]; 00290 if ( step == 0 ) 00291 step = 1; // Error case from bad encoding, but handle gracefully. 00292 p += step; 00293 00294 // Just advance one column, of course. 00295 ++col; 00296 } 00297 else 00298 { 00299 ++p; 00300 ++col; 00301 } 00302 break; 00303 } 00304 } 00305 cursor.row = row; 00306 cursor.col = col; 00307 assert( cursor.row >= -1 ); 00308 assert( cursor.col >= -1 ); 00309 stamp = p; 00310 assert( stamp ); 00311 } 00312 00313 00314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) 00315 { 00316 if ( !p || !*p ) 00317 { 00318 return 0; 00319 } 00320 if ( encoding == TIXML_ENCODING_UTF8 ) 00321 { 00322 while ( *p ) 00323 { 00324 const unsigned char* pU = (const unsigned char*)p; 00325 00326 // Skip the stupid Microsoft UTF-8 Byte order marks 00327 if ( *(pU+0)==TIXML_UTF_LEAD_0 00328 && *(pU+1)==TIXML_UTF_LEAD_1 00329 && *(pU+2)==TIXML_UTF_LEAD_2 ) 00330 { 00331 p += 3; 00332 continue; 00333 } 00334 else if(*(pU+0)==TIXML_UTF_LEAD_0 00335 && *(pU+1)==0xbfU 00336 && *(pU+2)==0xbeU ) 00337 { 00338 p += 3; 00339 continue; 00340 } 00341 else if(*(pU+0)==TIXML_UTF_LEAD_0 00342 && *(pU+1)==0xbfU 00343 && *(pU+2)==0xbfU ) 00344 { 00345 p += 3; 00346 continue; 00347 } 00348 00349 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space. 00350 ++p; 00351 else 00352 break; 00353 } 00354 } 00355 else 00356 { 00357 while ( *p && IsWhiteSpace( *p ) ) 00358 ++p; 00359 } 00360 00361 return p; 00362 } 00363 00364 #ifdef TIXML_USE_STL 00365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag ) 00366 { 00367 for( ;; ) 00368 { 00369 if ( !in->good() ) return false; 00370 00371 int c = in->peek(); 00372 // At this scope, we can't get to a document. So fail silently. 00373 if ( !IsWhiteSpace( c ) || c <= 0 ) 00374 return true; 00375 00376 *tag += (char) in->get(); 00377 } 00378 } 00379 00380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag ) 00381 { 00382 //assert( character > 0 && character < 128 ); // else it won't work in utf-8 00383 while ( in->good() ) 00384 { 00385 int c = in->peek(); 00386 if ( c == character ) 00387 return true; 00388 if ( c <= 0 ) // Silent failure: can't get document at this scope 00389 return false; 00390 00391 in->get(); 00392 *tag += (char) c; 00393 } 00394 return false; 00395 } 00396 #endif 00397 00398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The 00399 // "assign" optimization removes over 10% of the execution time. 00400 // 00401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) 00402 { 00403 // Oddly, not supported on some comilers, 00404 //name->clear(); 00405 // So use this: 00406 *name = ""; 00407 assert( p ); 00408 00409 // Names start with letters or underscores. 00410 // Of course, in unicode, tinyxml has no idea what a letter *is*. The 00411 // algorithm is generous. 00412 // 00413 // After that, they can be letters, underscores, numbers, 00414 // hyphens, or colons. (Colons are valid ony for namespaces, 00415 // but tinyxml can't tell namespaces from names.) 00416 if ( p && *p 00417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) 00418 { 00419 const char* start = p; 00420 while( p && *p 00421 && ( IsAlphaNum( (unsigned char ) *p, encoding ) 00422 || *p == '_' 00423 || *p == '-' 00424 || *p == '.' 00425 || *p == ':' ) ) 00426 { 00427 //(*name) += *p; // expensive 00428 ++p; 00429 } 00430 if ( p-start > 0 ) { 00431 name->assign( start, p-start ); 00432 } 00433 return p; 00434 } 00435 return 0; 00436 } 00437 00438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) 00439 { 00440 // Presume an entity, and pull it out. 00441 TIXML_STRING ent; 00442 int i; 00443 *length = 0; 00444 00445 if ( *(p+1) && *(p+1) == '#' && *(p+2) ) 00446 { 00447 unsigned long ucs = 0; 00448 ptrdiff_t delta = 0; 00449 unsigned mult = 1; 00450 00451 if ( *(p+2) == 'x' ) 00452 { 00453 // Hexadecimal. 00454 if ( !*(p+3) ) return 0; 00455 00456 const char* q = p+3; 00457 q = strchr( q, ';' ); 00458 00459 if ( !q || !*q ) return 0; 00460 00461 delta = q-p; 00462 --q; 00463 00464 while ( *q != 'x' ) 00465 { 00466 if ( *q >= '0' && *q <= '9' ) 00467 ucs += mult * (*q - '0'); 00468 else if ( *q >= 'a' && *q <= 'f' ) 00469 ucs += mult * (*q - 'a' + 10); 00470 else if ( *q >= 'A' && *q <= 'F' ) 00471 ucs += mult * (*q - 'A' + 10 ); 00472 else 00473 return 0; 00474 mult *= 16; 00475 --q; 00476 } 00477 } 00478 else 00479 { 00480 // Decimal. 00481 if ( !*(p+2) ) return 0; 00482 00483 const char* q = p+2; 00484 q = strchr( q, ';' ); 00485 00486 if ( !q || !*q ) return 0; 00487 00488 delta = q-p; 00489 --q; 00490 00491 while ( *q != '#' ) 00492 { 00493 if ( *q >= '0' && *q <= '9' ) 00494 ucs += mult * (*q - '0'); 00495 else 00496 return 0; 00497 mult *= 10; 00498 --q; 00499 } 00500 } 00501 if ( encoding == TIXML_ENCODING_UTF8 ) 00502 { 00503 // convert the UCS to UTF-8 00504 ConvertUTF32ToUTF8( ucs, value, length ); 00505 } 00506 else 00507 { 00508 *value = (char)ucs; 00509 *length = 1; 00510 } 00511 return p + delta + 1; 00512 } 00513 00514 // Now try to match it. 00515 for( i=0; i<NUM_ENTITY; ++i ) 00516 { 00517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) 00518 { 00519 assert( strlen( entity[i].str ) == entity[i].strLength ); 00520 *value = entity[i].chr; 00521 *length = 1; 00522 return ( p + entity[i].strLength ); 00523 } 00524 } 00525 00526 // So it wasn't an entity, its unrecognized, or something like that. 00527 *value = *p; // Don't put back the last one, since we return it! 00528 //*length = 1; // Leave unrecognized entities - this doesn't really work. 00529 // Just writes strange XML. 00530 return p+1; 00531 } 00532 00533 00534 bool TiXmlBase::StringEqual( const char* p, 00535 const char* tag, 00536 bool ignoreCase, 00537 TiXmlEncoding encoding ) 00538 { 00539 assert( p ); 00540 assert( tag ); 00541 if ( !p || !*p ) 00542 { 00543 assert( 0 ); 00544 return false; 00545 } 00546 00547 const char* q = p; 00548 00549 if ( ignoreCase ) 00550 { 00551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) 00552 { 00553 ++q; 00554 ++tag; 00555 } 00556 00557 if ( *tag == 0 ) 00558 return true; 00559 } 00560 else 00561 { 00562 while ( *q && *tag && *q == *tag ) 00563 { 00564 ++q; 00565 ++tag; 00566 } 00567 00568 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? 00569 return true; 00570 } 00571 return false; 00572 } 00573 00574 const char* TiXmlBase::ReadText( const char* p, 00575 TIXML_STRING * text, 00576 bool trimWhiteSpace, 00577 const char* endTag, 00578 bool caseInsensitive, 00579 TiXmlEncoding encoding ) 00580 { 00581 *text = ""; 00582 if ( !trimWhiteSpace // certain tags always keep whitespace 00583 || !condenseWhiteSpace ) // if true, whitespace is always kept 00584 { 00585 // Keep all the white space. 00586 while ( p && *p 00587 && !StringEqual( p, endTag, caseInsensitive, encoding ) 00588 ) 00589 { 00590 int len; 00591 char cArr[4] = { 0, 0, 0, 0 }; 00592 p = GetChar( p, cArr, &len, encoding ); 00593 text->append( cArr, len ); 00594 } 00595 } 00596 else 00597 { 00598 bool whitespace = false; 00599 00600 // Remove leading white space: 00601 p = SkipWhiteSpace( p, encoding ); 00602 while ( p && *p 00603 && !StringEqual( p, endTag, caseInsensitive, encoding ) ) 00604 { 00605 if ( *p == '\r' || *p == '\n' ) 00606 { 00607 whitespace = true; 00608 ++p; 00609 } 00610 else if ( IsWhiteSpace( *p ) ) 00611 { 00612 whitespace = true; 00613 ++p; 00614 } 00615 else 00616 { 00617 // If we've found whitespace, add it before the 00618 // new character. Any whitespace just becomes a space. 00619 if ( whitespace ) 00620 { 00621 (*text) += ' '; 00622 whitespace = false; 00623 } 00624 int len; 00625 char cArr[4] = { 0, 0, 0, 0 }; 00626 p = GetChar( p, cArr, &len, encoding ); 00627 if ( len == 1 ) 00628 (*text) += cArr[0]; // more efficient 00629 else 00630 text->append( cArr, len ); 00631 } 00632 } 00633 } 00634 if ( p && *p ) 00635 p += strlen( endTag ); 00636 return p; 00637 } 00638 00639 #ifdef TIXML_USE_STL 00640 00641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag ) 00642 { 00643 // The basic issue with a document is that we don't know what we're 00644 // streaming. Read something presumed to be a tag (and hope), then 00645 // identify it, and call the appropriate stream method on the tag. 00646 // 00647 // This "pre-streaming" will never read the closing ">" so the 00648 // sub-tag can orient itself. 00649 00650 if ( !StreamTo( in, '<', tag ) ) 00651 { 00652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00653 return; 00654 } 00655 00656 while ( in->good() ) 00657 { 00658 int tagIndex = (int) tag->length(); 00659 while ( in->good() && in->peek() != '>' ) 00660 { 00661 int c = in->get(); 00662 if ( c <= 0 ) 00663 { 00664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00665 break; 00666 } 00667 (*tag) += (char) c; 00668 } 00669 00670 if ( in->good() ) 00671 { 00672 // We now have something we presume to be a node of 00673 // some sort. Identify it, and call the node to 00674 // continue streaming. 00675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); 00676 00677 if ( node ) 00678 { 00679 node->StreamIn( in, tag ); 00680 bool isElement = node->ToElement() != 0; 00681 delete node; 00682 node = 0; 00683 00684 // If this is the root element, we're done. Parsing will be 00685 // done by the >> operator. 00686 if ( isElement ) 00687 { 00688 return; 00689 } 00690 } 00691 else 00692 { 00693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00694 return; 00695 } 00696 } 00697 } 00698 // We should have returned sooner. 00699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00700 } 00701 00702 #endif 00703 00704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) 00705 { 00706 ClearError(); 00707 00708 // Parse away, at the document level. Since a document 00709 // contains nothing but other tags, most of what happens 00710 // here is skipping white space. 00711 if ( !p || !*p ) 00712 { 00713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00714 return 0; 00715 } 00716 00717 // Note that, for a document, this needs to come 00718 // before the while space skip, so that parsing 00719 // starts from the pointer we are given. 00720 location.Clear(); 00721 if ( prevData ) 00722 { 00723 location.row = prevData->cursor.row; 00724 location.col = prevData->cursor.col; 00725 } 00726 else 00727 { 00728 location.row = 0; 00729 location.col = 0; 00730 } 00731 TiXmlParsingData data( p, TabSize(), location.row, location.col ); 00732 location = data.Cursor(); 00733 00734 if ( encoding == TIXML_ENCODING_UNKNOWN ) 00735 { 00736 // Check for the Microsoft UTF-8 lead bytes. 00737 const unsigned char* pU = (const unsigned char*)p; 00738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 00739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 00740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 ) 00741 { 00742 encoding = TIXML_ENCODING_UTF8; 00743 useMicrosoftBOM = true; 00744 } 00745 } 00746 00747 p = SkipWhiteSpace( p, encoding ); 00748 if ( !p ) 00749 { 00750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00751 return 0; 00752 } 00753 00754 while ( p && *p ) 00755 { 00756 TiXmlNode* node = Identify( p, encoding ); 00757 if ( node ) 00758 { 00759 p = node->Parse( p, &data, encoding ); 00760 LinkEndChild( node ); 00761 } 00762 else 00763 { 00764 break; 00765 } 00766 00767 // Did we get encoding info? 00768 if ( encoding == TIXML_ENCODING_UNKNOWN 00769 && node->ToDeclaration() ) 00770 { 00771 TiXmlDeclaration* dec = node->ToDeclaration(); 00772 const char* enc = dec->Encoding(); 00773 assert( enc ); 00774 00775 if ( *enc == 0 ) 00776 encoding = TIXML_ENCODING_UTF8; 00777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) 00778 encoding = TIXML_ENCODING_UTF8; 00779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) 00780 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice 00781 else 00782 encoding = TIXML_ENCODING_LEGACY; 00783 } 00784 00785 p = SkipWhiteSpace( p, encoding ); 00786 } 00787 00788 // Was this empty? 00789 if ( !firstChild ) { 00790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); 00791 return 0; 00792 } 00793 00794 // All is well. 00795 return p; 00796 } 00797 00798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) 00799 { 00800 // The first error in a chain is more accurate - don't set again! 00801 if ( error ) 00802 return; 00803 00804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); 00805 error = true; 00806 errorId = err; 00807 errorDesc = errorString[ errorId ]; 00808 00809 errorLocation.Clear(); 00810 if ( pError && data ) 00811 { 00812 data->Stamp( pError, encoding ); 00813 errorLocation = data->Cursor(); 00814 } 00815 } 00816 00817 00818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) 00819 { 00820 TiXmlNode* returnNode = 0; 00821 00822 p = SkipWhiteSpace( p, encoding ); 00823 if( !p || !*p || *p != '<' ) 00824 { 00825 return 0; 00826 } 00827 00828 p = SkipWhiteSpace( p, encoding ); 00829 00830 if ( !p || !*p ) 00831 { 00832 return 0; 00833 } 00834 00835 // What is this thing? 00836 // - Elements start with a letter or underscore, but xml is reserved. 00837 // - Comments: <!-- 00838 // - Decleration: <?xml 00839 // - Everthing else is unknown to tinyxml. 00840 // 00841 00842 const char* xmlHeader = { "<?xml" }; 00843 const char* commentHeader = { "<!--" }; 00844 const char* dtdHeader = { "<!" }; 00845 const char* cdataHeader = { "<![CDATA[" }; 00846 00847 if ( StringEqual( p, xmlHeader, true, encoding ) ) 00848 { 00849 #ifdef DEBUG_PARSER 00850 TIXML_LOG( "XML parsing Declaration\n" ); 00851 #endif 00852 returnNode = new TiXmlDeclaration(); 00853 } 00854 else if ( StringEqual( p, commentHeader, false, encoding ) ) 00855 { 00856 #ifdef DEBUG_PARSER 00857 TIXML_LOG( "XML parsing Comment\n" ); 00858 #endif 00859 returnNode = new TiXmlComment(); 00860 } 00861 else if ( StringEqual( p, cdataHeader, false, encoding ) ) 00862 { 00863 #ifdef DEBUG_PARSER 00864 TIXML_LOG( "XML parsing CDATA\n" ); 00865 #endif 00866 TiXmlText* text = new TiXmlText( "" ); 00867 text->SetCDATA( true ); 00868 returnNode = text; 00869 } 00870 else if ( StringEqual( p, dtdHeader, false, encoding ) ) 00871 { 00872 #ifdef DEBUG_PARSER 00873 TIXML_LOG( "XML parsing Unknown(1)\n" ); 00874 #endif 00875 returnNode = new TiXmlUnknown(); 00876 } 00877 else if ( IsAlpha( *(p+1), encoding ) 00878 || *(p+1) == '_' ) 00879 { 00880 #ifdef DEBUG_PARSER 00881 TIXML_LOG( "XML parsing Element\n" ); 00882 #endif 00883 returnNode = new TiXmlElement( "" ); 00884 } 00885 else 00886 { 00887 #ifdef DEBUG_PARSER 00888 TIXML_LOG( "XML parsing Unknown(2)\n" ); 00889 #endif 00890 returnNode = new TiXmlUnknown(); 00891 } 00892 00893 if ( returnNode ) 00894 { 00895 // Set the parent, so it can report errors 00896 returnNode->parent = this; 00897 } 00898 return returnNode; 00899 } 00900 00901 #ifdef TIXML_USE_STL 00902 00903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag) 00904 { 00905 // We're called with some amount of pre-parsing. That is, some of "this" 00906 // element is in "tag". Go ahead and stream to the closing ">" 00907 while( in->good() ) 00908 { 00909 int c = in->get(); 00910 if ( c <= 0 ) 00911 { 00912 TiXmlDocument* document = GetDocument(); 00913 if ( document ) 00914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00915 return; 00916 } 00917 (*tag) += (char) c ; 00918 00919 if ( c == '>' ) 00920 break; 00921 } 00922 00923 if ( tag->length() < 3 ) return; 00924 00925 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. 00926 // If not, identify and stream. 00927 00928 if ( tag->at( tag->length() - 1 ) == '>' 00929 && tag->at( tag->length() - 2 ) == '/' ) 00930 { 00931 // All good! 00932 return; 00933 } 00934 else if ( tag->at( tag->length() - 1 ) == '>' ) 00935 { 00936 // There is more. Could be: 00937 // text 00938 // cdata text (which looks like another node) 00939 // closing tag 00940 // another node. 00941 for ( ;; ) 00942 { 00943 StreamWhiteSpace( in, tag ); 00944 00945 // Do we have text? 00946 if ( in->good() && in->peek() != '<' ) 00947 { 00948 // Yep, text. 00949 TiXmlText text( "" ); 00950 text.StreamIn( in, tag ); 00951 00952 // What follows text is a closing tag or another node. 00953 // Go around again and figure it out. 00954 continue; 00955 } 00956 00957 // We now have either a closing tag...or another node. 00958 // We should be at a "<", regardless. 00959 if ( !in->good() ) return; 00960 assert( in->peek() == '<' ); 00961 int tagIndex = (int) tag->length(); 00962 00963 bool closingTag = false; 00964 bool firstCharFound = false; 00965 00966 for( ;; ) 00967 { 00968 if ( !in->good() ) 00969 return; 00970 00971 int c = in->peek(); 00972 if ( c <= 0 ) 00973 { 00974 TiXmlDocument* document = GetDocument(); 00975 if ( document ) 00976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00977 return; 00978 } 00979 00980 if ( c == '>' ) 00981 break; 00982 00983 *tag += (char) c; 00984 in->get(); 00985 00986 // Early out if we find the CDATA id. 00987 if ( c == '[' && tag->size() >= 9 ) 00988 { 00989 size_t len = tag->size(); 00990 const char* start = tag->c_str() + len - 9; 00991 if ( strcmp( start, "<![CDATA[" ) == 0 ) { 00992 assert( !closingTag ); 00993 break; 00994 } 00995 } 00996 00997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) 00998 { 00999 firstCharFound = true; 01000 if ( c == '/' ) 01001 closingTag = true; 01002 } 01003 } 01004 // If it was a closing tag, then read in the closing '>' to clean up the input stream. 01005 // If it was not, the streaming will be done by the tag. 01006 if ( closingTag ) 01007 { 01008 if ( !in->good() ) 01009 return; 01010 01011 int c = in->get(); 01012 if ( c <= 0 ) 01013 { 01014 TiXmlDocument* document = GetDocument(); 01015 if ( document ) 01016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01017 return; 01018 } 01019 assert( c == '>' ); 01020 *tag += (char) c; 01021 01022 // We are done, once we've found our closing tag. 01023 return; 01024 } 01025 else 01026 { 01027 // If not a closing tag, id it, and stream. 01028 const char* tagloc = tag->c_str() + tagIndex; 01029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); 01030 if ( !node ) 01031 return; 01032 node->StreamIn( in, tag ); 01033 delete node; 01034 node = 0; 01035 01036 // No return: go around from the beginning: text, closing tag, or node. 01037 } 01038 } 01039 } 01040 } 01041 #endif 01042 01043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01044 { 01045 p = SkipWhiteSpace( p, encoding ); 01046 TiXmlDocument* document = GetDocument(); 01047 01048 if ( !p || !*p ) 01049 { 01050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); 01051 return 0; 01052 } 01053 01054 if ( data ) 01055 { 01056 data->Stamp( p, encoding ); 01057 location = data->Cursor(); 01058 } 01059 01060 if ( *p != '<' ) 01061 { 01062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); 01063 return 0; 01064 } 01065 01066 p = SkipWhiteSpace( p+1, encoding ); 01067 01068 // Read the name. 01069 const char* pErr = p; 01070 01071 p = ReadName( p, &value, encoding ); 01072 if ( !p || !*p ) 01073 { 01074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); 01075 return 0; 01076 } 01077 01078 TIXML_STRING endTag ("</"); 01079 endTag += value; 01080 01081 // Check for and read attributes. Also look for an empty 01082 // tag or an end tag. 01083 while ( p && *p ) 01084 { 01085 pErr = p; 01086 p = SkipWhiteSpace( p, encoding ); 01087 if ( !p || !*p ) 01088 { 01089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 01090 return 0; 01091 } 01092 if ( *p == '/' ) 01093 { 01094 ++p; 01095 // Empty tag. 01096 if ( *p != '>' ) 01097 { 01098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); 01099 return 0; 01100 } 01101 return (p+1); 01102 } 01103 else if ( *p == '>' ) 01104 { 01105 // Done with attributes (if there were any.) 01106 // Read the value -- which can include other 01107 // elements -- read the end tag, and return. 01108 ++p; 01109 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. 01110 if ( !p || !*p ) { 01111 // We were looking for the end tag, but found nothing. 01112 // Fix for [ 1663758 ] Failure to report error on bad XML 01113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 01114 return 0; 01115 } 01116 01117 // We should find the end tag now 01118 // note that: 01119 // </foo > and 01120 // </foo> 01121 // are both valid end tags. 01122 if ( StringEqual( p, endTag.c_str(), false, encoding ) ) 01123 { 01124 p += endTag.length(); 01125 p = SkipWhiteSpace( p, encoding ); 01126 if ( p && *p && *p == '>' ) { 01127 ++p; 01128 return p; 01129 } 01130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 01131 return 0; 01132 } 01133 else 01134 { 01135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 01136 return 0; 01137 } 01138 } 01139 else 01140 { 01141 // Try to read an attribute: 01142 TiXmlAttribute* attrib = new TiXmlAttribute(); 01143 if ( !attrib ) 01144 { 01145 return 0; 01146 } 01147 01148 attrib->SetDocument( document ); 01149 pErr = p; 01150 p = attrib->Parse( p, data, encoding ); 01151 01152 if ( !p || !*p ) 01153 { 01154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); 01155 delete attrib; 01156 return 0; 01157 } 01158 01159 // Handle the strange case of double attributes: 01160 #ifdef TIXML_USE_STL 01161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() ); 01162 #else 01163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); 01164 #endif 01165 if ( node ) 01166 { 01167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); 01168 delete attrib; 01169 return 0; 01170 } 01171 01172 attributeSet.Add( attrib ); 01173 } 01174 } 01175 return p; 01176 } 01177 01178 01179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01180 { 01181 TiXmlDocument* document = GetDocument(); 01182 01183 // Read in text and elements in any order. 01184 const char* pWithWhiteSpace = p; 01185 p = SkipWhiteSpace( p, encoding ); 01186 01187 while ( p && *p ) 01188 { 01189 if ( *p != '<' ) 01190 { 01191 // Take what we have, make a text element. 01192 TiXmlText* textNode = new TiXmlText( "" ); 01193 01194 if ( !textNode ) 01195 { 01196 return 0; 01197 } 01198 01199 if ( TiXmlBase::IsWhiteSpaceCondensed() ) 01200 { 01201 p = textNode->Parse( p, data, encoding ); 01202 } 01203 else 01204 { 01205 // Special case: we want to keep the white space 01206 // so that leading spaces aren't removed. 01207 p = textNode->Parse( pWithWhiteSpace, data, encoding ); 01208 } 01209 01210 if ( !textNode->Blank() ) 01211 LinkEndChild( textNode ); 01212 else 01213 delete textNode; 01214 } 01215 else 01216 { 01217 // We hit a '<' 01218 // Have we hit a new element or an end tag? This could also be 01219 // a TiXmlText in the "CDATA" style. 01220 if ( StringEqual( p, "</", false, encoding ) ) 01221 { 01222 return p; 01223 } 01224 else 01225 { 01226 TiXmlNode* node = Identify( p, encoding ); 01227 if ( node ) 01228 { 01229 p = node->Parse( p, data, encoding ); 01230 LinkEndChild( node ); 01231 } 01232 else 01233 { 01234 return 0; 01235 } 01236 } 01237 } 01238 pWithWhiteSpace = p; 01239 p = SkipWhiteSpace( p, encoding ); 01240 } 01241 01242 if ( !p ) 01243 { 01244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); 01245 } 01246 return p; 01247 } 01248 01249 01250 #ifdef TIXML_USE_STL 01251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag ) 01252 { 01253 while ( in->good() ) 01254 { 01255 int c = in->get(); 01256 if ( c <= 0 ) 01257 { 01258 TiXmlDocument* document = GetDocument(); 01259 if ( document ) 01260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01261 return; 01262 } 01263 (*tag) += (char) c; 01264 01265 if ( c == '>' ) 01266 { 01267 // All is well. 01268 return; 01269 } 01270 } 01271 } 01272 #endif 01273 01274 01275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01276 { 01277 TiXmlDocument* document = GetDocument(); 01278 p = SkipWhiteSpace( p, encoding ); 01279 01280 if ( data ) 01281 { 01282 data->Stamp( p, encoding ); 01283 location = data->Cursor(); 01284 } 01285 if ( !p || !*p || *p != '<' ) 01286 { 01287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); 01288 return 0; 01289 } 01290 ++p; 01291 value = ""; 01292 01293 while ( p && *p && *p != '>' ) 01294 { 01295 value += *p; 01296 ++p; 01297 } 01298 01299 if ( !p ) 01300 { 01301 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); 01302 } 01303 if ( *p == '>' ) 01304 return p+1; 01305 return p; 01306 } 01307 01308 #ifdef TIXML_USE_STL 01309 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag ) 01310 { 01311 while ( in->good() ) 01312 { 01313 int c = in->get(); 01314 if ( c <= 0 ) 01315 { 01316 TiXmlDocument* document = GetDocument(); 01317 if ( document ) 01318 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01319 return; 01320 } 01321 01322 (*tag) += (char) c; 01323 01324 if ( c == '>' 01325 && tag->at( tag->length() - 2 ) == '-' 01326 && tag->at( tag->length() - 3 ) == '-' ) 01327 { 01328 // All is well. 01329 return; 01330 } 01331 } 01332 } 01333 #endif 01334 01335 01336 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01337 { 01338 TiXmlDocument* document = GetDocument(); 01339 value = ""; 01340 01341 p = SkipWhiteSpace( p, encoding ); 01342 01343 if ( data ) 01344 { 01345 data->Stamp( p, encoding ); 01346 location = data->Cursor(); 01347 } 01348 const char* startTag = "<!--"; 01349 const char* endTag = "-->"; 01350 01351 if ( !StringEqual( p, startTag, false, encoding ) ) 01352 { 01353 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); 01354 return 0; 01355 } 01356 p += strlen( startTag ); 01357 01358 // [ 1475201 ] TinyXML parses entities in comments 01359 // Oops - ReadText doesn't work, because we don't want to parse the entities. 01360 // p = ReadText( p, &value, false, endTag, false, encoding ); 01361 // 01362 // from the XML spec: 01363 /* 01364 [Definition: Comments may appear anywhere in a document outside other markup; in addition, 01365 they may appear within the document type declaration at places allowed by the grammar. 01366 They are not part of the document's character data; an XML processor MAY, but need not, 01367 make it possible for an application to retrieve the text of comments. For compatibility, 01368 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 01369 references MUST NOT be recognized within comments. 01370 01371 An example of a comment: 01372 01373 <!-- declarations for <head> & <body> --> 01374 */ 01375 01376 value = ""; 01377 // Keep all the white space. 01378 while ( p && *p && !StringEqual( p, endTag, false, encoding ) ) 01379 { 01380 value.append( p, 1 ); 01381 ++p; 01382 } 01383 if ( p && *p ) 01384 p += strlen( endTag ); 01385 01386 return p; 01387 } 01388 01389 01390 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01391 { 01392 p = SkipWhiteSpace( p, encoding ); 01393 if ( !p || !*p ) return 0; 01394 01395 if ( data ) 01396 { 01397 data->Stamp( p, encoding ); 01398 location = data->Cursor(); 01399 } 01400 // Read the name, the '=' and the value. 01401 const char* pErr = p; 01402 p = ReadName( p, &name, encoding ); 01403 if ( !p || !*p ) 01404 { 01405 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 01406 return 0; 01407 } 01408 p = SkipWhiteSpace( p, encoding ); 01409 if ( !p || !*p || *p != '=' ) 01410 { 01411 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01412 return 0; 01413 } 01414 01415 ++p; // skip '=' 01416 p = SkipWhiteSpace( p, encoding ); 01417 if ( !p || !*p ) 01418 { 01419 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01420 return 0; 01421 } 01422 01423 const char* end; 01424 const char SINGLE_QUOTE = '\''; 01425 const char DOUBLE_QUOTE = '\"'; 01426 01427 if ( *p == SINGLE_QUOTE ) 01428 { 01429 ++p; 01430 end = "\'"; // single quote in string 01431 p = ReadText( p, &value, false, end, false, encoding ); 01432 } 01433 else if ( *p == DOUBLE_QUOTE ) 01434 { 01435 ++p; 01436 end = "\""; // double quote in string 01437 p = ReadText( p, &value, false, end, false, encoding ); 01438 } 01439 else 01440 { 01441 // All attribute values should be in single or double quotes. 01442 // But this is such a common error that the parser will try 01443 // its best, even without them. 01444 value = ""; 01445 while ( p && *p // existence 01446 && !IsWhiteSpace( *p ) // whitespace 01447 && *p != '/' && *p != '>' ) // tag end 01448 { 01449 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) { 01450 // [ 1451649 ] Attribute values with trailing quotes not handled correctly 01451 // We did not have an opening quote but seem to have a 01452 // closing one. Give up and throw an error. 01453 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01454 return 0; 01455 } 01456 value += *p; 01457 ++p; 01458 } 01459 } 01460 return p; 01461 } 01462 01463 #ifdef TIXML_USE_STL 01464 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag ) 01465 { 01466 while ( in->good() ) 01467 { 01468 int c = in->peek(); 01469 if ( !cdata && (c == '<' ) ) 01470 { 01471 return; 01472 } 01473 if ( c <= 0 ) 01474 { 01475 TiXmlDocument* document = GetDocument(); 01476 if ( document ) 01477 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01478 return; 01479 } 01480 01481 (*tag) += (char) c; 01482 in->get(); // "commits" the peek made above 01483 01484 if ( cdata && c == '>' && tag->size() >= 3 ) { 01485 size_t len = tag->size(); 01486 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) { 01487 // terminator of cdata. 01488 return; 01489 } 01490 } 01491 } 01492 } 01493 #endif 01494 01495 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01496 { 01497 value = ""; 01498 TiXmlDocument* document = GetDocument(); 01499 01500 if ( data ) 01501 { 01502 data->Stamp( p, encoding ); 01503 location = data->Cursor(); 01504 } 01505 01506 const char* const startTag = "<![CDATA["; 01507 const char* const endTag = "]]>"; 01508 01509 if ( cdata || StringEqual( p, startTag, false, encoding ) ) 01510 { 01511 cdata = true; 01512 01513 if ( !StringEqual( p, startTag, false, encoding ) ) 01514 { 01515 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); 01516 return 0; 01517 } 01518 p += strlen( startTag ); 01519 01520 // Keep all the white space, ignore the encoding, etc. 01521 while ( p && *p 01522 && !StringEqual( p, endTag, false, encoding ) 01523 ) 01524 { 01525 value += *p; 01526 ++p; 01527 } 01528 01529 TIXML_STRING dummy; 01530 p = ReadText( p, &dummy, false, endTag, false, encoding ); 01531 return p; 01532 } 01533 else 01534 { 01535 bool ignoreWhite = true; 01536 01537 const char* end = "<"; 01538 p = ReadText( p, &value, ignoreWhite, end, false, encoding ); 01539 if ( p ) 01540 return p-1; // don't truncate the '<' 01541 return 0; 01542 } 01543 } 01544 01545 #ifdef TIXML_USE_STL 01546 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag ) 01547 { 01548 while ( in->good() ) 01549 { 01550 int c = in->get(); 01551 if ( c <= 0 ) 01552 { 01553 TiXmlDocument* document = GetDocument(); 01554 if ( document ) 01555 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01556 return; 01557 } 01558 (*tag) += (char) c; 01559 01560 if ( c == '>' ) 01561 { 01562 // All is well. 01563 return; 01564 } 01565 } 01566 } 01567 #endif 01568 01569 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) 01570 { 01571 p = SkipWhiteSpace( p, _encoding ); 01572 // Find the beginning, find the end, and look for 01573 // the stuff in-between. 01574 TiXmlDocument* document = GetDocument(); 01575 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) 01576 { 01577 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); 01578 return 0; 01579 } 01580 if ( data ) 01581 { 01582 data->Stamp( p, _encoding ); 01583 location = data->Cursor(); 01584 } 01585 p += 5; 01586 01587 version = ""; 01588 encoding = ""; 01589 standalone = ""; 01590 01591 while ( p && *p ) 01592 { 01593 if ( *p == '>' ) 01594 { 01595 ++p; 01596 return p; 01597 } 01598 01599 p = SkipWhiteSpace( p, _encoding ); 01600 if ( StringEqual( p, "version", true, _encoding ) ) 01601 { 01602 TiXmlAttribute attrib; 01603 p = attrib.Parse( p, data, _encoding ); 01604 version = attrib.Value(); 01605 } 01606 else if ( StringEqual( p, "encoding", true, _encoding ) ) 01607 { 01608 TiXmlAttribute attrib; 01609 p = attrib.Parse( p, data, _encoding ); 01610 encoding = attrib.Value(); 01611 } 01612 else if ( StringEqual( p, "standalone", true, _encoding ) ) 01613 { 01614 TiXmlAttribute attrib; 01615 p = attrib.Parse( p, data, _encoding ); 01616 standalone = attrib.Value(); 01617 } 01618 else 01619 { 01620 // Read over whatever it is. 01621 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) 01622 ++p; 01623 } 01624 } 01625 return 0; 01626 } 01627 01628 bool TiXmlText::Blank() const 01629 { 01630 for ( unsigned i=0; i<value.length(); i++ ) 01631 if ( !IsWhiteSpace( value[i] ) ) 01632 return false; 01633 return true; 01634 } 01635