Source/WebCore/ChangeLog

112013-02-11 Eric Seidel <eric@webkit.org>
22
 3 Fold MarkupTokenBase into HTMLToken now that it has no other subclasses
 4 https://bugs.webkit.org/show_bug.cgi?id=109483
 5
 6 Reviewed by NOBODY (OOPS!).
 7
 8 This deletes an epic amount of template yuck, and also removes
 9 a vtable (!?!) from HTMLToken.
 10
 11 This paves the way for further cleanup of HTMLToken now that we
 12 can see the whole object at once.
 13 We'll also probably re-create HTMLToken.cpp, now that we're
 14 free from the chains of template nonsense.
 15
 16 * GNUmakefile.list.am:
 17 * Target.pri:
 18 * WebCore.gypi:
 19 * WebCore.vcproj/WebCore.vcproj:
 20 * WebCore.vcxproj/WebCore.vcxproj:
 21 * WebCore.vcxproj/WebCore.vcxproj.filters:
 22 * WebCore.xcodeproj/project.pbxproj:
 23 * html/parser/HTMLToken.h:
 24 (WebCore::findAttributeInVector):
 25 (WebCore):
 26 (HTMLToken):
 27 (Attribute):
 28 (Range):
 29 (WebCore::HTMLToken::HTMLToken):
 30 (WebCore::HTMLToken::clear):
 31 (WebCore::HTMLToken::isUninitialized):
 32 (WebCore::HTMLToken::type):
 33 (WebCore::HTMLToken::makeEndOfFile):
 34 (WebCore::HTMLToken::startIndex):
 35 (WebCore::HTMLToken::endIndex):
 36 (WebCore::HTMLToken::setBaseOffset):
 37 (WebCore::HTMLToken::end):
 38 (WebCore::HTMLToken::data):
 39 (WebCore::HTMLToken::isAll8BitData):
 40 (WebCore::HTMLToken::name):
 41 (WebCore::HTMLToken::appendToName):
 42 (WebCore::HTMLToken::nameString):
 43 (WebCore::HTMLToken::selfClosing):
 44 (WebCore::HTMLToken::setSelfClosing):
 45 (WebCore::HTMLToken::beginStartTag):
 46 (WebCore::HTMLToken::beginEndTag):
 47 (WebCore::HTMLToken::addNewAttribute):
 48 (WebCore::HTMLToken::beginAttributeName):
 49 (WebCore::HTMLToken::endAttributeName):
 50 (WebCore::HTMLToken::beginAttributeValue):
 51 (WebCore::HTMLToken::endAttributeValue):
 52 (WebCore::HTMLToken::appendToAttributeName):
 53 (WebCore::HTMLToken::appendToAttributeValue):
 54 (WebCore::HTMLToken::attributes):
 55 (WebCore::HTMLToken::eraseValueOfAttribute):
 56 (WebCore::HTMLToken::ensureIsCharacterToken):
 57 (WebCore::HTMLToken::characters):
 58 (WebCore::HTMLToken::appendToCharacter):
 59 (WebCore::HTMLToken::comment):
 60 (WebCore::HTMLToken::beginComment):
 61 (WebCore::HTMLToken::appendToComment):
 62 (WebCore::HTMLToken::eraseCharacters):
 63 * html/parser/HTMLTokenTypes.h:
 64 * html/parser/XSSAuditor.h:
 65 * xml/parser/MarkupTokenBase.h: Removed.
 66
 672013-02-11 Eric Seidel <eric@webkit.org>
 68
369 Make WebVTTTokenizer stop inheriting from MarkupTokenizerBase
470 https://bugs.webkit.org/show_bug.cgi?id=109411
571

Source/WebCore/GNUmakefile.list.am

@@webcore_sources += \
47094709 Source/WebCore/workers/WorkerThread.cpp \
47104710 Source/WebCore/workers/WorkerThread.h \
47114711 Source/WebCore/xml/parser/CharacterReferenceParserInlines.h \
4712  Source/WebCore/xml/parser/MarkupTokenBase.h \
47134712 Source/WebCore/xml/parser/MarkupTokenizerBase.h \
47144713 Source/WebCore/xml/parser/MarkupTokenizerInlines.h \
47154714 Source/WebCore/xml/parser/XMLDocumentParser.cpp \

Source/WebCore/Target.pri

@@HEADERS += \
28502850 workers/WorkerScriptLoader.h \
28512851 workers/WorkerThread.h \
28522852 xml/parser/CharacterReferenceParserInlines.h \
2853  xml/parser/MarkupTokenBase.h \
28542853 xml/parser/MarkupTokenizerBase.h \
28552854 xml/parser/MarkupTokenizerInlines.h \
28562855 xml/parser/XMLDocumentParser.h \

Source/WebCore/WebCore.gypi

26702670 'workers/chromium/WorkerContextProxyChromium.cpp',
26712671 'workers/chromium/WorkerContextProxyChromium.h',
26722672 'xml/parser/CharacterReferenceParserInlines.h',
2673  'xml/parser/MarkupTokenBase.h',
26742673 'xml/parser/MarkupTokenizerBase.h',
26752674 'xml/parser/MarkupTokenizerInlines.h',
26762675 'xml/parser/XMLDocumentParser.cpp',

Source/WebCore/WebCore.vcproj/WebCore.vcproj

4909049090 Name="parser"
4909149091 >
4909249092 <File
49093  RelativePath="..\xml\parser\MarkupTokenBase.h"
49094  >
49095  </File>
49096  <File
4909749093 RelativePath="..\xml\parser\MarkupTokenizerBase.h"
4909849094 >
4909949095 </File>

Source/WebCore/WebCore.vcxproj/WebCore.vcxproj

75137513 <ClInclude Include="..\xml\XSLTExtensions.h" />
75147514 <ClInclude Include="..\xml\XSLTProcessor.h" />
75157515 <ClInclude Include="..\xml\XSLTUnicodeSort.h" />
7516  <ClInclude Include="..\xml\parser\MarkupTokenBase.h" />
75177516 <ClInclude Include="..\xml\parser\MarkupTokenizerBase.h" />
75187517 <ClInclude Include="..\xml\parser\MarkupTokenizerInlines.h" />
75197518 <ClInclude Include="..\xml\parser\XMLDocumentParser.h" />

Source/WebCore/WebCore.vcxproj/WebCore.vcxproj.filters

1131311313 <ClInclude Include="..\xml\XSLTUnicodeSort.h">
1131411314 <Filter>xml</Filter>
1131511315 </ClInclude>
11316  <ClInclude Include="..\xml\parser\MarkupTokenBase.h">
11317  <Filter>xml\parser</Filter>
11318  </ClInclude>
1131911316 <ClInclude Include="..\xml\parser\MarkupTokenizerBase.h">
1132011317 <Filter>xml\parser</Filter>
1132111318 </ClInclude>

Source/WebCore/WebCore.xcodeproj/project.pbxproj

4848 0014628A103CD1DE000B20DB /* OriginAccessEntry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00146288103CD1DE000B20DB /* OriginAccessEntry.cpp */; };
4949 0014628B103CD1DE000B20DB /* OriginAccessEntry.h in Headers */ = {isa = PBXBuildFile; fileRef = 00146289103CD1DE000B20DB /* OriginAccessEntry.h */; };
5050 003F1FEA11E6AB43008258D9 /* UserContentTypes.h in Headers */ = {isa = PBXBuildFile; fileRef = 003F1FE911E6AB43008258D9 /* UserContentTypes.h */; settings = {ATTRIBUTES = (Private, ); }; };
51  00A629C113D0BEC70050AC52 /* MarkupTokenBase.h in Headers */ = {isa = PBXBuildFile; fileRef = 00A629C013D0BEC70050AC52 /* MarkupTokenBase.h */; };
5251 00B9318713BA8DB30035A948 /* XMLDocumentParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00B9318113BA867F0035A948 /* XMLDocumentParser.cpp */; };
5352 00B9318813BA8DBA0035A948 /* XMLDocumentParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 00B9318213BA867F0035A948 /* XMLDocumentParser.h */; };
5453 00B9318913BA8DBC0035A948 /* XMLDocumentParserLibxml2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00B9318313BA867F0035A948 /* XMLDocumentParserLibxml2.cpp */; };

72947293 00146288103CD1DE000B20DB /* OriginAccessEntry.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = OriginAccessEntry.cpp; sourceTree = "<group>"; };
72957294 00146289103CD1DE000B20DB /* OriginAccessEntry.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = OriginAccessEntry.h; sourceTree = "<group>"; };
72967295 003F1FE911E6AB43008258D9 /* UserContentTypes.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserContentTypes.h; sourceTree = "<group>"; };
7297  00A629C013D0BEC70050AC52 /* MarkupTokenBase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MarkupTokenBase.h; sourceTree = "<group>"; };
72987296 00B9318113BA867F0035A948 /* XMLDocumentParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = XMLDocumentParser.cpp; sourceTree = "<group>"; };
72997297 00B9318213BA867F0035A948 /* XMLDocumentParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XMLDocumentParser.h; sourceTree = "<group>"; };
73007298 00B9318313BA867F0035A948 /* XMLDocumentParserLibxml2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = XMLDocumentParserLibxml2.cpp; sourceTree = "<group>"; };

1457114569 isa = PBXGroup;
1457214570 children = (
1457314571 97B8FFCF16AE7F920038388D /* CharacterReferenceParserInlines.h */,
14574  00A629C013D0BEC70050AC52 /* MarkupTokenBase.h */,
1457514572 00C60E4113D797AE0092A275 /* MarkupTokenizerBase.h */,
1457614573 00C60E3E13D76D7E0092A275 /* MarkupTokenizerInlines.h */,
1457714574 00B9318113BA867F0035A948 /* XMLDocumentParser.cpp */,

2530225299 1A8F6BC60DB55CDC001DB794 /* ManifestParser.h in Headers */,
2530325300 93309DF8099E64920056E581 /* markup.h in Headers */,
2530425301 9728C3141268E4390041E89B /* MarkupAccumulator.h in Headers */,
25305  00A629C113D0BEC70050AC52 /* MarkupTokenBase.h in Headers */,
2530625302 00C60E4213D797AE0092A275 /* MarkupTokenizerBase.h in Headers */,
2530725303 00C60E3F13D76D7E0092A275 /* MarkupTokenizerInlines.h in Headers */,
2530825304 FABE72F51059C1EB00D999DD /* MathMLElement.h in Headers */,

Source/WebCore/html/parser/HTMLToken.h

11/*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
33 *
44 * Redistribution and use in source and binary forms, with or without
55 * modification, are permitted provided that the following conditions

2626#ifndef HTMLToken_h
2727#define HTMLToken_h
2828
 29#include "Attribute.h"
2930#include "CompactHTMLToken.h"
3031#include "HTMLTokenTypes.h"
31 #include "MarkupTokenBase.h"
3232#include <wtf/RefCounted.h>
3333#include <wtf/RefPtr.h>
3434

@@public:
5252 bool m_forceQuirks;
5353};
5454
55 class HTMLToken : public MarkupTokenBase<HTMLTokenTypes> {
 55static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
 56{
 57 for (unsigned i = 0; i < attributes.size(); ++i) {
 58 if (attributes.at(i).name().matches(name))
 59 return &attributes.at(i);
 60 }
 61 return 0;
 62}
 63
 64class HTMLToken {
 65 WTF_MAKE_NONCOPYABLE(HTMLToken);
 66 WTF_MAKE_FAST_ALLOCATED;
5667public:
57  void appendToName(UChar character)
 68 typedef HTMLTokenTypes Type;
 69
 70 class Attribute {
 71 public:
 72 class Range {
 73 public:
 74 int m_start;
 75 int m_end;
 76 };
 77
 78 Range m_nameRange;
 79 Range m_valueRange;
 80 WTF::Vector<UChar, 32> m_name;
 81 WTF::Vector<UChar, 32> m_value;
 82 };
 83
 84 typedef WTF::Vector<Attribute, 10> AttributeList;
 85 typedef WTF::Vector<UChar, 1024> DataVector;
 86
 87 HTMLToken() { clear(); }
 88
 89 void clear()
5890 {
59  ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag || m_type == HTMLTokenTypes::DOCTYPE);
60  MarkupTokenBase<HTMLTokenTypes>::appendToName(character);
 91 m_type = Type::Uninitialized;
 92 m_range.m_start = 0;
 93 m_range.m_end = 0;
 94 m_baseOffset = 0;
 95 m_data.clear();
 96 m_orAllData = 0;
 97 }
 98
 99 bool isUninitialized() { return m_type == HTMLTokenTypes::Uninitialized; }
 100 typename HTMLTokenTypes::Type type() const { return m_type; }
 101
 102 void makeEndOfFile()
 103 {
 104 ASSERT(m_type == HTMLTokenTypes::Uninitialized);
 105 m_type = HTMLTokenTypes::EndOfFile;
 106 }
 107
 108 /* Range and offset methods exposed for HTMLSourceTracker and HTMLViewSourceParser */
 109 int startIndex() const { return m_range.m_start; }
 110 int endIndex() const { return m_range.m_end; }
 111
 112 void setBaseOffset(int offset)
 113 {
 114 m_baseOffset = offset;
 115 }
 116
 117 void end(int endOffset)
 118 {
 119 m_range.m_end = endOffset - m_baseOffset;
 120 }
 121
 122 const DataVector& data() const
 123 {
 124 ASSERT(m_type == HTMLTokenTypes::Character || m_type == HTMLTokenTypes::Comment || m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 125 return m_data;
 126 }
 127
 128 bool isAll8BitData() const
 129 {
 130 return (m_orAllData <= 0xff);
61131 }
62132
63133 const DataVector& name() const
64134 {
65135 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag || m_type == HTMLTokenTypes::DOCTYPE);
66  return MarkupTokenBase<HTMLTokenTypes>::name();
 136 return m_data;
 137 }
 138
 139 void appendToName(UChar character)
 140 {
 141 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag || m_type == HTMLTokenTypes::DOCTYPE);
 142 ASSERT(character);
 143 m_data.append(character);
 144 m_orAllData |= character;
 145 }
 146
 147 // FIXME: Rename this to copyNameAsString().
 148 String nameString() const
 149 {
 150 if (!m_data.size())
 151 return emptyString();
 152 if (isAll8BitData())
 153 return String::make8BitFrom16BitSource(m_data.data(), m_data.size());
 154 return String(m_data.data(), m_data.size());
67155 }
68156
 157 /* DOCTYPE Tokens */
 158
69159 bool forceQuirks() const
70160 {
71161 ASSERT(m_type == HTMLTokenTypes::DOCTYPE);

@@public:
142232 return m_doctypeData.release();
143233 }
144234
 235 /* Start/End Tag Tokens */
 236
 237 bool selfClosing() const
 238 {
 239 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 240 return m_selfClosing;
 241 }
 242
 243 void setSelfClosing()
 244 {
 245 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 246 m_selfClosing = true;
 247 }
 248
 249 void beginStartTag(UChar character)
 250 {
 251 ASSERT(character);
 252 ASSERT(m_type == HTMLTokenTypes::Uninitialized);
 253 m_type = HTMLTokenTypes::StartTag;
 254 m_selfClosing = false;
 255 m_currentAttribute = 0;
 256 m_attributes.clear();
 257
 258 m_data.append(character);
 259 m_orAllData |= character;
 260 }
 261
 262 void beginEndTag(LChar character)
 263 {
 264 ASSERT(m_type == HTMLTokenTypes::Uninitialized);
 265 m_type = HTMLTokenTypes::EndTag;
 266 m_selfClosing = false;
 267 m_currentAttribute = 0;
 268 m_attributes.clear();
 269
 270 m_data.append(character);
 271 }
 272
 273 void beginEndTag(const Vector<LChar, 32>& characters)
 274 {
 275 ASSERT(m_type == HTMLTokenTypes::Uninitialized);
 276 m_type = HTMLTokenTypes::EndTag;
 277 m_selfClosing = false;
 278 m_currentAttribute = 0;
 279 m_attributes.clear();
 280
 281 m_data.appendVector(characters);
 282 }
 283
 284 void addNewAttribute()
 285 {
 286 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 287 m_attributes.grow(m_attributes.size() + 1);
 288 m_currentAttribute = &m_attributes.last();
 289#ifndef NDEBUG
 290 m_currentAttribute->m_nameRange.m_start = 0;
 291 m_currentAttribute->m_nameRange.m_end = 0;
 292 m_currentAttribute->m_valueRange.m_start = 0;
 293 m_currentAttribute->m_valueRange.m_end = 0;
 294#endif
 295 }
 296
 297 void beginAttributeName(int offset)
 298 {
 299 m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset;
 300 }
 301
 302 void endAttributeName(int offset)
 303 {
 304 int index = offset - m_baseOffset;
 305 m_currentAttribute->m_nameRange.m_end = index;
 306 m_currentAttribute->m_valueRange.m_start = index;
 307 m_currentAttribute->m_valueRange.m_end = index;
 308 }
 309
 310 void beginAttributeValue(int offset)
 311 {
 312 m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset;
 313#ifndef NDEBUG
 314 m_currentAttribute->m_valueRange.m_end = 0;
 315#endif
 316 }
 317
 318 void endAttributeValue(int offset)
 319 {
 320 m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset;
 321 }
 322
 323 void appendToAttributeName(UChar character)
 324 {
 325 ASSERT(character);
 326 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 327 // FIXME: We should be able to add the following ASSERT once we fix
 328 // https://bugs.webkit.org/show_bug.cgi?id=62971
 329 // ASSERT(m_currentAttribute->m_nameRange.m_start);
 330 m_currentAttribute->m_name.append(character);
 331 }
 332
 333 void appendToAttributeValue(UChar character)
 334 {
 335 ASSERT(character);
 336 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 337 ASSERT(m_currentAttribute->m_valueRange.m_start);
 338 m_currentAttribute->m_value.append(character);
 339 }
 340
 341 void appendToAttributeValue(size_t i, const String& value)
 342 {
 343 ASSERT(!value.isEmpty());
 344 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 345 m_attributes[i].m_value.append(value.characters(), value.length());
 346 }
 347
 348 const AttributeList& attributes() const
 349 {
 350 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 351 return m_attributes;
 352 }
 353
 354 // Used by the XSSAuditor to nuke XSS-laden attributes.
 355 void eraseValueOfAttribute(size_t i)
 356 {
 357 ASSERT(m_type == HTMLTokenTypes::StartTag || m_type == HTMLTokenTypes::EndTag);
 358 m_attributes[i].m_value.clear();
 359 }
 360
 361 /* Character Tokens */
 362
 363 // Starting a character token works slightly differently than starting
 364 // other types of tokens because we want to save a per-character branch.
 365 void ensureIsCharacterToken()
 366 {
 367 ASSERT(m_type == HTMLTokenTypes::Uninitialized || m_type == HTMLTokenTypes::Character);
 368 m_type = HTMLTokenTypes::Character;
 369 }
 370
 371 const DataVector& characters() const
 372 {
 373 ASSERT(m_type == HTMLTokenTypes::Character);
 374 return m_data;
 375 }
 376
 377 void appendToCharacter(char character)
 378 {
 379 ASSERT(m_type == HTMLTokenTypes::Character);
 380 m_data.append(character);
 381 }
 382
 383 void appendToCharacter(UChar character)
 384 {
 385 ASSERT(m_type == HTMLTokenTypes::Character);
 386 m_data.append(character);
 387 m_orAllData |= character;
 388 }
 389
 390 void appendToCharacter(const Vector<LChar, 32>& characters)
 391 {
 392 ASSERT(m_type == HTMLTokenTypes::Character);
 393 m_data.appendVector(characters);
 394 }
 395
 396 /* Comment Tokens */
 397
 398 const DataVector& comment() const
 399 {
 400 ASSERT(m_type == HTMLTokenTypes::Comment);
 401 return m_data;
 402 }
 403
 404 void beginComment()
 405 {
 406 ASSERT(m_type == HTMLTokenTypes::Uninitialized);
 407 m_type = HTMLTokenTypes::Comment;
 408 }
 409
 410 void appendToComment(UChar character)
 411 {
 412 ASSERT(character);
 413 ASSERT(m_type == HTMLTokenTypes::Comment);
 414 m_data.append(character);
 415 m_orAllData |= character;
 416 }
 417
 418 void eraseCharacters()
 419 {
 420 ASSERT(m_type == HTMLTokenTypes::Character);
 421 m_data.clear();
 422 m_orAllData = 0;
 423 }
 424
145425private:
 426 typename HTMLTokenTypes::Type m_type;
 427 typename Attribute::Range m_range; // Always starts at zero.
 428 int m_baseOffset;
 429 DataVector m_data;
 430 UChar m_orAllData;
 431
 432 // For StartTag and EndTag
 433 bool m_selfClosing;
 434 AttributeList m_attributes;
 435
 436 // A pointer into m_attributes used during lexing.
 437 Attribute* m_currentAttribute;
 438
146439 // For DOCTYPE
147440 OwnPtr<DoctypeData> m_doctypeData;
148441};

Source/WebCore/html/parser/HTMLTokenTypes.h

2626#ifndef HTMLTokenTypes_h
2727#define HTMLTokenTypes_h
2828
29 #include "MarkupTokenBase.h"
30 #include <wtf/Noncopyable.h>
31 
3229namespace WebCore {
3330
3431class HTMLTokenTypes {

Source/WebCore/html/parser/XSSAuditor.h

2828
2929#include "HTMLToken.h"
3030#include "HTTPParsers.h"
 31#include "KURL.h"
3132#include "SuffixTree.h"
3233#include "TextEncoding.h"
3334#include <wtf/PassOwnPtr.h>

Source/WebCore/xml/parser/MarkupTokenBase.h

1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #ifndef MarkupTokenBase_h
28 #define MarkupTokenBase_h
29 
30 #include "ElementAttributeData.h"
31 #include <wtf/Vector.h>
32 
33 #ifndef NDEBUG
34 #include <stdio.h>
35 #endif
36 
37 namespace WebCore {
38 
39 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
40 {
41  for (unsigned i = 0; i < attributes.size(); ++i) {
42  if (attributes.at(i).name().matches(name))
43  return &attributes.at(i);
44  }
45  return 0;
46 }
47 
48 template<typename TypeSet>
49 class MarkupTokenBase {
50  WTF_MAKE_NONCOPYABLE(MarkupTokenBase);
51  WTF_MAKE_FAST_ALLOCATED;
52 public:
53  typedef TypeSet Type;
54 
55  class Attribute {
56  public:
57  class Range {
58  public:
59  int m_start;
60  int m_end;
61  };
62 
63  Range m_nameRange;
64  Range m_valueRange;
65  WTF::Vector<UChar, 32> m_name;
66  WTF::Vector<UChar, 32> m_value;
67  };
68 
69  typedef WTF::Vector<Attribute, 10> AttributeList;
70  typedef WTF::Vector<UChar, 1024> DataVector;
71 
72  MarkupTokenBase() { clear(); }
73  virtual ~MarkupTokenBase() { }
74 
75  virtual void clear()
76  {
77  m_type = TypeSet::Uninitialized;
78  m_range.m_start = 0;
79  m_range.m_end = 0;
80  m_baseOffset = 0;
81  m_data.clear();
82  m_orAllData = 0;
83  }
84 
85  bool isUninitialized() { return m_type == TypeSet::Uninitialized; }
86 
87  int startIndex() const { return m_range.m_start; }
88  int endIndex() const { return m_range.m_end; }
89 
90  void setBaseOffset(int offset)
91  {
92  m_baseOffset = offset;
93  }
94 
95  void end(int endOffset)
96  {
97  m_range.m_end = endOffset - m_baseOffset;
98  }
99 
100  void makeEndOfFile()
101  {
102  ASSERT(m_type == TypeSet::Uninitialized);
103  m_type = TypeSet::EndOfFile;
104  }
105 
106  void beginStartTag(UChar character)
107  {
108  ASSERT(character);
109  ASSERT(m_type == TypeSet::Uninitialized);
110  m_type = TypeSet::StartTag;
111  m_selfClosing = false;
112  m_currentAttribute = 0;
113  m_attributes.clear();
114 
115  m_data.append(character);
116  m_orAllData |= character;
117  }
118 
119  void beginEndTag(LChar character)
120  {
121  ASSERT(m_type == TypeSet::Uninitialized);
122  m_type = TypeSet::EndTag;
123  m_selfClosing = false;
124  m_currentAttribute = 0;
125  m_attributes.clear();
126 
127  m_data.append(character);
128  }
129 
130  void beginEndTag(const Vector<LChar, 32>& characters)
131  {
132  ASSERT(m_type == TypeSet::Uninitialized);
133  m_type = TypeSet::EndTag;
134  m_selfClosing = false;
135  m_currentAttribute = 0;
136  m_attributes.clear();
137 
138  m_data.appendVector(characters);
139  }
140 
141  // Starting a character token works slightly differently than starting
142  // other types of tokens because we want to save a per-character branch.
143  void ensureIsCharacterToken()
144  {
145  ASSERT(m_type == TypeSet::Uninitialized || m_type == TypeSet::Character);
146  m_type = TypeSet::Character;
147  }
148 
149  void beginComment()
150  {
151  ASSERT(m_type == TypeSet::Uninitialized);
152  m_type = TypeSet::Comment;
153  }
154 
155  void appendToCharacter(char character)
156  {
157  ASSERT(m_type == TypeSet::Character);
158  m_data.append(character);
159  }
160 
161  void appendToCharacter(UChar character)
162  {
163  ASSERT(m_type == TypeSet::Character);
164  m_data.append(character);
165  m_orAllData |= character;
166  }
167 
168  void appendToCharacter(const Vector<LChar, 32>& characters)
169  {
170  ASSERT(m_type == TypeSet::Character);
171  m_data.appendVector(characters);
172  }
173 
174  void appendToComment(UChar character)
175  {
176  ASSERT(character);
177  ASSERT(m_type == TypeSet::Comment);
178  m_data.append(character);
179  m_orAllData |= character;
180  }
181 
182  void addNewAttribute()
183  {
184  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
185  m_attributes.grow(m_attributes.size() + 1);
186  m_currentAttribute = &m_attributes.last();
187 #ifndef NDEBUG
188  m_currentAttribute->m_nameRange.m_start = 0;
189  m_currentAttribute->m_nameRange.m_end = 0;
190  m_currentAttribute->m_valueRange.m_start = 0;
191  m_currentAttribute->m_valueRange.m_end = 0;
192 #endif
193  }
194 
195  void beginAttributeName(int offset)
196  {
197  m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset;
198  }
199 
200  void endAttributeName(int offset)
201  {
202  int index = offset - m_baseOffset;
203  m_currentAttribute->m_nameRange.m_end = index;
204  m_currentAttribute->m_valueRange.m_start = index;
205  m_currentAttribute->m_valueRange.m_end = index;
206  }
207 
208  void beginAttributeValue(int offset)
209  {
210  m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset;
211 #ifndef NDEBUG
212  m_currentAttribute->m_valueRange.m_end = 0;
213 #endif
214  }
215 
216  void endAttributeValue(int offset)
217  {
218  m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset;
219  }
220 
221  void appendToAttributeName(UChar character)
222  {
223  ASSERT(character);
224  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
225  // FIXME: We should be able to add the following ASSERT once we fix
226  // https://bugs.webkit.org/show_bug.cgi?id=62971
227  // ASSERT(m_currentAttribute->m_nameRange.m_start);
228  m_currentAttribute->m_name.append(character);
229  }
230 
231  void appendToAttributeValue(UChar character)
232  {
233  ASSERT(character);
234  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
235  ASSERT(m_currentAttribute->m_valueRange.m_start);
236  m_currentAttribute->m_value.append(character);
237  }
238 
239  void appendToAttributeValue(size_t i, const String& value)
240  {
241  ASSERT(!value.isEmpty());
242  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
243  m_attributes[i].m_value.append(value.characters(), value.length());
244  }
245 
246  typename Type::Type type() const { return m_type; }
247 
248  bool selfClosing() const
249  {
250  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
251  return m_selfClosing;
252  }
253 
254  void setSelfClosing()
255  {
256  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
257  m_selfClosing = true;
258  }
259 
260  const AttributeList& attributes() const
261  {
262  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
263  return m_attributes;
264  }
265 
266  void eraseCharacters()
267  {
268  ASSERT(m_type == TypeSet::Character);
269  m_data.clear();
270  m_orAllData = 0;
271  }
272 
273  void eraseValueOfAttribute(size_t i)
274  {
275  ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
276  m_attributes[i].m_value.clear();
277  }
278 
279  const DataVector& characters() const
280  {
281  ASSERT(m_type == TypeSet::Character);
282  return m_data;
283  }
284 
285  const DataVector& comment() const
286  {
287  ASSERT(m_type == TypeSet::Comment);
288  return m_data;
289  }
290 
291  const DataVector& data() const
292  {
293  ASSERT(m_type == TypeSet::Character || m_type == TypeSet::Comment || m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
294  return m_data;
295  }
296 
297  bool isAll8BitData() const
298  {
299  return (m_orAllData <= 0xff);
300  }
301 
302  const DataVector& name() const
303  {
304  return m_data;
305  }
306 
307  String nameString() const
308  {
309  if (!m_data.size())
310  return emptyString();
311  if (isAll8BitData())
312  return String::make8BitFrom16BitSource(m_data.data(), m_data.size());
313  return String(m_data.data(), m_data.size());
314  }
315 
316 protected:
317 
318 #ifndef NDEBUG
319  void printString(const DataVector& string) const
320  {
321  DataVector::const_iterator iter = string.begin();
322  for (; iter != string.end(); ++iter)
323  fprintf(stderr, "%lc", wchar_t(*iter));
324  }
325 #endif // NDEBUG
326 
327  void appendToName(UChar character)
328  {
329  ASSERT(character);
330  m_data.append(character);
331  m_orAllData |= character;
332  }
333 
334  typename Type::Type m_type;
335  typename Attribute::Range m_range; // Always starts at zero.
336  int m_baseOffset;
337  DataVector m_data;
338  UChar m_orAllData;
339 
340  // For StartTag and EndTag
341  bool m_selfClosing;
342  AttributeList m_attributes;
343 
344  // A pointer into m_attributes used during lexing.
345  Attribute* m_currentAttribute;
346 };
347 
348 }
349 
350 #endif // MarkupTokenBase_h