1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifndef MarkupTokenBase_h
28 #define MarkupTokenBase_h
29
30 #include "ElementAttributeData.h"
31 #include <wtf/Vector.h>
32
33 #ifndef NDEBUG
34 #include <stdio.h>
35 #endif
36
37 namespace WebCore {
38
39 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name)
40 {
41 for (unsigned i = 0; i < attributes.size(); ++i) {
42 if (attributes.at(i).name().matches(name))
43 return &attributes.at(i);
44 }
45 return 0;
46 }
47
48 template<typename TypeSet>
49 class MarkupTokenBase {
50 WTF_MAKE_NONCOPYABLE(MarkupTokenBase);
51 WTF_MAKE_FAST_ALLOCATED;
52 public:
53 typedef TypeSet Type;
54
55 class Attribute {
56 public:
57 class Range {
58 public:
59 int m_start;
60 int m_end;
61 };
62
63 Range m_nameRange;
64 Range m_valueRange;
65 WTF::Vector<UChar, 32> m_name;
66 WTF::Vector<UChar, 32> m_value;
67 };
68
69 typedef WTF::Vector<Attribute, 10> AttributeList;
70 typedef WTF::Vector<UChar, 1024> DataVector;
71
72 MarkupTokenBase() { clear(); }
73 virtual ~MarkupTokenBase() { }
74
75 virtual void clear()
76 {
77 m_type = TypeSet::Uninitialized;
78 m_range.m_start = 0;
79 m_range.m_end = 0;
80 m_baseOffset = 0;
81 m_data.clear();
82 m_orAllData = 0;
83 }
84
85 bool isUninitialized() { return m_type == TypeSet::Uninitialized; }
86
87 int startIndex() const { return m_range.m_start; }
88 int endIndex() const { return m_range.m_end; }
89
90 void setBaseOffset(int offset)
91 {
92 m_baseOffset = offset;
93 }
94
95 void end(int endOffset)
96 {
97 m_range.m_end = endOffset - m_baseOffset;
98 }
99
100 void makeEndOfFile()
101 {
102 ASSERT(m_type == TypeSet::Uninitialized);
103 m_type = TypeSet::EndOfFile;
104 }
105
106 void beginStartTag(UChar character)
107 {
108 ASSERT(character);
109 ASSERT(m_type == TypeSet::Uninitialized);
110 m_type = TypeSet::StartTag;
111 m_selfClosing = false;
112 m_currentAttribute = 0;
113 m_attributes.clear();
114
115 m_data.append(character);
116 m_orAllData |= character;
117 }
118
119 void beginEndTag(LChar character)
120 {
121 ASSERT(m_type == TypeSet::Uninitialized);
122 m_type = TypeSet::EndTag;
123 m_selfClosing = false;
124 m_currentAttribute = 0;
125 m_attributes.clear();
126
127 m_data.append(character);
128 }
129
130 void beginEndTag(const Vector<LChar, 32>& characters)
131 {
132 ASSERT(m_type == TypeSet::Uninitialized);
133 m_type = TypeSet::EndTag;
134 m_selfClosing = false;
135 m_currentAttribute = 0;
136 m_attributes.clear();
137
138 m_data.appendVector(characters);
139 }
140
141 // Starting a character token works slightly differently than starting
142 // other types of tokens because we want to save a per-character branch.
143 void ensureIsCharacterToken()
144 {
145 ASSERT(m_type == TypeSet::Uninitialized || m_type == TypeSet::Character);
146 m_type = TypeSet::Character;
147 }
148
149 void beginComment()
150 {
151 ASSERT(m_type == TypeSet::Uninitialized);
152 m_type = TypeSet::Comment;
153 }
154
155 void appendToCharacter(char character)
156 {
157 ASSERT(m_type == TypeSet::Character);
158 m_data.append(character);
159 }
160
161 void appendToCharacter(UChar character)
162 {
163 ASSERT(m_type == TypeSet::Character);
164 m_data.append(character);
165 m_orAllData |= character;
166 }
167
168 void appendToCharacter(const Vector<LChar, 32>& characters)
169 {
170 ASSERT(m_type == TypeSet::Character);
171 m_data.appendVector(characters);
172 }
173
174 void appendToComment(UChar character)
175 {
176 ASSERT(character);
177 ASSERT(m_type == TypeSet::Comment);
178 m_data.append(character);
179 m_orAllData |= character;
180 }
181
182 void addNewAttribute()
183 {
184 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
185 m_attributes.grow(m_attributes.size() + 1);
186 m_currentAttribute = &m_attributes.last();
187 #ifndef NDEBUG
188 m_currentAttribute->m_nameRange.m_start = 0;
189 m_currentAttribute->m_nameRange.m_end = 0;
190 m_currentAttribute->m_valueRange.m_start = 0;
191 m_currentAttribute->m_valueRange.m_end = 0;
192 #endif
193 }
194
195 void beginAttributeName(int offset)
196 {
197 m_currentAttribute->m_nameRange.m_start = offset - m_baseOffset;
198 }
199
200 void endAttributeName(int offset)
201 {
202 int index = offset - m_baseOffset;
203 m_currentAttribute->m_nameRange.m_end = index;
204 m_currentAttribute->m_valueRange.m_start = index;
205 m_currentAttribute->m_valueRange.m_end = index;
206 }
207
208 void beginAttributeValue(int offset)
209 {
210 m_currentAttribute->m_valueRange.m_start = offset - m_baseOffset;
211 #ifndef NDEBUG
212 m_currentAttribute->m_valueRange.m_end = 0;
213 #endif
214 }
215
216 void endAttributeValue(int offset)
217 {
218 m_currentAttribute->m_valueRange.m_end = offset - m_baseOffset;
219 }
220
221 void appendToAttributeName(UChar character)
222 {
223 ASSERT(character);
224 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
225 // FIXME: We should be able to add the following ASSERT once we fix
226 // https://bugs.webkit.org/show_bug.cgi?id=62971
227 // ASSERT(m_currentAttribute->m_nameRange.m_start);
228 m_currentAttribute->m_name.append(character);
229 }
230
231 void appendToAttributeValue(UChar character)
232 {
233 ASSERT(character);
234 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
235 ASSERT(m_currentAttribute->m_valueRange.m_start);
236 m_currentAttribute->m_value.append(character);
237 }
238
239 void appendToAttributeValue(size_t i, const String& value)
240 {
241 ASSERT(!value.isEmpty());
242 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
243 m_attributes[i].m_value.append(value.characters(), value.length());
244 }
245
246 typename Type::Type type() const { return m_type; }
247
248 bool selfClosing() const
249 {
250 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
251 return m_selfClosing;
252 }
253
254 void setSelfClosing()
255 {
256 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
257 m_selfClosing = true;
258 }
259
260 const AttributeList& attributes() const
261 {
262 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
263 return m_attributes;
264 }
265
266 void eraseCharacters()
267 {
268 ASSERT(m_type == TypeSet::Character);
269 m_data.clear();
270 m_orAllData = 0;
271 }
272
273 void eraseValueOfAttribute(size_t i)
274 {
275 ASSERT(m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
276 m_attributes[i].m_value.clear();
277 }
278
279 const DataVector& characters() const
280 {
281 ASSERT(m_type == TypeSet::Character);
282 return m_data;
283 }
284
285 const DataVector& comment() const
286 {
287 ASSERT(m_type == TypeSet::Comment);
288 return m_data;
289 }
290
291 const DataVector& data() const
292 {
293 ASSERT(m_type == TypeSet::Character || m_type == TypeSet::Comment || m_type == TypeSet::StartTag || m_type == TypeSet::EndTag);
294 return m_data;
295 }
296
297 bool isAll8BitData() const
298 {
299 return (m_orAllData <= 0xff);
300 }
301
302 const DataVector& name() const
303 {
304 return m_data;
305 }
306
307 String nameString() const
308 {
309 if (!m_data.size())
310 return emptyString();
311 if (isAll8BitData())
312 return String::make8BitFrom16BitSource(m_data.data(), m_data.size());
313 return String(m_data.data(), m_data.size());
314 }
315
316 protected:
317
318 #ifndef NDEBUG
319 void printString(const DataVector& string) const
320 {
321 DataVector::const_iterator iter = string.begin();
322 for (; iter != string.end(); ++iter)
323 fprintf(stderr, "%lc", wchar_t(*iter));
324 }
325 #endif // NDEBUG
326
327 void appendToName(UChar character)
328 {
329 ASSERT(character);
330 m_data.append(character);
331 m_orAllData |= character;
332 }
333
334 typename Type::Type m_type;
335 typename Attribute::Range m_range; // Always starts at zero.
336 int m_baseOffset;
337 DataVector m_data;
338 UChar m_orAllData;
339
340 // For StartTag and EndTag
341 bool m_selfClosing;
342 AttributeList m_attributes;
343
344 // A pointer into m_attributes used during lexing.
345 Attribute* m_currentAttribute;
346 };
347
348 }
349
350 #endif // MarkupTokenBase_h