-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathHTMLTokenizer.h
141 lines (93 loc) · 4.27 KB
/
HTMLTokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// HTMLTokenizer.h
//
// Public domain. https://github.com/nolanw/HTMLReader
#import <Foundation/Foundation.h>
#import "HTMLOrderedDictionary.h"
#import "HTMLParser.h"
#import "HTMLTokenizerState.h"
/**
Produces tokens from a string of HTML.
The class derives from NSEnumerator. NOTE(review): presumably tokens are pulled through that inherited interface; confirm against the implementation.
The tokenization rules are described at http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
*/
@interface HTMLTokenizer : NSEnumerator

/**
Initializes a tokenizer. This is the designated initializer.
@param string The string where tokens come from.
*/
- (instancetype)initWithString:(NSString *)string NS_DESIGNATED_INITIALIZER;

/// The source string that tokens are derived from.
@property (nonatomic, copy, readonly) NSString *string;

/// The tokenizer's current state. It is writable because the parser sometimes has to change it.
@property (nonatomic, assign) HTMLTokenizerState state;

/// The parser consuming this tokenizer's tokens, referenced weakly. The tokenizer sometimes needs to know the parser's state.
@property (nonatomic, weak) HTMLParser *parser;

@end
/// The token for a `<!DOCTYPE>` tag.
@interface HTMLDOCTYPEToken : NSObject

/// The DOCTYPE's name; nil when it has none.
@property (nonatomic, copy) NSString *name;

/// The DOCTYPE's public identifier; nil when it has none.
@property (nonatomic, copy) NSString *publicIdentifier;

/// The DOCTYPE's system identifier; nil when it has none.
@property (nonatomic, copy) NSString *systemIdentifier;

/// YES when the parsed HTMLDocument's quirks mode should be set; NO when other indicators should be used instead.
@property (nonatomic, assign) BOOL forceQuirks;

@end
#pragma mark - Tokens

/**
Abstract representation of an HTML tag, either opening (\<p\>) or closing (\</p\>), along with its optional attributes.
*/
@interface HTMLTagToken : NSObject

/**
Initializes a tag token. This is the designated initializer.
@param tagName The name of the tag.
*/
- (instancetype)initWithTagName:(NSString *)tagName NS_DESIGNATED_INITIALIZER;

/// The tag's name.
@property (nonatomic, copy) NSString *tagName;

/// The tag's attributes, as a dictionary mapping HTMLAttributeName keys to NSString values.
@property (nonatomic, copy) HTMLOrderedDictionary *attributes;

/// YES for a self-closing tag (\<br/\>); NO for anything else (\<br\> or \</br\>).
@property (nonatomic, assign) BOOL selfClosingFlag;

@end
/// An HTMLStartTagToken represents a start tag like `<p>`.
/// It adds one method to what it inherits from HTMLTagToken.
@interface HTMLStartTagToken : HTMLTagToken
/**
Returns an initialized copy of this start tag token with a new tag name.
Because the selector begins with `copy`, ARC treats the returned object as owned by the caller.
NOTE(review): presumably everything except the tag name carries over from the receiver; confirm against the implementation.
@param tagName The tag name of the copied token.
@return The copied token.
*/
- (instancetype)copyWithTagName:(NSString *)tagName;
@end
/// An HTMLEndTagToken represents an end tag like \</p\>.
/// It declares nothing beyond what it inherits from HTMLTagToken.
@interface HTMLEndTagToken : HTMLTagToken
@end
/// The token for a comment \<!-- like this --\>.
@interface HTMLCommentToken : NSObject

/**
Initializes a comment token. This is the designated initializer.
@param data The comment's data.
*/
- (instancetype)initWithData:(NSString *)data NS_DESIGNATED_INITIALIZER;

/// The data of the comment. Read-only in this interface.
@property (nonatomic, copy, readonly) NSString *data;

@end
/// Text in an HTML document, represented as a series of code points.
@interface HTMLCharacterToken : NSObject

/**
Initializes a character token. This is the designated initializer.
@param string The characters the token represents.
*/
- (instancetype)initWithString:(NSString *)string NS_DESIGNATED_INITIALIZER;

/// The code points this token represents. Read-only in this interface.
@property (nonatomic, copy, readonly) NSString *string;

/**
Provides the token's leading whitespace as a token.
@return A token for the leading whitespace, or nil if there is no leading whitespace.
*/
- (instancetype)leadingWhitespaceToken;

/**
Provides the characters that follow the leading whitespace as a token.
@return A token for the characters after the leading whitespace, or nil if the token is entirely whitespace.
*/
- (instancetype)afterLeadingWhitespaceToken;

@end
/**
The token for a parse error that occurred during tokenization.
Parse errors are emitted as tokens so that they come with context.
*/
@interface HTMLParseErrorToken : NSObject

/**
Initializes a parse error token. This is the designated initializer.
@param error The reason for the parse error.
*/
- (instancetype)initWithError:(NSString *)error NS_DESIGNATED_INITIALIZER;

/// Why the parse error occurred. Read-only in this interface.
@property (nonatomic, copy, readonly) NSString *error;

@end
/// A single HTMLEOFToken is emitted when the end of the file is parsed and no further tokens will be emitted.
/// The class declares no properties or methods of its own.
@interface HTMLEOFToken : NSObject
@end
/// A category on HTMLTokenizer that, going by its name, exists to support testing.
@interface HTMLTokenizer (Testing)
/**
Sets the name of the last start tag, which is used at certain steps of tokenization.
The tag is a pretend one: the caller supplies its name directly.
@param tagName The name of the pretend last start tag.
*/
- (void)setLastStartTag:(NSString *)tagName;
@end