1<lexer>
2 <config>
3 <name>WebVTT</name>
4 <alias>vtt</alias>
5 <filename>*.vtt</filename>
6 <mime_type>text/vtt</mime_type>
7 </config>
8 <!--
9 The WebVTT spec refers to a WebVTT line terminator as either CRLF, CR or LF.
10 (https://www.w3.org/TR/webvtt1/#webvtt-line-terminator) However, with this
11 definition it is unclear whether CRLF is one line terminator (CRLF) or two
12 line terminators (CR and LF).
13
14 To work around this ambiguity, only CRLF and LF are considered as line terminators.
15 To my knowledge only classic Mac OS uses CR as line terminators, so the lexer should
16 still work for most files.
17 -->
18 <rules>
19 <!-- https://www.w3.org/TR/webvtt1/#webvtt-file-body -->
20 <state name="root">
21 <rule pattern="(\AWEBVTT)((?:[ \t][^\r\n]*)?(?:\r?\n){2,})">
22 <bygroups>
23 <token type="Keyword" />
24 <token type="Text" />
25 </bygroups>
26 </rule>
27 <rule pattern="(^REGION)([ \t]*$)">
28 <bygroups>
29 <token type="Keyword" />
30 <token type="Text" />
31 </bygroups>
32 <push state="region-settings-list" />
33 </rule>
34 <rule
35 pattern="(^STYLE)([ \t]*$)((?:(?!-->)[\s\S])*?)((?:\r?\n){2})">
36 <bygroups>
37 <token type="Keyword" />
38 <token type="Text" />
39 <using lexer="CSS" />
40 <token type="Text" />
41 </bygroups>
42 </rule>
43 <rule>
44 <include state="comment" />
45 </rule>
46 <rule
47 pattern="(?=((?![^\r\n]*-->)[^\r\n]*\r?\n)?(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}[ \t]+-->[ \t]+(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})"
48 >
49 <push state="cues" />
50 </rule>
51 </state>
52
53 <!-- https://www.w3.org/TR/webvtt1/#webvtt-region-settings-list -->
54 <state name="region-settings-list">
55 <rule pattern="(?: |\t|\r?\n(?!\r?\n))+">
56 <token type="Text" />
57 </rule>
58 <rule pattern="(?:\r?\n){2}">
59 <token type="Text" />
60 <pop depth="1" />
61 </rule>
62 <rule pattern="(id)(:)(?!-->)(\S+)">
63 <bygroups>
64 <token type="Keyword" />
65 <token type="Punctuation" />
66 <token type="Literal" />
67 </bygroups>
68 </rule>
69 <rule pattern="(width)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
70 <bygroups>
71 <token type="Keyword" />
72 <token type="Punctuation" />
73 <token type="Literal" />
74 <token type="KeywordType" />
75 </bygroups>
76 </rule>
77 <rule pattern="(lines)(:)(\d+)">
78 <bygroups>
79 <token type="Keyword" />
80 <token type="Punctuation" />
81 <token type="Literal" />
82 </bygroups>
83 </rule>
84 <rule
85 pattern="(regionanchor|viewportanchor)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)(,)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
86 <bygroups>
87 <token type="Keyword" />
88 <token type="Punctuation" />
89 <token type="Literal" />
90 <token type="KeywordType" />
91 <token type="Punctuation" />
92 <token type="Literal" />
93 <token type="KeywordType" />
94 </bygroups>
95 </rule>
96 <rule pattern="(scroll)(:)(up)">
97 <bygroups>
98 <token type="Keyword" />
99 <token type="Punctuation" />
100 <token type="KeywordConstant" />
101 </bygroups>
102 </rule>
103 </state>
104
105 <!-- https://www.w3.org/TR/webvtt1/#webvtt-comment-block -->
106 <state name="comment">
107 <rule
108 pattern="^NOTE( |\t|\r?\n)((?!-->)[\s\S])*?(?:(\r?\n){2}|\Z)">
109 <token type="Comment" />
110 </rule>
111 </state>
112
113 <!--
114 "Zero or more WebVTT cue blocks and WebVTT comment blocks separated from each other by one or more
115 WebVTT line terminators." (https://www.w3.org/TR/webvtt1/#file-structure)
116 -->
117 <state name="cues">
118 <rule
119 pattern="(?:((?!-->)[^\r\n]+)?(\r?\n))?((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]+)(-->)([ \t]+)((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]*)">
120 <bygroups>
121 <token type="Name" />
122 <token type="Text" />
123 <token type="LiteralDate" />
124 <token type="Text" />
125 <token type="Operator" />
126 <token type="Text" />
127 <token type="LiteralDate" />
128 <token type="Text" />
129 </bygroups>
130 <push state="cue-settings-list" />
131 </rule>
132 <rule>
133 <include state="comment" />
134 </rule>
135 </state>
136
137 <!-- https://www.w3.org/TR/webvtt1/#webvtt-cue-settings-list -->
138 <state name="cue-settings-list">
139 <rule pattern="[ \t]+">
140 <token type="Text" />
141 </rule>
142 <rule pattern="(vertical)(:)?(rl|lr)?">
143 <bygroups>
144 <token type="Keyword" />
145 <token type="Punctuation" />
146 <token type="KeywordConstant" />
147 </bygroups>
148 </rule>
149 <rule
150 pattern="(line)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(start|center|end))?)?">
151 <bygroups>
152 <token type="Keyword" />
153 <token type="Punctuation" />
154 <token type="Literal" />
155 <token type="KeywordType" />
156 <token type="Literal" />
157 <token type="Punctuation" />
158 <token type="KeywordConstant" />
159 </bygroups>
160 </rule>
161 <rule
162 pattern="(position)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(line-left|center|line-right))?)?">
163 <bygroups>
164 <token type="Keyword" />
165 <token type="Punctuation" />
166 <token type="Literal" />
167 <token type="KeywordType" />
168 <token type="Literal" />
169 <token type="Punctuation" />
170 <token type="KeywordConstant" />
171 </bygroups>
172 </rule>
173 <rule pattern="(size)(:)?(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%))?">
174 <bygroups>
175 <token type="Keyword" />
176 <token type="Punctuation" />
177 <token type="Literal" />
178 <token type="KeywordType" />
179 </bygroups>
180 </rule>
181 <rule pattern="(align)(:)?(start|center|end|left|right)?">
182 <bygroups>
183 <token type="Keyword" />
184 <token type="Punctuation" />
185 <token type="KeywordConstant" />
186 </bygroups>
187 </rule>
188 <rule pattern="(region)(:)?((?![^\r\n]*-->(?=[ \t]+?))[^ \t\r\n]+)?">
189 <bygroups>
190 <token type="Keyword" />
191 <token type="Punctuation" />
192 <token type="Literal" />
193 </bygroups>
194 </rule>
195 <rule
196 pattern="(?=\r?\n)">
197 <push state="cue-payload" />
198 </rule>
199 </state>
200
201 <!-- https://www.w3.org/TR/webvtt1/#cue-payload -->
202 <state name="cue-payload">
203 <rule pattern="(\r?\n){2,}">
204 <token type="Text" />
205 <pop depth="2" />
206 </rule>
207 <rule pattern="[^<&]+?">
208 <token type="Text" />
209 </rule>
210 <rule pattern="&(#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);">
211 <token type="Text" />
212 </rule>
213 <rule pattern="(?=<)">
214 <token type="Text" />
215 <push state="cue-span-tag" />
216 </rule>
217 </state>
218 <state name="cue-span-tag">
219 <rule
220 pattern="<(?=c|i|b|u|ruby|rt|v|lang|(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
221 <token type="Punctuation" />
222 <push state="cue-span-start-tag-name" />
223 </rule>
224 <rule pattern="(</)(c|i|b|u|ruby|rt|v|lang)">
225 <bygroups>
226 <token type="Punctuation" />
227 <token type="NameTag" />
228 </bygroups>
229 </rule>
230 <rule pattern=">">
231 <token type="Punctuation" />
232 <pop depth="1" />
233 </rule>
234 </state>
235 <state name="cue-span-start-tag-name">
236 <rule pattern="(c|i|b|u|ruby|rt)|((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
237 <bygroups>
238 <token type="NameTag" />
239 <token type="LiteralDate" />
240 </bygroups>
241 <push state="cue-span-classes-without-annotations" />
242 </rule>
243 <rule pattern="v|lang">
244 <token type="NameTag" />
245 <push state="cue-span-classes-with-annotations" />
246 </rule>
247 </state>
248 <state name="cue-span-classes-without-annotations">
249 <rule>
250 <include state="cue-span-classes" />
251 </rule>
252 <rule pattern="(?=>)">
253 <pop depth="2" />
254 </rule>
255 </state>
256 <state name="cue-span-classes-with-annotations">
257 <rule>
258 <include state="cue-span-classes" />
259 </rule>
260 <rule pattern="(?=[ \t])">
261 <push state="cue-span-start-tag-annotations" />
262 </rule>
263 </state>
264 <state name="cue-span-classes">
265 <rule pattern="(\.)([^ \t\n\r&<>\.]+)">
266 <bygroups>
267 <token type="Punctuation" />
268 <token type="NameTag" />
269 </bygroups>
270 </rule>
271 </state>
272 <state name="cue-span-start-tag-annotations">
273 <rule
274 pattern="[ \t](?:[^\n\r&>]|&(?:#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);)+">
275 <token type="Text" />
276 </rule>
277 <rule pattern="(?=>)">
278 <token type="Text" />
279 <pop depth="3" />
280 </rule>
281 </state>
282 </rules>
283</lexer>