<lexer>
  <!--
    Lexer definition for Pig scripts.
    Layout: <config> carries the lexer's identity and matching options;
    <rules> holds named states, each an ordered list of regex rules that
    emit a token type. The "root" state pulls in the helper states
    (keywords, types, builtins, punct, operators) via <include>.
    NOTE(review): comments below assume rules are tried top to bottom and
    the first match wins; confirm against the consuming lexer engine.
  -->
  <config>
    <!-- Display name, lookup alias, filename glob and MIME type for this lexer. -->
    <name>Pig</name>
    <alias>pig</alias>
    <filename>*.pig</filename>
    <mime_type>text/x-pig</mime_type>
    <!-- Patterns are written mostly in lower case; this flag requests case-insensitive matching. -->
    <case_insensitive>true</case_insensitive>
  </config>
  <rules>
    <!-- Entry state: whitespace, comments, strings, then the included word/symbol states, numbers, and fallbacks. -->
    <state name="root">
      <!-- Runs of whitespace. -->
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <!-- Line comment: two hyphens through the end of the line. -->
      <rule pattern="--.*">
        <token type="Comment"/>
      </rule>
      <!-- Block comment; [\w\W]*? is a non-greedy "any character, including newlines". -->
      <rule pattern="/\*[\w\W]*?\*/">
        <token type="CommentMultiline"/>
      </rule>
      <!-- Backslash immediately followed by a newline. -->
      <rule pattern="\\\n">
        <token type="Text"/>
      </rule>
      <!-- Any other lone backslash. -->
      <rule pattern="\\">
        <token type="Text"/>
      </rule>
      <!-- Single-quoted string: simple escapes, \uXXXX escapes, or any character except quote, backslash, CR, LF. -->
      <rule pattern="\'(?:\\[ntbrf\\\']|\\u[0-9a-f]{4}|[^\'\\\n\r])*\'">
        <token type="LiteralString"/>
      </rule>
      <rule>
        <include state="keywords"/>
      </rule>
      <rule>
        <include state="types"/>
      </rule>
      <rule>
        <include state="builtins"/>
      </rule>
      <rule>
        <include state="punct"/>
      </rule>
      <rule>
        <include state="operators"/>
      </rule>
      <!-- Float: optional integer part, mandatory fraction, optional unsigned exponent, optional f/d suffix. -->
      <rule pattern="[0-9]*\.[0-9]+(e[0-9]+)?[fd]?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <!-- Hexadecimal integer. -->
      <rule pattern="0x[0-9a-f]+">
        <token type="LiteralNumberHex"/>
      </rule>
      <!-- Decimal integer with optional L suffix. -->
      <rule pattern="[0-9]+L?">
        <token type="LiteralNumberInteger"/>
      </rule>
      <!-- NOTE(review): \s+ above already matches newlines, so this rule looks redundant; confirm before removing. -->
      <rule pattern="\n">
        <token type="Text"/>
      </rule>
      <!-- Identifier followed by "(": identifier, optional whitespace, and the parenthesis are tokenized separately. -->
      <rule pattern="([a-z_]\w*)(\s*)(\()">
        <bygroups>
          <token type="NameFunction"/>
          <token type="Text"/>
          <token type="Punctuation"/>
        </bygroups>
      </rule>
      <!-- Single parenthesis, hash or colon emitted as plain text. -->
      <rule pattern="[()#:]">
        <token type="Text"/>
      </rule>
      <!-- Run of characters excluding ( : # ' " ) and whitespace. The double quote is written as &quot; so it does not end the attribute value. -->
      <rule pattern="[^(:#\'&quot;)\s]+">
        <token type="Text"/>
      </rule>
      <!-- Last-resort fallback: non-whitespace run plus trailing whitespace. -->
      <rule pattern="\S+\s+">
        <token type="Text"/>
      </rule>
    </state>
    <!-- Reserved words and shell-style commands; \b requires a word boundary after the match. -->
    <state name="keywords">
      <rule pattern="(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|%declare|%default|define|dense|desc|describe|distinct|du|dump|eval|exec|explain|filter|flatten|foreach|full|generate|group|help|if|illustrate|import|inner|input|into|is|join|kill|left|limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|outer|output|parallel|pig|pwd|quit|register|returns|right|rm|rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|stream|through|union|using|void)\b">
        <token type="Keyword"/>
      </rule>
    </state>
    <!-- Built-in function, loader/storer and command names. -->
    <state name="builtins">
      <rule pattern="(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|TOKENIZE)\b">
        <token type="NameBuiltin"/>
      </rule>
    </state>
    <!-- Data type names. -->
    <state name="types">
      <rule pattern="(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|int|long|tuple)\b">
        <token type="KeywordType"/>
      </rule>
    </state>
    <!-- Statement terminator and bracket characters. -->
    <state name="punct">
      <rule pattern="[;(){}\[\]]">
        <token type="Punctuation"/>
      </rule>
    </state>
    <!-- Symbolic and word operators. -->
    <state name="operators">
      <!-- Single-character operators. -->
      <rule pattern="[#=,./%+\-?]">
        <token type="Operator"/>
      </rule>
      <!-- Word comparison operators. NOTE(review): "matches" is also listed in the keywords state, which root includes earlier; presumably the keyword rule wins there. -->
      <rule pattern="(eq|gt|lt|gte|lte|neq|matches)\b">
        <token type="Operator"/>
      </rule>
      <!-- Symbolic comparison operators. "<" must be written as &lt; inside an attribute value; ">" is escaped for symmetry. -->
      <rule pattern="(==|&lt;=|&lt;|&gt;=|&gt;|!=)">
        <token type="Operator"/>
      </rule>
    </state>
  </rules>
</lexer>