The repository formerly known as dotfiles
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.


  1. /* vim: set foldmethod=marker foldlevel=0: */
  2. /* Copyright (C) 2019 C. McEnroe <june@causal.agency>
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU Affero General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Affero General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include <assert.h>
  18. #include <err.h>
  19. #include <locale.h>
  20. #include <regex.h>
  21. #include <stdbool.h>
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <sys/stat.h>
  26. #include <sysexits.h>
  27. #include <unistd.h>
  // Number of elements in a true array (not a pointer). The argument is
  // parenthesized before indexing so that expression arguments such as a
  // dereferenced pointer-to-array are measured correctly.
  #define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))

  // Small bitset of enum Class values; SET(x) is the set containing only x.
  typedef unsigned Set;
  #define SET(x) ((Set)1 << (x))
  // Highlight classes. The list is written once and expanded twice through
  // the X macro: into the enum constants and into their printable names.
  #define ENUM_CLASS \
      X(Normal) \
      X(Keyword) \
      X(Macro) \
      X(Tag) \
      X(String) \
      X(Escape) \
      X(Format) \
      X(Interp) \
      X(Comment) \
      X(Todo) \
      X(DiffOld) \
      X(DiffNew)

  enum Class {
  #define X(name) name,
      ENUM_CLASS
  #undef X
      ClassLen, // count of the classes above, not a class itself
  };

  // Printable name of each class, indexed by enum Class.
  static const char *ClassName[ClassLen] = {
  #define X(name) [name] = #name,
      ENUM_CLASS
  #undef X
  };
  55. struct Syntax {
  56. enum Class class;
  57. Set parent;
  58. bool newline;
  59. size_t subexp;
  60. const char *pattern;
  61. };
  // Regex fragments shared by the syntax tables below.

  // Word boundary: start of line, a non-identifier character, or a newline.
  #define WB "(^|[^_[:alnum:]]|\n)"

  // Optional / required runs of blanks and of any whitespace.
  #define BL0 "[[:blank:]]*"
  #define BL1 "[[:blank:]]+"
  #define SP0 "[[:space:]]*"
  #define SP1 "[[:space:]]+"

  // Identifier, single-quoted string, double-quoted string, block comment.
  #define PATTERN_ID "[_[:alpha:]][_[:alnum:]]*"
  #define PATTERN_SQ "'([^']|[\\]')*'"
  #define PATTERN_DQ "\"([^\"]|[\\]\")*\""
  #define PATTERN_BC "/[*]" "([^*]|[*][^/])*" "[*]+/"

  // Markers highlighted inside comments.
  #define PATTERN_TODO "FIXME|TODO|XXX"
  72. // C syntax {{{
  73. static const struct Syntax CSyntax[] = {
  74. { Keyword, .subexp = 2, .pattern = WB
  75. "(" "auto|extern|register|static|(_T|t)hread_local|typedef"
  76. "|" "_Atomic|const|restrict|volatile"
  77. "|" "inline|(_N|n)oreturn"
  78. "|" "(_A|a)lignas"
  79. "|" "enum|struct|union"
  80. "|" "do|else|for|if|switch|while"
  81. "|" "break|case|continue|default|goto|return"
  82. ")" WB },
  83. { Macro,
  84. .pattern = "^" BL0 "#(.|[\\]\n)*" },
  85. { Tag, .parent = SET(Macro), .subexp = 1,
  86. .pattern = "define" BL1 "(" PATTERN_ID ")" "[(]" },
  87. { Tag, .subexp = 2,
  88. .pattern = "(enum|struct|union)" SP1 "(" PATTERN_ID ")" SP0 "[{]" },
  89. { Tag, .parent = ~SET(Keyword), .newline = true, .subexp = 1,
  90. .pattern = "(" PATTERN_ID ")" SP0 "[(][^()]*[)]" SP0 "[{]" },
  91. { Tag, .newline = true, .subexp = 3, .pattern =
  92. "(static|typedef)" SP1
  93. "(" "(" PATTERN_ID ")" SP0
  94. "(" "[*]" "|" "[[][^]]*[]]" "|" "[{][^}]*[}]" "|" SP0 ")*" ")+" },
  95. { String, .parent = SET(Macro), .subexp = 1,
  96. .pattern = "include" BL0 "(<[^>]*>)" },
  97. { String,
  98. .pattern = "[LUu]?" PATTERN_SQ },
  99. { String, .parent = ~SET(String),
  100. .pattern = "([LU]|u8?)?" PATTERN_DQ },
  101. { Escape, .parent = SET(String),
  102. .pattern = "[\\]([\"'?\\abfnrtv]|[0-7]{1,3}|x[0-9A-Fa-f]+)" },
  103. { Escape, .parent = SET(String),
  104. .pattern = "[\\](U[0-9A-Fa-f]{8}|u[0-9A-Fa-f]{4})" },
  105. { Format, .parent = SET(String), .pattern =
  106. "%%|%[ #+-0]*" // flags
  107. "([*]|[0-9]+)?" // field width
  108. "([.]([*]|[0-9]+))?" // precision
  109. "([Lhjltz]|hh|ll)?" // length modifier
  110. "[AEFGXacdefginopsux]" // format specifier
  111. },
  112. { Comment, .parent = ~SET(String),
  113. .pattern = "//(.|[\\]\n)*" },
  114. { Comment, .parent = ~SET(String), .newline = true,
  115. .pattern = PATTERN_BC },
  116. { Todo, .parent = SET(Comment),
  117. .pattern = PATTERN_TODO },
  118. };
  119. // }}}
  120. // diff syntax {{{
  121. static const struct Syntax DiffSyntax[] = {
  122. { Comment, .pattern = "^@@.*" },
  123. { DiffOld, .pattern = "^[-].*" },
  124. { DiffNew, .pattern = "^[+].*" },
  125. };
  126. // }}}
  127. // make syntax {{{
  128. #define MAKE_TARGET "[-./_[:alnum:]]+"
  129. static const struct Syntax MakeSyntax[] = {
  130. { Keyword, .subexp = 2,
  131. .pattern = WB "([.](PHONY|PRECIOUS|SUFFIXES))" WB },
  132. { Macro,
  133. .pattern = "^ *-?include" },
  134. { Tag, .parent = ~SET(Keyword), .subexp = 1,
  135. .pattern = "(" MAKE_TARGET ")" "(" BL1 MAKE_TARGET ")*" BL0 ":" },
  136. { String, .subexp = 1,
  137. .pattern = "[._[:alnum:]]+" BL0 "[!+:?]?=" BL0 "(.*)" },
  138. { Normal,
  139. .pattern = "^\t.*" },
  140. { String,
  141. .pattern = PATTERN_SQ },
  142. { String,
  143. .pattern = PATTERN_DQ },
  144. { Interp,
  145. .pattern = "[$]." },
  146. // Support one level of nesting with the same delimiter.
  147. { Interp,
  148. .pattern = "[$][(](" "[^$)]" "|" "[$][(][^)]*[)]" ")*[)]" },
  149. { Interp,
  150. .pattern = "[$][{](" "[^$}]" "|" "[$][{][^}]*[}]" ")*[}]" },
  151. { Escape,
  152. .pattern = "[$][$]" },
  153. { Comment, .parent = ~SET(String),
  154. .pattern = "#.*" },
  155. { Todo, .parent = SET(Comment),
  156. .pattern = PATTERN_TODO },
  157. };
  158. // }}}
  159. // mdoc syntax {{{
  160. static const struct Syntax MdocSyntax[] = {
  161. { Keyword, .subexp = 2, .pattern = WB
  162. "(" "D[dt]|N[dm]|Os"
  163. "|" "S[hsx]|[LP]p|Xr"
  164. "|" "%[ABCDIJNOPQRTUV]|[BE][dl]|D[1l]|It|Ql|R[es]|Ta"
  165. "|" "Ap|[BE]k|Ns|Pf|Sm"
  166. "|" "Ar|Cm|Ev|Fl|O[cop]|Pa"
  167. "|" "Dv|Er|F[acdnot]|In|Lb|V[at]"
  168. "|" "A[dn]|Cd|Lk|M[st]"
  169. "|" "[BE]f|Em|Li|No|Sy"
  170. "|" "(Br|[ABDPQS])[coq]|E[co]"
  171. "|" "At|(Bs|[BDEFNO])x|Rv|St"
  172. ")" WB },
  173. { Tag, .subexp = 1,
  174. .pattern = "^[.]S[hs]" BL1 "(.+)" },
  175. { String,
  176. .pattern = PATTERN_DQ },
  177. { Normal,
  178. .pattern = "^[^.].*" },
  179. { String,
  180. .pattern = "[\\](" "." "|" "[(].{2}" "|" "[[][^]]*[]]" ")" },
  181. { Comment,
  182. .pattern = "^[.][\\]\".*" },
  183. { Todo, .parent = SET(Comment),
  184. .pattern = PATTERN_TODO },
  185. };
  186. // }}}
  187. // Rust syntax {{{
  188. static const struct Syntax RustSyntax[] = {
  189. { Keyword, .subexp = 2, .pattern = WB
  190. "(" "'?static|[Ss]elf|abstract|as|async|await|become|box|break|const"
  191. "|" "continue|crate|do|dyn|else|enum|extern|false|final|fn|for|if"
  192. "|" "impl|in|let|loop|macro|match|mod|move|mut|override|priv|pub|ref"
  193. "|" "return|struct|super|trait|true|try|type(of)?|union|uns(afe|ized)"
  194. "|" "use|virtual|where|while|yield"
  195. ")" WB },
  196. { Tag, .subexp = 2, .pattern =
  197. "(enum|fn|macro_rules!|mod|struct|type|union)" SP1 "(" PATTERN_ID ")" },
  198. { Macro, .newline = true,
  199. .pattern = "#!?[[][^]]*[]]" },
  200. { Macro,
  201. .pattern = PATTERN_ID "!" },
  202. { Interp,
  203. .pattern = "[$]" PATTERN_ID },
  204. { String,
  205. .pattern = "b?'([^']|[\\]')'" },
  206. { String,
  207. .pattern = "b?" "\"([^\"]|[\\][\n\"])*\"" },
  208. { Escape, .parent = SET(String),
  209. .pattern = "[\\]([\"'0\\nrt]|u[{][0-9A-Fa-f]{1,6}[}]|x[0-9A-Fa-f]{2})" },
  210. { Format, .parent = SET(String),
  211. .pattern = "[{][{]|[{][^{}]*[}]|[}][}]" },
  212. { String, .parent = ~SET(String), .newline = true,
  213. .pattern = "b?r\"[^\"]*\"" },
  214. { String, .parent = ~SET(String), .newline = true,
  215. .pattern = "b?r#+\"" "([^\"]|\"[^#])*" "\"+#+" },
  216. { Comment, .parent = ~SET(String),
  217. .pattern = "//.*" },
  218. { Comment, .parent = ~SET(String), .newline = true,
  219. .pattern = PATTERN_BC },
  220. { Todo, .parent = SET(Comment),
  221. .pattern = PATTERN_TODO },
  222. };
  223. // }}}
  224. // sh syntax {{{
  225. static const struct Syntax ShSyntax[] = {
  226. { Keyword, .subexp = 2, .pattern = WB
  227. "(" "!|case|do|done|elif|else|esac|fi|for|if|in|then|until|while"
  228. "|" "alias|bg|cd|command|false|fc|fg|getopts|jobs|kill|newgrp|pwd|read"
  229. "|" "true|type|ulimit|umask|unalias|wait"
  230. "|" "[.:]|break|continue|eval|exec|exit|export|local|readonly|return"
  231. "|" "set|shift|times|trap|unset"
  232. ")" WB },
  233. { Tag, .subexp = 2,
  234. .pattern = WB "(" PATTERN_ID ")" BL0 "[(]" BL0 "[)]" },
  235. { String, .newline = true, .subexp = 1, .pattern =
  236. "<<-?" BL0 "EOF[^\n]*\n"
  237. "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)"
  238. "\n\t*EOF\n" },
  239. { String, .parent = ~SET(String), .newline = true,
  240. .pattern = PATTERN_DQ },
  241. { Escape, .parent = SET(String),
  242. .pattern = "[\\][\"$\\`]" },
  243. { Interp, .parent = ~SET(Escape),
  244. .pattern = "[$][(][^)]*[)]" "|" "`[^`]*`" },
  245. { String, .parent = SET(Interp),
  246. .pattern = PATTERN_DQ },
  247. { Interp, .parent = ~SET(Escape),
  248. .pattern = "[$]([!#$*?@-]|[_[:alnum:]]+|[{][^}]*[}])" },
  249. { String, .parent = ~SET(Escape),
  250. .pattern = "[\\]." },
  251. { String, .subexp = 1, .newline = true, .pattern =
  252. "<<-?" BL0 "'EOF'[^\n]*\n"
  253. "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)"
  254. "\n\t*EOF\n" },
  255. { String, .parent = ~SET(String), .newline = true,
  256. .pattern = "'[^']*'" },
  257. { Comment, .parent = ~SET(String), .subexp = 2,
  258. .pattern = "(^|[[:blank:]]+)(#.*)" },
  259. { Todo, .parent = SET(Comment),
  260. .pattern = PATTERN_TODO },
  261. };
  262. // }}}
  263. static const struct Language {
  264. const char *name;
  265. const char *pattern;
  266. const struct Syntax *syntax;
  267. size_t len;
  268. } Languages[] = {
  269. { "c", "[.][chly]$", CSyntax, ARRAY_LEN(CSyntax) },
  270. { "diff", "[.](diff|patch)$", DiffSyntax, ARRAY_LEN(DiffSyntax) },
  271. { "make", "[.]mk$|^Makefile$", MakeSyntax, ARRAY_LEN(MakeSyntax) },
  272. { "mdoc", "[.][1-9]$", MdocSyntax, ARRAY_LEN(MdocSyntax) },
  273. { "rust", "[.]rs$", RustSyntax, ARRAY_LEN(RustSyntax) },
  274. { "sh", "[.]sh$", ShSyntax, ARRAY_LEN(ShSyntax) },
  275. { "text", "[.]txt$", NULL, 0 },
  276. };
  // Compile `pattern` as a POSIX extended regular expression, OR-ing `flags`
  // into the regcomp flags. On failure, report the regerror text together with
  // the pattern and exit with EX_SOFTWARE. The caller must regfree the result.
  static regex_t compile(const char *pattern, int flags) {
      regex_t regex;
      int status = regcomp(&regex, pattern, REG_EXTENDED | flags);
      if (status) {
          char message[256];
          regerror(status, &regex, message, sizeof(message));
          errx(EX_SOFTWARE, "regcomp: %s: %s", message, pattern);
      }
      return regex;
  }
  285. enum { SubsLen = 8 };
  286. static void highlight(struct Language lang, enum Class *hi, const char *str) {
  287. for (size_t i = 0; i < lang.len; ++i) {
  288. struct Syntax syn = lang.syntax[i];
  289. regex_t regex = compile(syn.pattern, syn.newline ? 0 : REG_NEWLINE);
  290. assert(syn.subexp < SubsLen);
  291. assert(syn.subexp <= regex.re_nsub);
  292. regmatch_t subs[SubsLen] = {{0}};
  293. for (size_t offset = 0; str[offset]; offset += subs[syn.subexp].rm_eo) {
  294. int error = regexec(
  295. &regex, &str[offset], SubsLen, subs, offset ? REG_NOTBOL : 0
  296. );
  297. if (error == REG_NOMATCH) break;
  298. if (error) errx(EX_SOFTWARE, "regexec: %d", error);
  299. regmatch_t *sub = &subs[syn.subexp];
  300. if (syn.parent && !(syn.parent & SET(hi[offset + sub->rm_so]))) {
  301. sub->rm_eo = sub->rm_so + 1;
  302. continue;
  303. }
  304. for (regoff_t j = sub->rm_so; j < sub->rm_eo; ++j) {
  305. hi[offset + j] = lang.syntax[i].class;
  306. }
  307. }
  308. regfree(&regex);
  309. }
  310. }
  311. static void check(void) {
  312. for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) {
  313. regex_t regex = compile(Languages[i].pattern, REG_NOSUB);
  314. regfree(&regex);
  315. for (size_t j = 0; j < Languages[i].len; ++j) {
  316. struct Syntax syn = Languages[i].syntax[j];
  317. regex = compile(syn.pattern, 0);
  318. if (syn.subexp >= SubsLen || syn.subexp > regex.re_nsub) {
  319. errx(
  320. EX_SOFTWARE, "subexpression %zu out of bounds: %s",
  321. syn.subexp, syn.pattern
  322. );
  323. }
  324. regfree(&regex);
  325. }
  326. }
  327. }
  // Output options and the key under which each is looked up. The list is
  // expanded twice through the X macro, as for the highlight classes.
  #define ENUM_OPTION \
      X(Anchor, "anchor") \
      X(CSS, "css") \
      X(Document, "document") \
      X(Inline, "inline") \
      X(Monospace, "monospace") \
      X(Tab, "tab") \
      X(Title, "title")

  enum Option {
  #define X(id, key) id,
      ENUM_OPTION
  #undef X
      OptionLen, // count of the options above
  };

  // Key strings indexed by enum Option, with a trailing NULL entry.
  static const char *OptionKey[OptionLen + 1] = {
  #define X(id, key) [id] = key,
      ENUM_OPTION
  #undef X
      NULL,
  };
  // Emits a format's header or footer. `opts` is indexed by enum Option.
  typedef void HeaderFn(const char *opts[]);

  // Emits the `len` bytes at `str`, all of which share the class `class`.
  typedef void OutputFn(
      const char *opts[], enum Class class, const char *str, size_t len
  );
  // ANSI format {{{
  // Numeric parameters used in the "ESC [ ... m" sequences written by the
  // ANSI output function.
  enum SGR {
      SGRBoldOn = 1,
      SGRUnderlineOn = 4,
      SGRBoldOff = 22,
      SGRUnderlineOff = 24,

      SGRBlack = 30,
      SGRRed = 31,
      SGRGreen = 32,
      SGRYellow = 33,
      SGRBlue = 34,
      SGRMagenta = 35,
      SGRCyan = 36,
      SGRWhite = 37,
      SGRDefault = 39,
  };
  367. static const enum SGR ANSIStyle[ClassLen][3] = {
  368. [Normal] = { SGRDefault },
  369. [Keyword] = { SGRWhite },
  370. [Macro] = { SGRGreen },
  371. [Tag] = { SGRDefault, SGRUnderlineOn, SGRUnderlineOff },
  372. [String] = { SGRCyan },
  373. [Escape] = { SGRDefault },
  374. [Format] = { SGRCyan, SGRBoldOn, SGRBoldOff },
  375. [Interp] = { SGRYellow },
  376. [Comment] = { SGRBlue },
  377. [Todo] = { SGRBlue, SGRBoldOn, SGRBoldOff },
  378. [DiffOld] = { SGRRed },
  379. [DiffNew] = { SGRGreen },
  380. };
  381. static void
  382. ansiOutput(const char *opts[], enum Class class, const char *str, size_t len) {
  383. (void)opts;
  384. if (ANSIStyle[class][1]) {
  385. printf(
  386. "\x1B[%d;%dm%.*s\x1B[%dm",
  387. ANSIStyle[class][0], ANSIStyle[class][1],
  388. (int)len, str,
  389. ANSIStyle[class][2]
  390. );
  391. } else {
  392. printf("\x1B[%dm%.*s", ANSIStyle[class][0], (int)len, str);
  393. }
  394. }
  395. // }}}
  396. // IRC format {{{
  397. enum IRC {
  398. IRCWhite,
  399. IRCBlack,
  400. IRCBlue,
  401. IRCGreen,
  402. IRCRed,
  403. IRCBrown,
  404. IRCMagenta,
  405. IRCOrange,
  406. IRCYellow,
  407. IRCLightGreen,
  408. IRCCyan,
  409. IRCLightCyan,
  410. IRCLightBlue,
  411. IRCPink,
  412. IRCGray,
  413. IRCLightGray,
  414. IRCBold = 0x02,
  415. IRCColor = 0x03,
  416. IRCMonospace = 0x11,
  417. };
  418. static const enum IRC SGRIRC[] = {
  419. [SGRBoldOn] = IRCBold,
  420. [SGRBoldOff] = IRCBold,
  421. [SGRBlack] = IRCBlack,
  422. [SGRRed] = IRCRed,
  423. [SGRGreen] = IRCGreen,
  424. [SGRYellow] = IRCYellow,
  425. [SGRBlue] = IRCBlue,
  426. [SGRMagenta] = IRCMagenta,
  427. [SGRCyan] = IRCCyan,
  428. [SGRWhite] = IRCGray,
  429. [SGRDefault] = 0,
  430. };
  431. static void ircHeader(const char *opts[]) {
  432. if (opts[Monospace]) printf("%c", IRCMonospace);
  433. }
  434. static void
  435. ircOutput(const char *opts[], enum Class class, const char *str, size_t len) {
  436. char cc[3] = "";
  437. if (ANSIStyle[class][0] != SGRDefault) {
  438. snprintf(cc, sizeof(cc), "%d", SGRIRC[ANSIStyle[class][0]]);
  439. }
  440. // Prevent trailing formatting after newline ...
  441. bool newline = (str[len - 1] == '\n');
  442. if (ANSIStyle[class][1]) {
  443. printf(
  444. "%c%s%c%.*s%c%s",
  445. IRCColor, cc, SGRIRC[ANSIStyle[class][1]],
  446. (int)(newline ? len - 1 : len), str,
  447. SGRIRC[ANSIStyle[class][2]],
  448. (newline ? "\n" : "")
  449. );
  450. } else {
  451. // Double-toggle bold to prevent str being interpreted as color.
  452. printf("%c%s%c%c%.*s", IRCColor, cc, IRCBold, IRCBold, (int)len, str);
  453. }
  454. // ... except for monospace, at the beginning of each line.
  455. if (newline && opts[Monospace]) printf("%c", IRCMonospace);
  456. }
  457. // }}}
  // HTML format {{{
  // Write exactly `len` bytes of `str` to stdout, replacing the characters
  // '"', '&', '<' and '>' with their HTML entities.
  //
  // The scan is bounded by `len` alone: it never reads past the span, and a
  // NUL byte inside the span is copied through like any other byte rather
  // than stopping progress.
  static void htmlEscape(const char *str, size_t len) {
      size_t start = 0; // first byte not yet written
      for (size_t i = 0; i < len; ++i) {
          const char *entity;
          switch (str[i]) {
              case '"': entity = "&quot;"; break;
              case '&': entity = "&amp;"; break;
              case '<': entity = "&lt;"; break;
              case '>': entity = "&gt;"; break;
              default: continue;
          }
          // Flush the literal run before the special character, then the
          // entity that stands in for it.
          fwrite(&str[start], 1, i - start, stdout);
          fputs(entity, stdout);
          start = i + 1;
      }
      fwrite(&str[start], 1, len - start, stdout);
  }
  474. static const char *HTMLStyle[ClassLen] = {
  475. [Keyword] = "color: dimgray;",
  476. [Macro] = "color: green;",
  477. [Tag] = "color: inherit; text-decoration: underline;",
  478. [String] = "color: teal;",
  479. [Format] = "color: teal; font-weight: bold;",
  480. [Interp] = "color: olive;",
  481. [Comment] = "color: navy;",
  482. [Todo] = "color: navy; font-weight: bold;",
  483. [DiffOld] = "color: red;",
  484. [DiffNew] = "color: green;",
  485. };
  // Write the tab-size declarations (prefixed and unprefixed) using the
  // HTML-escaped value `tab`.
  static void htmlTabSize(const char *tab) {
      size_t len = strlen(tab);
      printf("-moz-tab-size: ");
      htmlEscape(tab, len);
      printf("; tab-size: ");
      htmlEscape(tab, len);
      printf(";");
  }
  493. static void htmlHeader(const char *opts[]) {
  494. if (!opts[Document]) goto body;
  495. printf("<!DOCTYPE html>\n<title>");
  496. if (opts[Title]) htmlEscape(opts[Title], strlen(opts[Title]));
  497. printf("</title>\n");
  498. if (opts[CSS]) {
  499. printf("<link rel=\"stylesheet\" href=\"");
  500. htmlEscape(opts[CSS], strlen(opts[CSS]));
  501. printf("\">\n");
  502. } else if (!opts[Inline]) {
  503. printf("<style>\n");
  504. if (opts[Tab]) {
  505. printf("pre.hi { ");
  506. htmlTabSize(opts[Tab]);
  507. printf(" }\n");
  508. }
  509. for (enum Class class = 0; class < ClassLen; ++class) {
  510. if (!HTMLStyle[class]) continue;
  511. printf(".hi.%s { %s }\n", ClassName[class], HTMLStyle[class]);
  512. }
  513. printf(
  514. ".hi.%s:focus { color: goldenrod; outline: none; }\n",
  515. ClassName[Tag]
  516. );
  517. printf("</style>\n");
  518. }
  519. body:
  520. if (opts[Inline] && opts[Tab]) {
  521. printf("<pre class=\"hi\" style=\"");
  522. htmlTabSize(opts[Tab]);
  523. printf("\">");
  524. } else {
  525. printf("<pre class=\"hi\">");
  526. }
  527. }
  // Close the <pre> element opened by the header. `opts` is unused.
  static void htmlFooter(const char *opts[]) {
      (void)opts;
      fputs("</pre>\n", stdout);
  }
  532. static void htmlAnchor(const char *opts[], const char *str, size_t len) {
  533. if (opts[Inline]) {
  534. printf("<a style=\"%s\" id=\"", HTMLStyle[Tag] ? HTMLStyle[Tag] : "");
  535. } else {
  536. printf("<a class=\"hi %s\" id=\"", ClassName[Tag]);
  537. }
  538. htmlEscape(str, len);
  539. printf("\" href=\"#");
  540. htmlEscape(str, len);
  541. printf("\">");
  542. htmlEscape(str, len);
  543. printf("</a>");
  544. }
  545. static void
  546. htmlOutput(const char *opts[], enum Class class, const char *str, size_t len) {
  547. if (opts[Anchor] && class == Tag) {
  548. htmlAnchor(opts, str, len);
  549. return;
  550. }
  551. if (opts[Inline]) {
  552. printf("<span style=\"%s\">", HTMLStyle[class] ? HTMLStyle[class] : "");
  553. } else {
  554. printf("<span class=\"hi %s\">", ClassName[class]);
  555. }
  556. htmlEscape(str, len);
  557. printf("</span>");
  558. }
  559. // }}}
  560. // Debug format {{{
  561. static void
  562. debugOutput(const char *opts[], enum Class class, const char *str, size_t len) {
  563. (void)opts;
  564. printf("%s\t\"", ClassName[class]);
  565. while (len) {
  566. size_t run = strcspn(str, "\t\n\"\\");
  567. if (run > len) run = len;
  568. switch (str[0]) {
  569. break; case '\t': run = 1; printf("\\t");
  570. break; case '\n': run = 1; printf("\\n");
  571. break; case '"': run = 1; printf("\\\"");
  572. break; case '\\': run = 1; printf("\\\\");
  573. break; default: printf("%.*s", (int)run, str);
  574. }
  575. str += run;
  576. len -= run;
  577. }
  578. printf("\"\n");
  579. }
  580. // }}}
  581. static const struct Format {
  582. const char *name;
  583. OutputFn *output;
  584. HeaderFn *header;
  585. HeaderFn *footer;
  586. } Formats[] = {
  587. { "ansi", ansiOutput, NULL, NULL },
  588. { "irc", ircOutput, ircHeader, NULL },
  589. { "html", htmlOutput, htmlHeader, htmlFooter },
  590. { "debug", debugOutput, NULL, NULL },
  591. };
  592. static bool findLanguage(struct Language *lang, const char *name) {
  593. for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) {
  594. if (strcmp(name, Languages[i].name)) continue;
  595. *lang = Languages[i];
  596. return true;
  597. }
  598. return false;
  599. }
  600. static bool matchLanguage(struct Language *lang, const char *name) {
  601. for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) {
  602. regex_t regex = compile(Languages[i].pattern, REG_NOSUB);
  603. int error = regexec(&regex, name, 0, NULL, 0);
  604. regfree(&regex);
  605. if (error == REG_NOMATCH) continue;
  606. if (error) errx(EX_SOFTWARE, "regexec: %d", error);
  607. *lang = Languages[i];
  608. return true;
  609. }
  610. return false;
  611. }
  612. static bool findFormat(struct Format *format, const char *name) {
  613. for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) {
  614. if (strcmp(name, Formats[i].name)) continue;
  615. *format = Formats[i];
  616. return true;
  617. }
  618. return false;
  619. }
  620. int main(int argc, char *argv[]) {
  621. setlocale(LC_CTYPE, "");
  622. const char *name = NULL;
  623. struct Language lang = {0};
  624. struct Format format = Formats[0];
  625. const char *opts[OptionLen] = {0};
  626. int opt;
  627. while (0 < (opt = getopt(argc, argv, "cf:l:n:o:"))) {
  628. switch (opt) {
  629. break; case 'c': check(); return EX_OK;
  630. break; case 'f': {
  631. if (!findFormat(&format, optarg)) {
  632. errx(EX_USAGE, "no such format %s", optarg);
  633. }
  634. }
  635. break; case 'l': {
  636. if (!findLanguage(&lang, optarg)) {
  637. errx(EX_USAGE, "no such language %s", optarg);
  638. }
  639. }
  640. break; case 'n': name = optarg;
  641. break; case 'o': {
  642. char *val;
  643. enum Option key;
  644. while (optarg[0]) {
  645. key = getsubopt(&optarg, (char *const *)OptionKey, &val);
  646. if (key >= OptionLen) {
  647. errx(EX_USAGE, "no such option %s", val);
  648. }
  649. opts[key] = (val ? val : "");
  650. }
  651. }
  652. break; default: return EX_USAGE;
  653. }
  654. }
  655. const char *path = "(stdin)";
  656. FILE *file = stdin;
  657. if (optind < argc) {
  658. path = argv[optind];
  659. file = fopen(path, "r");
  660. if (!file) err(EX_NOINPUT, "%s", path);
  661. }
  662. if (!name) {
  663. name = strrchr(path, '/');
  664. name = (name ? &name[1] : path);
  665. }
  666. if (!lang.name && !matchLanguage(&lang, name)) {
  667. errx(EX_USAGE, "cannot infer language for %s", name);
  668. }
  669. if (!opts[Title]) opts[Title] = name;
  670. struct stat stat;
  671. int error = fstat(fileno(file), &stat);
  672. if (error) err(EX_IOERR, "fstat");
  673. size_t cap = (stat.st_mode & S_IFREG ? stat.st_size + 1 : 4096);
  674. char *str = malloc(cap);
  675. if (!str) err(EX_OSERR, "malloc");
  676. size_t len = 0, read;
  677. while (0 < (read = fread(&str[len], 1, cap - len - 1, file))) {
  678. len += read;
  679. if (len + 1 < cap) continue;
  680. cap *= 2;
  681. str = realloc(str, cap);
  682. if (!str) err(EX_OSERR, "realloc");
  683. }
  684. if (ferror(file)) err(EX_IOERR, "fread");
  685. str[len] = '\0';
  686. enum Class *hi = calloc(len, sizeof(*hi));
  687. if (!hi) err(EX_OSERR, "calloc");
  688. highlight(lang, hi, str);
  689. size_t run = 0;
  690. if (format.header) format.header(opts);
  691. for (size_t i = 0; i < len; i += run) {
  692. for (run = 1; i + run < len; ++run) {
  693. if (hi[i + run] != hi[i]) break;
  694. if (str[i + run - 1] == '\n') break;
  695. }
  696. format.output(opts, hi[i], &str[i], run);
  697. }
  698. if (format.footer) format.footer(opts);
  699. }