regex.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. /******************************************************************************
  2. * This file is part of TinTin++ *
  3. * *
  4. * Copyright 2004-2019 Igor van den Hoven *
  5. * *
  6. * TinTin++ is free software; you can redistribute it and/or modify *
  7. * it under the terms of the GNU General Public License as published by *
  8. * the Free Software Foundation; either version 3 of the License, or *
  9. * (at your option) any later version. *
  10. * *
  11. * This program is distributed in the hope that it will be useful, *
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  14. * GNU General Public License for more details. *
  15. * *
  16. * *
  17. * You should have received a copy of the GNU General Public License *
  18. * along with TinTin++. If not, see https://www.gnu.org/licenses. *
  19. ******************************************************************************/
  20. /******************************************************************************
  21. * (T)he K(I)cki(N) (T)ickin D(I)kumud Clie(N)t *
  22. * *
  23. * coded by Igor van den Hoven 2004 *
  24. ******************************************************************************/
  25. #include <sys/types.h>
  26. #include <pcre.h>
  27. #include "tintin.h"
  28. int match(struct session *ses, char *str, char *exp, int sub)
  29. {
  30. char expbuf[BUFFER_SIZE];
  31. sprintf(expbuf, "\\A%s\\Z", exp);
  32. substitute(ses, expbuf, expbuf, sub);
  33. return tintin_regexp(ses, NULL, str, expbuf, 0, 0);
  34. }
  35. int find(struct session *ses, char *str, char *exp, int sub, int flag)
  36. {
  37. if (HAS_BIT(sub, SUB_VAR|SUB_FUN))
  38. {
  39. char expbuf[BUFFER_SIZE], strbuf[BUFFER_SIZE];
  40. substitute(ses, str, strbuf, SUB_VAR|SUB_FUN);
  41. substitute(ses, exp, expbuf, SUB_VAR|SUB_FUN);
  42. return tintin_regexp(ses, NULL, strbuf, expbuf, 0, flag);
  43. }
  44. else
  45. {
  46. return tintin_regexp(ses, NULL, str, exp, 0, flag);
  47. }
  48. }
  49. DO_COMMAND(do_regexp)
  50. {
  51. char arg1[BUFFER_SIZE], arg2[BUFFER_SIZE], is_t[BUFFER_SIZE], is_f[BUFFER_SIZE];
  52. arg = sub_arg_in_braces(ses, arg, arg1, GET_ONE, SUB_VAR|SUB_FUN);
  53. arg = sub_arg_in_braces(ses, arg, arg2, GET_ONE, SUB_VAR|SUB_FUN);
  54. arg = get_arg_in_braces(ses, arg, is_t, GET_ALL);
  55. arg = get_arg_in_braces(ses, arg, is_f, GET_ALL);
  56. if (*is_t == 0)
  57. {
  58. show_error(ses, LIST_COMMAND, "SYNTAX: #REGEXP {string} {expression} {true} {false}.");
  59. }
  60. else
  61. {
  62. if (tintin_regexp(ses, NULL, arg1, arg2, 0, SUB_CMD))
  63. {
  64. substitute(ses, is_t, is_t, SUB_CMD);
  65. ses = script_driver(ses, LIST_COMMAND, is_t);
  66. }
  67. else if (*is_f)
  68. {
  69. ses = script_driver(ses, LIST_COMMAND, is_f);
  70. }
  71. }
  72. return ses;
  73. }
  74. int regexp_compare(pcre *nodepcre, char *str, char *exp, int option, int flag)
  75. {
  76. pcre *regex;
  77. const char *error;
  78. int i, j, matches, match[303];
  79. if (nodepcre == NULL)
  80. {
  81. regex = pcre_compile(exp, option, &error, &i, NULL);
  82. }
  83. else
  84. {
  85. regex = nodepcre;
  86. }
  87. if (regex == NULL)
  88. {
  89. return FALSE;
  90. }
  91. matches = pcre_exec(regex, NULL, str, strlen(str), 0, 0, match, 303);
  92. if (matches <= 0)
  93. {
  94. if (nodepcre == NULL)
  95. {
  96. free(regex);
  97. }
  98. return FALSE;
  99. }
  100. // SUB_FIX handles %1 to %99 usage. Backward compatibility.
  101. switch (flag)
  102. {
  103. case SUB_CMD:
  104. for (i = 0 ; i < matches ; i++)
  105. {
  106. gtd->cmds[i] = restringf(gtd->cmds[i], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
  107. }
  108. break;
  109. case SUB_CMD + SUB_FIX:
  110. for (i = 0 ; i < matches ; i++)
  111. {
  112. j = gtd->args[i];
  113. gtd->cmds[j] = restringf(gtd->cmds[j], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
  114. }
  115. break;
  116. case SUB_ARG:
  117. for (i = 0 ; i < matches ; i++)
  118. {
  119. gtd->vars[i] = restringf(gtd->vars[i], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
  120. }
  121. break;
  122. case SUB_ARG + SUB_FIX:
  123. for (i = 0 ; i < matches ; i++)
  124. {
  125. j = gtd->args[i];
  126. gtd->vars[j] = restringf(gtd->vars[j], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
  127. }
  128. break;
  129. }
  130. if (nodepcre == NULL)
  131. {
  132. free(regex);
  133. }
  134. return TRUE;
  135. }
  136. pcre *regexp_compile(char *exp, int option)
  137. {
  138. const char *error;
  139. int i;
  140. return pcre_compile(exp, option, &error, &i, NULL);
  141. }
  142. /******************************************************************************
  143. * Calls tintin_regexp checking if the string matches, and automatically fills *
  144. * in the text represented by the wildcards on success. *
  145. ******************************************************************************/
  146. int check_one_regexp(struct session *ses, struct listnode *node, char *line, char *original, int option)
  147. {
  148. char *exp, *str;
  149. if (node->regex == NULL)
  150. {
  151. char result[BUFFER_SIZE];
  152. substitute(ses, node->arg1, result, SUB_VAR|SUB_FUN);
  153. exp = result;
  154. }
  155. else
  156. {
  157. exp = node->arg1;
  158. }
  159. if (*node->arg1 == '~')
  160. {
  161. exp++;
  162. str = original;
  163. }
  164. else
  165. {
  166. str = line;
  167. }
  168. return tintin_regexp(ses, node->regex, str, exp, option, SUB_ARG);
  169. }
  170. /*
  171. Keep synched with tintin_regexp and tintin_regexp_compile
  172. */
  173. int tintin_regexp_check(struct session *ses, char *exp)
  174. {
  175. if (*exp == '^')
  176. {
  177. return TRUE;
  178. }
  179. while (*exp)
  180. {
  181. if (HAS_BIT(ses->flags, SES_FLAG_BIG5) && *exp & 128 && exp[1] != 0)
  182. {
  183. exp += 2;
  184. continue;
  185. }
  186. switch (exp[0])
  187. {
  188. case '\\':
  189. case '{':
  190. return TRUE;
  191. case '$':
  192. if (exp[1] == 0)
  193. {
  194. return TRUE;
  195. }
  196. break;
  197. case '%':
  198. switch (exp[1])
  199. {
  200. case '0':
  201. case '1':
  202. case '2':
  203. case '3':
  204. case '4':
  205. case '5':
  206. case '6':
  207. case '7':
  208. case '8':
  209. case '9':
  210. case 'd':
  211. case 'D':
  212. case 'i':
  213. case 'I':
  214. case 's':
  215. case 'S':
  216. case 'w':
  217. case 'W':
  218. case '?':
  219. case '*':
  220. case '+':
  221. case '.':
  222. case '%':
  223. return TRUE;
  224. case '!':
  225. switch (exp[2])
  226. {
  227. case 'd':
  228. case 'D':
  229. case 's':
  230. case 'S':
  231. case 'w':
  232. case 'W':
  233. case '?':
  234. case '*':
  235. case '+':
  236. case '.':
  237. case '{':
  238. return TRUE;
  239. }
  240. break;
  241. }
  242. break;
  243. }
  244. exp++;
  245. }
  246. return FALSE;
  247. }
  248. int tintin_regexp(struct session *ses, pcre *nodepcre, char *str, char *exp, int option, int flag)
  249. {
  250. char out[BUFFER_SIZE], *pti, *pto;
  251. int arg = 1, var = 1, fix = 0;
  252. pti = exp;
  253. pto = out;
  254. while (*pti == '^')
  255. {
  256. *pto++ = *pti++;
  257. }
  258. while (*pti)
  259. {
  260. if (HAS_BIT(ses->flags, SES_FLAG_BIG5) && *pti & 128 && pti[1] != 0)
  261. {
  262. *pto++ = *pti++;
  263. switch (*pti)
  264. {
  265. case '\\':
  266. case '[':
  267. case ']':
  268. case '(':
  269. case ')':
  270. case '|':
  271. case '.':
  272. case '?':
  273. case '+':
  274. case '*':
  275. case '$':
  276. case '^':
  277. *pto++ = '\\';
  278. break;
  279. }
  280. *pto++ = *pti++;
  281. continue;
  282. }
  283. switch (pti[0])
  284. {
  285. case '\\':
  286. *pto++ = *pti++;
  287. *pto++ = *pti++;
  288. break;
  289. case '{':
  290. gtd->args[next_arg(var)] = next_arg(arg);
  291. *pto++ = '(';
  292. pti = get_arg_in_braces(ses, pti, pto, GET_ALL);
  293. pto += strlen(pto);
  294. *pto++ = ')';
  295. break;
  296. case '[':
  297. case ']':
  298. case '(':
  299. case ')':
  300. case '|':
  301. case '.':
  302. case '?':
  303. case '+':
  304. case '*':
  305. case '^':
  306. *pto++ = '\\';
  307. *pto++ = *pti++;
  308. break;
  309. case '$':
  310. if (pti[1] != DEFAULT_OPEN && !isalnum((int) pti[1]))
  311. {
  312. int i = 0;
  313. while (pti[++i] == '$')
  314. {
  315. continue;
  316. }
  317. if (pti[i])
  318. {
  319. *pto++ = '\\';
  320. }
  321. }
  322. *pto++ = *pti++;
  323. break;
  324. case '%':
  325. switch (pti[1])
  326. {
  327. case '0':
  328. case '1':
  329. case '2':
  330. case '3':
  331. case '4':
  332. case '5':
  333. case '6':
  334. case '7':
  335. case '8':
  336. case '9':
  337. fix = SUB_FIX;
  338. arg = isdigit((int) pti[2]) ? (pti[1] - '0') * 10 + (pti[2] - '0') : pti[1] - '0';
  339. gtd->args[next_arg(var)] = next_arg(arg);
  340. pti += isdigit((int) pti[2]) ? 3 : 2;
  341. strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
  342. pto += strlen(pto);
  343. break;
  344. case 'd':
  345. gtd->args[next_arg(var)] = next_arg(arg);
  346. pti += 2;
  347. strcpy(pto, *pti == 0 ? "([0-9]*)" : "([0-9]*?)");
  348. pto += strlen(pto);
  349. break;
  350. case 'D':
  351. gtd->args[next_arg(var)] = next_arg(arg);
  352. pti += 2;
  353. strcpy(pto, *pti == 0 ? "([^0-9]*)" : "([^0-9]*?)");
  354. pto += strlen(pto);
  355. break;
  356. case 'i':
  357. pti += 2;
  358. strcpy(pto, "(?i)");
  359. pto += strlen(pto);
  360. break;
  361. case 'I':
  362. pti += 2;
  363. strcpy(pto, "(?-i)");
  364. pto += strlen(pto);
  365. break;
  366. case 's':
  367. gtd->args[next_arg(var)] = next_arg(arg);
  368. pti += 2;
  369. strcpy(pto, *pti == 0 ? "(\\s*)" : "(\\s*?)");
  370. pto += strlen(pto);
  371. break;
  372. case 'S':
  373. gtd->args[next_arg(var)] = next_arg(arg);
  374. pti += 2;
  375. strcpy(pto, *pti == 0 ? "(\\S*)" : "(\\S*?)");
  376. pto += strlen(pto);
  377. break;
  378. case 'w':
  379. gtd->args[next_arg(var)] = next_arg(arg);
  380. pti += 2;
  381. strcpy(pto, *pti == 0 ? "([a-zA-Z]*)" : "([a-zA-Z]*?)");
  382. pto += strlen(pto);
  383. break;
  384. case 'W':
  385. gtd->args[next_arg(var)] = next_arg(arg);
  386. pti += 2;
  387. strcpy(pto, *pti == 0 ? "([^a-zA-Z]*)" : "([^a-zA-Z]*?)");
  388. pto += strlen(pto);
  389. break;
  390. case '?':
  391. gtd->args[next_arg(var)] = next_arg(arg);
  392. pti += 2;
  393. strcpy(pto, *pti == 0 ? "(.?)" : "(.?" "?)");
  394. pto += strlen(pto);
  395. break;
  396. case '*':
  397. gtd->args[next_arg(var)] = next_arg(arg);
  398. pti += 2;
  399. strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
  400. pto += strlen(pto);
  401. break;
  402. case '+':
  403. gtd->args[next_arg(var)] = next_arg(arg);
  404. pti += 2;
  405. strcpy(pto, *pti == 0 ? "(.+)" : "(.+?)");
  406. pto += strlen(pto);
  407. break;
  408. case '.':
  409. gtd->args[next_arg(var)] = next_arg(arg);
  410. pti += 2;
  411. strcpy(pto, "(.)");
  412. pto += strlen(pto);
  413. break;
  414. case '%':
  415. *pto++ = *pti++;
  416. pti++;
  417. break;
  418. case '!':
  419. switch (pti[2])
  420. {
  421. case 'd':
  422. pti += 3;
  423. strcpy(pto, *pti == 0 ? "[0-9]*" : "[0-9]*?");
  424. pto += strlen(pto);
  425. break;
  426. case 'D':
  427. pti += 3;
  428. strcpy(pto, *pti == 0 ? "[^0-9]*" : "[^0-9]*?");
  429. pto += strlen(pto);
  430. break;
  431. case 's':
  432. pti += 3;
  433. strcpy(pto, *pti == 0 ? "\\s*" : "\\s*?");
  434. pto += strlen(pto);
  435. break;
  436. case 'S':
  437. pti += 3;
  438. strcpy(pto, *pti == 0 ? "\\S*" : "\\S*?");
  439. pto += strlen(pto);
  440. break;
  441. case 'w':
  442. pti += 3;
  443. strcpy(pto, *pti == 0 ? "[a-zA-Z]*" : "[a-zA-Z]*?");
  444. pto += strlen(pto);
  445. break;
  446. case 'W':
  447. pti += 3;
  448. strcpy(pto, *pti == 0 ? "[^a-zA-Z]*" : "[^a-zA-Z]*?");
  449. pto += strlen(pto);
  450. break;
  451. case '?':
  452. pti += 3;
  453. strcpy(pto, *pti == 0 ? ".?" : ".?" "?");
  454. pto += strlen(pto);
  455. break;
  456. case '*':
  457. pti += 3;
  458. strcpy(pto, *pti == 0 ? ".*" : ".*?");
  459. pto += strlen(pto);
  460. break;
  461. case '+':
  462. pti += 3;
  463. strcpy(pto, *pti == 0 ? ".+" : ".+?");
  464. pto += strlen(pto);
  465. break;
  466. case '.':
  467. pti += 3;
  468. strcpy(pto, ".");
  469. pto += strlen(pto);
  470. break;
  471. case '{':
  472. pti = get_arg_in_braces(ses, pti+2, pto, GET_ALL);
  473. pto += strlen(pto);
  474. break;
  475. default:
  476. *pto++ = *pti++;
  477. break;
  478. }
  479. break;
  480. default:
  481. *pto++ = *pti++;
  482. break;
  483. }
  484. break;
  485. default:
  486. *pto++ = *pti++;
  487. break;
  488. }
  489. }
  490. *pto = 0;
  491. return regexp_compare(nodepcre, str, out, option, flag + fix);
  492. }
  493. pcre *tintin_regexp_compile(struct session *ses, struct listnode *node, char *exp, int option)
  494. {
  495. char out[BUFFER_SIZE], *pti, *pto;
  496. pti = exp;
  497. pto = out;
  498. if (*pti == '~')
  499. {
  500. pti++;
  501. }
  502. while (*pti == '^')
  503. {
  504. *pto++ = *pti++;
  505. }
  506. while (*pti)
  507. {
  508. if (HAS_BIT(ses->flags, SES_FLAG_BIG5) && *pti & 128 && pti[1] != 0)
  509. {
  510. *pto++ = *pti++;
  511. switch (*pti)
  512. {
  513. case '\\':
  514. case '[':
  515. case ']':
  516. case '(':
  517. case ')':
  518. case '|':
  519. case '.':
  520. case '?':
  521. case '+':
  522. case '*':
  523. case '$':
  524. case '^':
  525. *pto++ = '\\';
  526. break;
  527. }
  528. *pto++ = *pti++;
  529. continue;
  530. }
  531. switch (pti[0])
  532. {
  533. case '\\':
  534. *pto++ = *pti++;
  535. *pto++ = *pti++;
  536. break;
  537. case '{':
  538. *pto++ = '(';
  539. pti = get_arg_in_braces(ses, pti, pto, GET_ALL);
  540. while (*pto)
  541. {
  542. if (pto[0] == '$' || pto[0] == '@')
  543. {
  544. if (pto[1])
  545. {
  546. return NULL;
  547. }
  548. }
  549. pto++;
  550. }
  551. *pto++ = ')';
  552. break;
  553. case '&':
  554. if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]) || pti[1] == '&')
  555. {
  556. return NULL;
  557. }
  558. *pto++ = *pti++;
  559. break;
  560. case '@':
  561. if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]) || pti[1] == '@')
  562. {
  563. return NULL;
  564. }
  565. *pto++ = *pti++;
  566. break;
  567. case '$':
  568. if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]))
  569. {
  570. return NULL;
  571. }
  572. {
  573. int i = 0;
  574. while (pti[++i] == '$')
  575. {
  576. continue;
  577. }
  578. if (pti[i])
  579. {
  580. *pto++ = '\\';
  581. }
  582. }
  583. *pto++ = *pti++;
  584. break;
  585. case '[':
  586. case ']':
  587. case '(':
  588. case ')':
  589. case '|':
  590. case '.':
  591. case '?':
  592. case '+':
  593. case '*':
  594. case '^':
  595. *pto++ = '\\';
  596. *pto++ = *pti++;
  597. break;
  598. case '%':
  599. switch (pti[1])
  600. {
  601. case '0':
  602. case '1':
  603. case '2':
  604. case '3':
  605. case '4':
  606. case '5':
  607. case '6':
  608. case '7':
  609. case '8':
  610. case '9':
  611. pti += isdigit((int) pti[2]) ? 3 : 2;
  612. strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
  613. pto += strlen(pto);
  614. break;
  615. case 'd':
  616. pti += 2;
  617. strcpy(pto, *pti == 0 ? "([0-9]*)" : "([0-9]*?)");
  618. pto += strlen(pto);
  619. break;
  620. case 'D':
  621. pti += 2;
  622. strcpy(pto, *pti == 0 ? "([^0-9]*)" : "([^0-9]*?)");
  623. pto += strlen(pto);
  624. break;
  625. case 'i':
  626. pti += 2;
  627. strcpy(pto, "(?i)");
  628. pto += strlen(pto);
  629. break;
  630. case 'I':
  631. pti += 2;
  632. strcpy(pto, "(?-i)");
  633. pto += strlen(pto);
  634. break;
  635. case 's':
  636. pti += 2;
  637. strcpy(pto, *pti == 0 ? "(\\s*)" : "(\\s*?)");
  638. pto += strlen(pto);
  639. break;
  640. case 'S':
  641. pti += 2;
  642. strcpy(pto, *pti == 0 ? "(\\S*)" : "(\\S*?)");
  643. pto += strlen(pto);
  644. break;
  645. case 'w':
  646. pti += 2;
  647. strcpy(pto, *pti == 0 ? "([a-zA-Z]*)" : "([a-zA-Z]*?)");
  648. pto += strlen(pto);
  649. break;
  650. case 'W':
  651. pti += 2;
  652. strcpy(pto, *pti == 0 ? "([^a-zA-Z]*)" : "([^a-zA-Z]*?)");
  653. pto += strlen(pto);
  654. break;
  655. case '?':
  656. pti += 2;
  657. strcpy(pto, *pti == 0 ? "(.?)" : "(.?" "?)");
  658. pto += strlen(pto);
  659. break;
  660. case '*':
  661. pti += 2;
  662. strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
  663. pto += strlen(pto);
  664. break;
  665. case '+':
  666. pti += 2;
  667. strcpy(pto, *pti == 0 ? "(.+)" : "(.+?)");
  668. pto += strlen(pto);
  669. break;
  670. case '.':
  671. pti += 2;
  672. strcpy(pto, "(.)");
  673. pto += strlen(pto);
  674. break;
  675. case '%':
  676. *pto++ = *pti++;
  677. pti++;
  678. break;
  679. case '!':
  680. switch (pti[2])
  681. {
  682. case 'd':
  683. pti += 3;
  684. strcpy(pto, *pti == 0 ? "[0-9]*" : "[0-9]*?");
  685. pto += strlen(pto);
  686. break;
  687. case 'D':
  688. pti += 3;
  689. strcpy(pto, *pti == 0 ? "[^0-9]*" : "[^0-9]*?");
  690. pto += strlen(pto);
  691. break;
  692. case 's':
  693. pti += 3;
  694. strcpy(pto, *pti == 0 ? "\\s*" : "\\s*?");
  695. pto += strlen(pto);
  696. break;
  697. case 'S':
  698. pti += 3;
  699. strcpy(pto, *pti == 0 ? "\\S*" : "\\S*?");
  700. pto += strlen(pto);
  701. break;
  702. case 'w':
  703. pti += 3;
  704. strcpy(pto, *pti == 0 ? "[a-zA-Z]*" : "[a-zA-Z]*?");
  705. pto += strlen(pto);
  706. break;
  707. case 'W':
  708. pti += 3;
  709. strcpy(pto, *pti == 0 ? "[^a-zA-Z]*" : "[^a-zA-Z]*?");
  710. pto += strlen(pto);
  711. break;
  712. case '?':
  713. pti += 3;
  714. strcpy(pto, *pti == 0 ? ".?" : ".?" "?");
  715. pto += strlen(pto);
  716. break;
  717. case '*':
  718. pti += 3;
  719. strcpy(pto, *pti == 0 ? ".*" : ".*?");
  720. pto += strlen(pto);
  721. break;
  722. case '+':
  723. pti += 3;
  724. strcpy(pto, *pti == 0 ? ".+" : ".+?");
  725. pto += strlen(pto);
  726. break;
  727. case '.':
  728. pti += 3;
  729. strcpy(pto, ".");
  730. pto += strlen(pto);
  731. break;
  732. case '{':
  733. pti = get_arg_in_braces(ses, pti+2, pto, GET_ALL);
  734. while (*pto)
  735. {
  736. if (pto[0] == '$' || pto[0] == '@')
  737. {
  738. if (pto[1])
  739. {
  740. return NULL;
  741. }
  742. }
  743. pto++;
  744. }
  745. break;
  746. default:
  747. *pto++ = *pti++;
  748. break;
  749. }
  750. break;
  751. default:
  752. *pto++ = *pti++;
  753. break;
  754. }
  755. break;
  756. default:
  757. *pto++ = *pti++;
  758. break;
  759. }
  760. }
  761. *pto = 0;
  762. // printf("debug regex compile (%s)\n", out);
  763. return regexp_compile(out, option);
  764. }
  765. void tintin_macro_compile(char *input, char *output)
  766. {
  767. char *pti, *pto;
  768. pti = input;
  769. pto = output;
  770. if (*pti == '^')
  771. {
  772. pti++;
  773. }
  774. while (*pti)
  775. {
  776. switch (pti[0])
  777. {
  778. case '\\':
  779. switch (pti[1])
  780. {
  781. case 'C':
  782. if (pti[2] == '-' && pti[3])
  783. {
  784. *pto++ = pti[3] - 'a' + 1;
  785. pti += 4;
  786. }
  787. else
  788. {
  789. *pto++ = *pti++;
  790. }
  791. break;
  792. case 'c':
  793. *pto++ = pti[2] % 32;
  794. pti += 3;
  795. break;
  796. case 'a':
  797. *pto++ = ASCII_BEL;
  798. pti += 2;
  799. break;
  800. case 'b':
  801. *pto++ = 127;
  802. pti += 2;
  803. break;
  804. case 'e':
  805. *pto++ = ASCII_ESC;
  806. pti += 2;
  807. break;
  808. case 'r':
  809. *pto++ = ASCII_CR;
  810. pti += 2;
  811. break;
  812. case 't':
  813. *pto++ = ASCII_HTAB;
  814. pti += 2;
  815. break;
  816. case 'x':
  817. if (pti[2] && pti[3])
  818. {
  819. *pto++ = hex_number_8bit(&pti[2]);
  820. pti += 4;
  821. }
  822. else
  823. {
  824. *pto++ = *pti++;
  825. }
  826. break;
  827. default:
  828. *pto++ = *pti++;
  829. break;
  830. }
  831. break;
  832. default:
  833. *pto++ = *pti++;
  834. break;
  835. }
  836. }
  837. *pto = 0;
  838. }