| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200 |
- /******************************************************************************
- * This file is part of TinTin++ *
- * *
- * Copyright 2004-2020 Igor van den Hoven *
- * *
- * TinTin++ is free software; you can redistribute it and/or modify *
- * it under the terms of the GNU General Public License as published by *
- * the Free Software Foundation; either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU General Public License for more details. *
- * *
- * You should have received a copy of the GNU General Public License *
- * along with TinTin++. If not, see https://www.gnu.org/licenses. *
- ******************************************************************************/
- /******************************************************************************
- * T I N T I N + + *
- * *
- * coded by Igor van den Hoven 2004 *
- ******************************************************************************/
- #include <sys/types.h>
- #include <pcre.h>
- #include "tintin.h"
- int match(struct session *ses, char *str, char *exp, int sub)
- {
- char expbuf[BUFFER_SIZE];
- sprintf(expbuf, "\\A%s\\Z", exp);
- substitute(ses, expbuf, expbuf, sub);
- return tintin_regexp(ses, NULL, str, expbuf, 0, 0);
- }
- int find(struct session *ses, char *str, char *exp, int sub, int flag)
- {
- if (HAS_BIT(sub, SUB_VAR|SUB_FUN))
- {
- char expbuf[BUFFER_SIZE], strbuf[BUFFER_SIZE];
- substitute(ses, str, strbuf, SUB_VAR|SUB_FUN);
- substitute(ses, exp, expbuf, SUB_VAR|SUB_FUN);
- return tintin_regexp(ses, NULL, strbuf, expbuf, 0, flag);
- }
- else
- {
- return tintin_regexp(ses, NULL, str, exp, 0, flag);
- }
- }
- DO_COMMAND(do_regexp)
- {
- char arg1[BUFFER_SIZE], arg2[BUFFER_SIZE], is_t[BUFFER_SIZE], is_f[BUFFER_SIZE];
- arg = sub_arg_in_braces(ses, arg, arg1, GET_ONE, SUB_VAR|SUB_FUN);
- arg = sub_arg_in_braces(ses, arg, arg2, GET_ONE, SUB_VAR|SUB_FUN);
- arg = get_arg_in_braces(ses, arg, is_t, GET_ALL);
- arg = get_arg_in_braces(ses, arg, is_f, GET_ALL);
- if (*is_t == 0)
- {
- show_error(ses, LIST_COMMAND, "SYNTAX: #REGEXP {string} {expression} {true} {false}.");
- }
- else
- {
- if (tintin_regexp(ses, NULL, arg1, arg2, 0, REGEX_FLAG_CMD))
- {
- substitute(ses, is_t, is_t, SUB_CMD);
- ses = script_driver(ses, LIST_COMMAND, is_t);
- }
- else if (*is_f)
- {
- ses = script_driver(ses, LIST_COMMAND, is_f);
- }
- }
- return ses;
- }
- int regexp_compare(struct session *ses, pcre *nodepcre, char *str, char *exp, int option, int flag)
- {
- pcre *regex;
- int i, j, matches, match[303];
- if (nodepcre == NULL)
- {
- regex = regexp_compile(ses, exp, option);
- }
- else
- {
- regex = nodepcre;
- }
- if (regex == NULL)
- {
- return FALSE;
- }
- matches = pcre_exec(regex, NULL, str, strlen(str), 0, 0, match, 303);
- if (matches <= 0)
- {
- if (nodepcre == NULL)
- {
- free(regex);
- }
- return FALSE;
- }
- // REGEX_FLAG_FIX handles %1 to %99 usage. Backward compatibility.
- switch (flag)
- {
- case REGEX_FLAG_CMD:
- for (i = 0 ; i < matches ; i++)
- {
- gtd->cmds[i] = restringf(gtd->cmds[i], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
- }
- break;
- case REGEX_FLAG_CMD + REGEX_FLAG_FIX:
- for (i = 0 ; i < matches ; i++)
- {
- j = gtd->args[i];
- gtd->cmds[j] = restringf(gtd->cmds[j], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
- }
- break;
- case REGEX_FLAG_ARG:
- for (i = 0 ; i < matches ; i++)
- {
- gtd->vars[i] = restringf(gtd->vars[i], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
- }
- break;
- case REGEX_FLAG_ARG + REGEX_FLAG_FIX:
- for (i = 0 ; i < matches ; i++)
- {
- j = gtd->args[i];
- gtd->vars[j] = restringf(gtd->vars[j], "%.*s", match[i*2+1] - match[i*2], &str[match[i*2]]);
- }
- break;
- }
- if (nodepcre == NULL)
- {
- free(regex);
- }
- return TRUE;
- }
- pcre *regexp_compile(struct session *ses, char *exp, int option)
- {
- const char *error;
- int i;
- /*
- if (HAS_BIT(ses->charset, CHARSET_FLAG_UTF8))
- {
- option |= PCRE_UTF8|PCRE_NO_UTF8_CHECK;
- }
- */
- return pcre_compile(exp, option, &error, &i, NULL);
- }
- /******************************************************************************
- * Calls tintin_regexp checking if the string matches, and automatically fills *
- * in the text represented by the wildcards on success. *
- ******************************************************************************/
- int check_one_regexp(struct session *ses, struct listnode *node, char *line, char *original, int option)
- {
- char *exp, *str;
- if (node->regex == NULL)
- {
- char result[BUFFER_SIZE];
- substitute(ses, node->arg1, result, SUB_VAR|SUB_FUN);
- exp = result;
- }
- else
- {
- exp = node->arg1;
- }
- if (*node->arg1 == '~')
- {
- exp++;
- str = original;
- }
- else
- {
- str = line;
- }
- return tintin_regexp(ses, node->regex, str, exp, option, REGEX_FLAG_ARG);
- }
- /*
- Keep synched with tintin_regexp and tintin_regexp_compile
- */
- int get_regex_range(char *in, char *out, int *var, int *arg)
- {
- char *pti, *pto, *ptr, range[BUFFER_SIZE];
- pto = out;
- pti = in;
- ptr = range;
- while (*pti)
- {
- switch (*pti)
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- *ptr++ = *pti++;
- continue;
- case '.':
- if (pti[1] != '.')
- {
- goto end;
- }
- if (ptr == range)
- {
- *ptr++ = '0';
- }
- *ptr++ = ',';
- pti += 2;
- continue;
- case 'a':
- pto += sprintf(pto, "(.");
- break;
- case 'A':
- pto += sprintf(pto, "(\\n");
- break;
- case 'd':
- pto += sprintf(pto, "([0-9]");
- break;
- case 'D':
- pto += sprintf(pto, "([^0-9]");
- break;
- case 'p':
- pto += sprintf(pto, "([\\x20-\\xfe]");
- break;
- case 'P':
- pto += sprintf(pto, "([^\\x20-\\xfe]");
- break;
- case 's':
- pto += sprintf(pto, "(\\s");
- break;
- case 'S':
- pto += sprintf(pto, "(\\S");
- break;
- case 'u':
- pto += sprintf(pto, "((?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})");
- break;
- case 'U':
- pto += sprintf(pto, "([\\x00-\\x7F\\xFF]");
- break;
- case 'w':
- pto += sprintf(pto, "([a-zA-Z]");
- break;
- case 'W':
- pto += sprintf(pto, "([^a-zA-Z]");
- break;
- default:
- goto end;
- }
- *ptr = 0;
- pti++;
- pto += sprintf(pto, "{%s}%s", range, *pti ? "?)" : ")");
- return pti - in;
- }
- end:
- if (var)
- {
- gtd->args[next_arg(*var)] = next_arg(*arg);
- }
- strcpy(out, in[2] == 0 ? "(.+)" : "(.+?)");
- return 0;
- }
- int tintin_regexp_check(struct session *ses, char *exp)
- {
- if (*exp == '^')
- {
- return TRUE;
- }
- while (*exp)
- {
- if (HAS_BIT(ses->charset, CHARSET_FLAG_EUC) && is_euc_head(ses, exp))
- {
- exp += 2;
- continue;
- }
- switch (exp[0])
- {
- case '\\':
- case '{':
- return TRUE;
- case '$':
- if (exp[1] == 0)
- {
- return TRUE;
- }
- break;
- case '%':
- switch (exp[1])
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case 'a':
- case 'A':
- case 'd':
- case 'D':
- case 'i':
- case 'I':
- case 'p':
- case 'P':
- case 's':
- case 'S':
- case 'u':
- case 'U':
- case 'w':
- case 'W':
- case '?':
- case '*':
- case '+':
- case '.':
- case '%':
- return TRUE;
- case '!':
- switch (exp[2])
- {
- case 'a':
- case 'A':
- case 'd':
- case 'D':
- case 'p':
- case 'P':
- case 's':
- case 'S':
- case 'u':
- case 'U':
- case 'w':
- case 'W':
- case '?':
- case '*':
- case '+':
- case '.':
- case '{':
- return TRUE;
- }
- break;
- }
- break;
- }
- exp++;
- }
- return FALSE;
- }
- int tintin_regexp(struct session *ses, pcre *nodepcre, char *str, char *exp, int option, int flag)
- {
- char out[BUFFER_SIZE], *pti, *pto;
- int arg = 1, var = 1, fix = 0;
- pti = exp;
- pto = out;
- while (*pti == '^')
- {
- *pto++ = *pti++;
- }
- while (*pti)
- {
- if (HAS_BIT(ses->charset, CHARSET_FLAG_EUC) && is_euc_head(ses, pti))
- {
- *pto++ = *pti++;
- switch (*pti)
- {
- case '\\':
- case '[':
- case ']':
- case '(':
- case ')':
- case '|':
- case '.':
- case '?':
- case '+':
- case '*':
- case '$':
- case '^':
- *pto++ = '\\';
- break;
- }
- *pto++ = *pti++;
- continue;
- }
- switch (pti[0])
- {
- case '\\':
- *pto++ = *pti++;
- *pto++ = *pti++;
- break;
- case '{':
- gtd->args[next_arg(var)] = next_arg(arg);
- *pto++ = '(';
- pti = get_arg_in_braces(ses, pti, pto, GET_ALL);
- pto += strlen(pto);
- *pto++ = ')';
- break;
- case '[':
- case ']':
- case '(':
- case ')':
- case '|':
- case '.':
- case '?':
- case '+':
- case '*':
- case '^':
- *pto++ = '\\';
- *pto++ = *pti++;
- break;
- case '$':
- if (pti[1] != DEFAULT_OPEN && !isalnum((int) pti[1]))
- {
- int i = 0;
- while (pti[++i] == '$')
- {
- continue;
- }
- if (pti[i])
- {
- *pto++ = '\\';
- }
- }
- *pto++ = *pti++;
- break;
- case '%':
- switch (pti[1])
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- fix = REGEX_FLAG_FIX;
- arg = isdigit((int) pti[2]) ? (pti[1] - '0') * 10 + (pti[2] - '0') : pti[1] - '0';
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += isdigit((int) pti[2]) ? 3 : 2;
- strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
- pto += strlen(pto);
- break;
- case 'a':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "([^\\n]*)" : "([^\\n]*?)");
- pto += strlen(pto);
- break;
- case 'A':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(\\n*)" : "(\\n*?)");
- pto += strlen(pto);
- break;
- case 'd':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "([0-9]*)" : "([0-9]*?)");
- pto += strlen(pto);
- break;
- case 'D':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "([^0-9]*)" : "([^0-9]*?)");
- pto += strlen(pto);
- break;
- case 'i':
- pti += 2;
- strcpy(pto, "(?i)");
- pto += strlen(pto);
- break;
- case 'I':
- pti += 2;
- strcpy(pto, "(?-i)");
- pto += strlen(pto);
- break;
- case 'p':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- pto += sprintf(pto, "%s", *pti == 0 ? "([\\x20-\\xfe]*)" : "([\\x20-\\xfe]*?)");
- break;
- case 'P':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- pto += sprintf(pto, "%s", *pti == 0 ? "([^\\x20-\\xfe]*)" : "([^\\x20-\\xfe]*?)");
- break;
- case 's':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(\\s*)" : "(\\s*?)");
- pto += strlen(pto);
- break;
- case 'S':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(\\S*)" : "(\\S*?)");
- pto += strlen(pto);
- break;
- case 'u':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "((?:[\\x00-\\x7F|\\xC0-\\xFE][\\x80-\\xC0]{1,3})*)" : "((?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})*?)");
- pto += strlen(pto);
- break;
- case 'U':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(^[\xFF]*)" : "([\\x00-\\x7F\\xFF]*?)");
- pto += strlen(pto);
- break;
- case 'w':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "([a-zA-Z]*)" : "([a-zA-Z]*?)");
- pto += strlen(pto);
- break;
- case 'W':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "([^a-zA-Z]*)" : "([^a-zA-Z]*?)");
- pto += strlen(pto);
- break;
- case '*':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
- pto += strlen(pto);
- break;
- case '+':
- pti += 2 + get_regex_range(&pti[2], pto, &var, &arg);
- pto += strlen(pto);
- break;
- case '%':
- *pto++ = *pti++;
- pti++;
- break;
- case '.':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, "(.)");
- pto += strlen(pto);
- break;
- case '?':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "(.?)" : "(.?" "?)");
- pto += strlen(pto);
- break;
- case '!':
- switch (pti[2])
- {
- case 'a':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "[^\\n]*" : "[^\\n]*?");
- pto += strlen(pto);
- break;
- case 'A':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 2;
- strcpy(pto, *pti == 0 ? "\\n*" : "\\n*?");
- pto += strlen(pto);
- break;
- case 'd':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[0-9]*" : "[0-9]*?");
- pto += strlen(pto);
- break;
- case 'D':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[^0-9]*" : "[^0-9]*?");
- pto += strlen(pto);
- break;
- case 'p':
- pti += 3;
- pto += sprintf(pto, "%s", *pti == 0 ? "[\\x20-\\xfe]*" : "[\\x20-\\xfe]*?");
- break;
- case 'P':
- pti += 3;
- pto += sprintf(pto, "%s", *pti == 0 ? "[^\\x20-\\xfe]*" : "[^\\x20-\\xfe]*?");
- break;
- case 's':
- pti += 3;
- strcpy(pto, *pti == 0 ? "\\s*" : "\\s*?");
- pto += strlen(pto);
- break;
- case 'S':
- pti += 3;
- strcpy(pto, *pti == 0 ? "\\S*" : "\\S*?");
- pto += strlen(pto);
- break;
- case 'u':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 3;
- strcpy(pto, *pti == 0 ? "(?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})*" : "(?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})*?");
- pto += strlen(pto);
- break;
- case 'U':
- gtd->args[next_arg(var)] = next_arg(arg);
- pti += 3;
- strcpy(pto, *pti == 0 ? "[\\x00-\\x7F\\xFF]*" : "[\\x00-\\x7F\\xFF]*?");
- pto += strlen(pto);
- break;
- case 'w':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[a-zA-Z]*" : "[a-zA-Z]*?");
- pto += strlen(pto);
- break;
- case 'W':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[^a-zA-Z]*" : "[^a-zA-Z]*?");
- pto += strlen(pto);
- break;
- case '?':
- pti += 3;
- strcpy(pto, *pti == 0 ? ".?" : ".?" "?");
- pto += strlen(pto);
- break;
- case '*':
- pti += 3;
- strcpy(pto, *pti == 0 ? ".*" : ".*?");
- pto += strlen(pto);
- break;
- case '+':
- pti += 3 + get_regex_range(&pti[3], pto, NULL, NULL);
- pto += strlen(pto);
- break;
- case '.':
- pti += 3;
- strcpy(pto, ".");
- pto += strlen(pto);
- break;
- case '{':
- pti = get_arg_in_braces(ses, pti+2, pto, GET_ALL);
- pto += strlen(pto);
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- }
- *pto = 0;
- return regexp_compare(ses, nodepcre, str, out, option, flag + fix);
- }
- pcre *tintin_regexp_compile(struct session *ses, struct listnode *node, char *exp, int option)
- {
- char out[BUFFER_SIZE], *pti, *pto;
- pti = exp;
- pto = out;
- if (*pti == '~')
- {
- pti++;
- }
- while (*pti == '^')
- {
- *pto++ = *pti++;
- }
- while (*pti)
- {
- if (HAS_BIT(ses->charset, CHARSET_FLAG_EUC) && is_euc_head(ses, pti))
- {
- *pto++ = *pti++;
- switch (*pti)
- {
- case '\\':
- case '[':
- case ']':
- case '(':
- case ')':
- case '|':
- case '.':
- case '?':
- case '+':
- case '*':
- case '$':
- case '^':
- *pto++ = '\\';
- break;
- }
- *pto++ = *pti++;
- continue;
- }
- switch (pti[0])
- {
- case '\\':
- *pto++ = *pti++;
- *pto++ = *pti++;
- break;
- case '{':
- *pto++ = '(';
- pti = get_arg_in_braces(ses, pti, pto, GET_ALL);
- while (*pto)
- {
- if (pto[0] == '$' || pto[0] == '@')
- {
- if (pto[1])
- {
- return NULL;
- }
- }
- pto++;
- }
- *pto++ = ')';
- break;
- case '&':
- if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]) || pti[1] == '&')
- {
- return NULL;
- }
- *pto++ = *pti++;
- break;
- case '@':
- if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]) || pti[1] == '@')
- {
- return NULL;
- }
- *pto++ = *pti++;
- break;
- case '$':
- if (pti[1] == DEFAULT_OPEN || isalnum((int) pti[1]))
- {
- return NULL;
- }
- {
- int i = 0;
- while (pti[++i] == '$')
- {
- continue;
- }
- if (pti[i])
- {
- *pto++ = '\\';
- }
- }
- *pto++ = *pti++;
- break;
- case '[':
- case ']':
- case '(':
- case ')':
- case '|':
- case '.':
- case '?':
- case '+':
- case '*':
- case '^':
- *pto++ = '\\';
- *pto++ = *pti++;
- break;
- case '%':
- switch (pti[1])
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- pti += isdigit((int) pti[2]) ? 3 : 2;
- strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
- pto += strlen(pto);
- break;
- case 'd':
- pti += 2;
- strcpy(pto, *pti == 0 ? "([0-9]*)" : "([0-9]*?)");
- pto += strlen(pto);
- break;
- case 'D':
- pti += 2;
- strcpy(pto, *pti == 0 ? "([^0-9]*)" : "([^0-9]*?)");
- pto += strlen(pto);
- break;
- case 'i':
- pti += 2;
- strcpy(pto, "(?i)");
- pto += strlen(pto);
- break;
- case 'I':
- pti += 2;
- strcpy(pto, "(?-i)");
- pto += strlen(pto);
- break;
- case 'p':
- pti += 2;
- pto += sprintf(pto, "%s", *pti == 0 ? "([\\x20-\\xfe]*)" : "([\\x20-\\xfe]*?)");
- break;
- case 'P':
- pti += 2;
- pto += sprintf(pto, "%s", *pti == 0 ? "([^\\x20-\\xfe]*)" : "([^\\x20-\\xfe]*?)");
- break;
-
- case 's':
- pti += 2;
- strcpy(pto, *pti == 0 ? "(\\s*)" : "(\\s*?)");
- pto += strlen(pto);
- break;
- case 'S':
- pti += 2;
- strcpy(pto, *pti == 0 ? "(\\S*)" : "(\\S*?)");
- pto += strlen(pto);
- break;
- case 'u':
- pti += 2;
- strcpy(pto, *pti == 0 ? "((?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})*)" : "((?:[\\xC0-\\xFE][\\x80-\\xC0]{1,3})*?)");
- pto += strlen(pto);
- break;
- case 'U':
- pti += 2;
- strcpy(pto, *pti == 0 ? "([\\x00-\\x7F\\xFF]*)" : "([\\x00-\\x7F\\xFF]*?)");
- pto += strlen(pto);
- break;
- case 'w':
- pti += 2;
- strcpy(pto, *pti == 0 ? "([a-zA-Z]*)" : "([a-zA-Z]*?)");
- pto += strlen(pto);
- break;
- case 'W':
- pti += 2;
- strcpy(pto, *pti == 0 ? "([^a-zA-Z]*)" : "([^a-zA-Z]*?)");
- pto += strlen(pto);
- break;
- case '?':
- pti += 2;
- strcpy(pto, *pti == 0 ? "(.?)" : "(.?" "?)");
- pto += strlen(pto);
- break;
- case '*':
- pti += 2;
- strcpy(pto, *pti == 0 ? "(.*)" : "(.*?)");
- pto += strlen(pto);
- break;
- case '+':
- pti += 2 + get_regex_range(&pti[2], pto, NULL, NULL);
- pto += strlen(pto);
- break;
- case '.':
- pti += 2;
- strcpy(pto, "(.)");
- pto += strlen(pto);
- break;
- case '%':
- *pto++ = *pti++;
- pti++;
- break;
- case '!':
- switch (pti[2])
- {
- case 'd':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[0-9]*" : "[0-9]*?");
- pto += strlen(pto);
- break;
- case 'D':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[^0-9]*" : "[^0-9]*?");
- pto += strlen(pto);
- break;
- case 'p':
- pti += 3;
- pto += sprintf(pto, "%s", *pti == 0 ? "[\\x21-\\x7E]*" : "[\\x21-\\x7E]?*");
- break;
- case 'P':
- pti += 3;
- pto += sprintf(pto, "%s", *pti == 0 ? "[^\\x20-\\xfe]*" : "[^\\x20-\\xfe]*?");
- break;
- case 's':
- pti += 3;
- strcpy(pto, *pti == 0 ? "\\s*" : "\\s*?");
- pto += strlen(pto);
- break;
- case 'S':
- pti += 3;
- strcpy(pto, *pti == 0 ? "\\S*" : "\\S*?");
- pto += strlen(pto);
- break;
- case 'w':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[a-zA-Z]*" : "[a-zA-Z]*?");
- pto += strlen(pto);
- break;
- case 'W':
- pti += 3;
- strcpy(pto, *pti == 0 ? "[^a-zA-Z]*" : "[^a-zA-Z]*?");
- pto += strlen(pto);
- break;
- case '?':
- pti += 3;
- strcpy(pto, *pti == 0 ? ".?" : ".?" "?");
- pto += strlen(pto);
- break;
- case '*':
- pti += 3;
- strcpy(pto, *pti == 0 ? ".*" : ".*?");
- pto += strlen(pto);
- break;
- case '+':
- pti += 3 + get_regex_range(&pti[3], pto, NULL, NULL);
- pto += strlen(pto);
- break;
- case '.':
- pti += 3;
- strcpy(pto, ".");
- pto += strlen(pto);
- break;
- case '{':
- pti = get_arg_in_braces(ses, pti+2, pto, GET_ALL);
- while (*pto)
- {
- if (pto[0] == '$' || pto[0] == '@')
- {
- if (pto[1])
- {
- return NULL;
- }
- }
- pto++;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- }
- *pto = 0;
- return regexp_compile(ses, out, option);
- }
- void tintin_macro_compile(char *input, char *output)
- {
- char *pti, *pto;
- pti = input;
- pto = output;
- if (*pti == '^')
- {
- pti++;
- }
- while (*pti)
- {
- switch (pti[0])
- {
- case '\\':
- switch (pti[1])
- {
- case 'C':
- if (pti[2] == '-' && pti[3])
- {
- *pto++ = pti[3] - 'a' + 1;
- pti += 4;
- }
- else
- {
- *pto++ = *pti++;
- }
- break;
- case 'c':
- *pto++ = pti[2] % 32;
- pti += 3;
- break;
- case 'a':
- *pto++ = ASCII_BEL;
- pti += 2;
- break;
- case 'b':
- *pto++ = 127;
- pti += 2;
- break;
- case 'e':
- *pto++ = ASCII_ESC;
- pti += 2;
- break;
- case 'r':
- *pto++ = ASCII_CR;
- pti += 2;
- break;
- case 't':
- *pto++ = ASCII_HTAB;
- pti += 2;
- break;
- case 'x':
- if (pti[2] && pti[3])
- {
- *pto++ = hex_number_8bit(&pti[2]);
- pti += 4;
- }
- else
- {
- *pto++ = *pti++;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- break;
- default:
- *pto++ = *pti++;
- break;
- }
- }
- *pto = 0;
- }
|