[prev in list] [next in list] [prev in thread] [next in thread]
List: busybox
Subject: vi: substitution command (:s) REGEX implementation
From: Andrey Dobrovolsky <andrey.dobrovolsky.odessa () gmail ! com>
Date: 2021-02-09 23:07:34
Message-ID: CAN5FJ7eQGUngkB0wz6LNOZJ4kCPKWe_+OW-zDBcPhBGGsZhoMA () mail ! gmail ! com
[Download RAW message or body]
commit 87baae194e360e67865aeb3bc7a47da600607b04
Author: AndreyDobrovolskyOdessa <andrey.dobrovolsky.odessa@gmail.com>
Date: Wed Feb 10 00:17:06 2021 +0200
Implementing :s command utilizing full REGEXes power
Additional code and variables conditioned by ENABLE_FEATURE_VI_REGEX_SEARCH.
Added:
function do_substitution()
couple of local variables in colon() function
diff --git a/editors/vi.c b/editors/vi.c
index 458ca62..7c86f10 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2354,6 +2354,61 @@ static void setops(const char *args, const char
*opname, int flg_no,
#endif /* FEATURE_VI_COLON */
+
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+
+#define MAX_SUBPATTERN 10 // subpatterns \0 .. \9
+#define MAX_SUBSTITUTED_LEN (SIZE_MAX / 2)
+
+/*
+ * do_substitution - expand a :s replace pattern for one regex match.
+ *
+ * orig   - subject string the regm offsets refer to; may be NULL when
+ *          result is NULL (dry run)
+ * regm   - array of MAX_SUBPATTERN subpattern bounds
+ * s      - NUL-terminated replace pattern; "\0" .. "\9" insert the text
+ *          of the corresponding subpattern
+ * result - buffer receiving the NUL-terminated expansion, or NULL to only
+ *          compute its length; must hold the returned length plus one byte
+ *
+ * A "\N" whose subpattern has rm_so < 0 is copied literally, as is a
+ * backslash followed by anything other than a digit.
+ *
+ * Returns the expansion length (without the trailing NUL), or a value
+ * greater than MAX_SUBSTITUTED_LEN if the expansion would exceed that limit.
+ */
+static size_t do_substitution(char *orig, regmatch_t *regm, char *s,
+		char *result)
+{
+	size_t total_len = 0;
+
+	// terminate up front: an empty replace pattern never enters the loop
+	if (result)
+		*result = '\0';
+
+	while (*s) {
+		const char *from = s;	// memcpy source pointer
+		size_t len = 1;		// default: copy a single char from the replace pattern
+		int step = 1;		// number of replace pattern chars consumed
+
+		if (s[0] == '\\' && s[1] >= '0' && s[1] < '0' + MAX_SUBPATTERN) {
+			const regmatch_t *cur_match = &regm[s[1] - '0'];
+
+			if (cur_match->rm_so >= 0) {
+				len = cur_match->rm_eo - cur_match->rm_so;
+				// orig may be NULL in a dry run: no pointer arithmetic on it then
+				if (result)
+					from = orig + cur_match->rm_so;
+				step = 2;	// subpattern invocation consists of 2 chars
+			}
+		}
+
+		// checked before adding, so total_len cannot wrap around
+		if (len > MAX_SUBSTITUTED_LEN - total_len)
+			return MAX_SUBSTITUTED_LEN + 1;
+		total_len += len;
+
+		if (result) {
+			memcpy(result, from, len);
+			result += len;
+			*result = '\0';
+		}
+		s += step;
+	}
+
+	return total_len;
+}
+
+// do_substitution dry run: no subject string and no output buffer are
+// passed, so nothing is copied and only the length of the expanded
+// replacement is returned.
+// x - array of subpattern bounds, y - replace pattern
+
+#define get_substituted_size(x,y) do_substitution(NULL,x,y,NULL)
+
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
+
// buf must be no longer than MAX_INPUT_LEN!
static void colon(char *buf)
{
@@ -2759,6 +2814,14 @@ static void colon(char *buf)
int dont_chain_first_item = ALLOW_UNDO;
# endif
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_t preg;
+ int reg_err;
+ int cflags;
+ regmatch_t regmatch[MAX_SUBPATTERN];
+ char *Rsubst = NULL, *Rorig;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
// F points to the "find" pattern
// R points to the "replace" pattern
// replace the cmd line delimiters "/" with NULs
@@ -2784,11 +2847,46 @@ static void colon(char *buf)
if (e < 0)
e = b; // maybe :.s/foo/bar/
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ Rorig = R;
+ cflags=REG_EXTENDED;
+ if (ignorecase)
+ cflags = REG_EXTENDED | REG_ICASE;
+ memset(&preg, 0, sizeof(preg));
+ reg_err=regcomp(&preg,F,cflags);
+ if (reg_err) {
+ status_line(":s bad search pattern");
+ goto regex_search_end;
+ }
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
for (i = b; i <= e; i++) { // so, :20,23 s \0
find \0 replace \0
char *ls = q; // orig line start
char *found;
vc4:
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ found = NULL;
+ regmatch[0].rm_so = 0;
+ regmatch[0].rm_eo = end_line(q) - q;
+
if(regexec(&preg,q,MAX_SUBPATTERN,regmatch,REG_STARTEND)==0){
+ found = q + regmatch[0].rm_so;
+ len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+ len_R = get_substituted_size(regmatch,Rorig);
+ if(len_R > MAX_SUBSTITUTED_LEN){
+ status_line("Substituted
string length exceeded limit.");
+ break;
+ }
+ Rsubst = xmalloc(len_R + 1);
+ do_substitution(q,regmatch,Rorig,Rsubst);
+ R = Rsubst;
+ }
+# else
+
found = char_search(q, F, (FORWARD << 1) |
LIMITED); // search cur line only for "find"
+
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
if (found) {
uintptr_t bias;
// we found the "find" pattern - delete it
@@ -2802,6 +2900,12 @@ static void colon(char *buf)
found += bias;
ls += bias;
//q += bias; - recalculated anyway
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ free(Rsubst);
+ Rsubst=NULL;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
// check for "global" :s/foo/bar/g
if (gflag == 'g') {
if ((found + len_R) < end_line(ls)) {
@@ -2812,6 +2916,12 @@ static void colon(char *buf)
}
q = next_line(ls);
}
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+ regfree(&preg);
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
# endif /* FEATURE_VI_SEARCH */
} else if (strncmp(cmd, "version", i) == 0) { // show software version
status_line(BB_VER);
Best regards!
["vi_regex_subst" (application/octet-stream)]
_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic