micropython-lib: move to PCRE2
authorChristian Marangi <redacted>
Thu, 28 Sep 2023 21:51:28 +0000 (23:51 +0200)
committerJosef Schlehofer <redacted>
Sun, 22 Oct 2023 11:26:14 +0000 (13:26 +0200)
Add pending patch converting the package to PCRE2.

Signed-off-by: Christian Marangi <redacted>
lang/python/micropython-lib/Makefile
lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch [new file with mode: 0644]

index 7bd167898a871edee62cba3593449ba041286d96..490e6e2e7d3549f07d59a50799895cb352275c5f 100644 (file)
@@ -8,7 +8,7 @@
 include $(TOPDIR)/rules.mk
 
 PKG_NAME:=micropython-lib
-PKG_RELEASE:=1
+PKG_RELEASE:=2
 
 PKG_SOURCE_PROTO:=git
 PKG_SOURCE_URL:=https://github.com/micropython/micropython-lib.git
@@ -50,7 +50,7 @@ endef
 define Package/micropython-lib-unix
 $(call Package/micropython-lib/Default)
   TITLE+= - Unix port packages
-  DEPENDS:=+micropython +libpcre +librt +libsqlite3
+  DEPENDS:=+micropython +libpcre2 +librt +libsqlite3
 endef
 
 define Package/micropython-lib-unix-src
diff --git a/lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch b/lang/python/micropython-lib/patches/004-unix-ffi-re-convert-to-PCRE2.patch
new file mode 100644 (file)
index 0000000..6bf539b
--- /dev/null
@@ -0,0 +1,148 @@
+From 1cbe8c4dd653336c5766dfd75eb379ad37f04249 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Thu, 28 Sep 2023 20:59:26 +0200
+Subject: [PATCH] unix-ffi: re: convert to PCRE2
+
+PCRE is marked as EOL and won't receive any new security update.
+
+Convert the re module to PCRE2 API to enforce security.
+Additional dependency is now needed with uctypes due to changes in how
+PCRE2 return the match_data in a pointer and require special handling.
+
+The converted module is tested with the test_re.py with no regression.
+
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+---
+ unix-ffi/re/re.py | 73 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 48 insertions(+), 25 deletions(-)
+
+--- a/unix-ffi/re/re.py
++++ b/unix-ffi/re/re.py
+@@ -1,36 +1,55 @@
+ import sys
+ import ffilib
+ import array
++import uctypes
++pcre2 = ffilib.open("libpcre2-8")
+-pcre = ffilib.open("libpcre")
+-
+-#       pcre *pcre_compile(const char *pattern, int options,
+-#            const char **errptr, int *erroffset,
+-#            const unsigned char *tableptr);
+-pcre_compile = pcre.func("p", "pcre_compile", "sipps")
+-
+-#       int pcre_exec(const pcre *code, const pcre_extra *extra,
+-#            const char *subject, int length, int startoffset,
+-#            int options, int *ovector, int ovecsize);
+-pcre_exec = pcre.func("i", "pcre_exec", "PPsiiipi")
+-
+-#       int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
+-#            int what, void *where);
+-pcre_fullinfo = pcre.func("i", "pcre_fullinfo", "PPip")
+-
+-
+-IGNORECASE = I = 1
+-MULTILINE = M = 2
+-DOTALL = S = 4
+-VERBOSE = X = 8
+-PCRE_ANCHORED = 0x10
++#       pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
++#           uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
++#           pcre2_compile_context *ccontext);
++pcre2_compile = pcre2.func("p", "pcre2_compile_8", "siippp")
++
++#       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
++#           PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
++#           pcre2_match_data *match_data, pcre2_match_context *mcontext);
++pcre2_match = pcre2.func("i", "pcre2_match_8", "Psiiipp")
++
++#       int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
++#           void *where);
++pcre2_pattern_info = pcre2.func("i", "pcre2_pattern_info_8", "Pip")
++
++#       PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
++pcre2_get_ovector_pointer = pcre2.func("p", "pcre2_get_ovector_pointer_8", "p")
++
++#       pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code,
++#           pcre2_general_context *gcontext);
++pcre2_match_data_create_from_pattern = pcre2.func(
++    "p", "pcre2_match_data_create_from_pattern_8", "Pp"
++)
++
++# PCRE2_SIZE that is of type size_t.
++# Use ULONG as type to support both 32bit and 64bit.
++PCRE2_SIZE_SIZE = uctypes.sizeof({"field": 0 | uctypes.ULONG})
++PCRE2_SIZE_TYPE = "L"
++
++# Real value in pcre2.h is 0xFFFFFFFF for 32bit and
++# 0x0xFFFFFFFFFFFFFFFF for 64bit that is equivalent
++# to -1
++PCRE2_ZERO_TERMINATED = -1
++
++
++IGNORECASE = I = 0x8
++MULTILINE = M = 0x400
++DOTALL = S = 0x20
++VERBOSE = X = 0x80
++PCRE2_ANCHORED = 0x80000000
+ # TODO. Note that Python3 has unicode by default
+ ASCII = A = 0
+ UNICODE = U = 0
+-PCRE_INFO_CAPTURECOUNT = 2
++PCRE2_INFO_CAPTURECOUNT = 0x4
+ class PCREMatch:
+@@ -67,19 +86,23 @@ class PCREPattern:
+     def search(self, s, pos=0, endpos=-1, _flags=0):
+         assert endpos == -1, "pos: %d, endpos: %d" % (pos, endpos)
+         buf = array.array("i", [0])
+-        pcre_fullinfo(self.obj, None, PCRE_INFO_CAPTURECOUNT, buf)
++        pcre2_pattern_info(self.obj, PCRE2_INFO_CAPTURECOUNT, buf)
+         cap_count = buf[0]
+-        ov = array.array("i", [0, 0, 0] * (cap_count + 1))
+-        num = pcre_exec(self.obj, None, s, len(s), pos, _flags, ov, len(ov))
++        match_data = pcre2_match_data_create_from_pattern(self.obj, None)
++        num = pcre2_match(self.obj, s, len(s), pos, _flags, match_data, None)
+         if num == -1:
+             # No match
+             return None
++        ov_ptr = pcre2_get_ovector_pointer(match_data)
++        # pcre2_get_ovector_pointer return PCRE2_SIZE
++        ov_buf = uctypes.bytearray_at(ov_ptr, PCRE2_SIZE_SIZE * (cap_count + 1) * 2)
++        ov = array.array(PCRE2_SIZE_TYPE, ov_buf)
+         # We don't care how many matching subexpressions we got, we
+         # care only about total # of capturing ones (including empty)
+         return PCREMatch(s, cap_count + 1, ov)
+     def match(self, s, pos=0, endpos=-1):
+-        return self.search(s, pos, endpos, PCRE_ANCHORED)
++        return self.search(s, pos, endpos, PCRE2_ANCHORED)
+     def sub(self, repl, s, count=0):
+         if not callable(repl):
+@@ -141,9 +164,9 @@ class PCREPattern:
+ def compile(pattern, flags=0):
+-    errptr = bytes(4)
++    errcode = bytes(4)
+     erroffset = bytes(4)
+-    regex = pcre_compile(pattern, flags, errptr, erroffset, None)
++    regex = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, flags, errcode, erroffset, None)
+     assert regex
+     return PCREPattern(regex)
+@@ -154,7 +177,7 @@ def search(pattern, string, flags=0):
+ def match(pattern, string, flags=0):
+-    r = compile(pattern, flags | PCRE_ANCHORED)
++    r = compile(pattern, flags | PCRE2_ANCHORED)
+     return r.search(string)
git clone https://git.99rst.org/PROJECT