Fix SEGV, memory leak, integer overflow, and other issues

This commit is contained in:
panxiaohe 2021-11-11 20:19:02 +08:00
parent cf302c5c47
commit a3d08d8657
8 changed files with 392 additions and 1 deletions

View File

@ -0,0 +1,26 @@
From 7929f05710134b9b243952019b6c14066cd3ac9e Mon Sep 17 00:00:00 2001
From: yanjinjq <jiqin.ji@huawei.com>
Date: Sun, 30 Aug 2020 10:34:01 +0000
Subject: [PATCH] Fix SEGV in xmlSAXParseFileWithData
Fixes #181.
---
parser.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/parser.c b/parser.c
index be14c32..f779eb6 100644
--- a/parser.c
+++ b/parser.c
@@ -14077,7 +14077,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
if ((ctxt->wellFormed) || recovery) {
ret = ctxt->myDoc;
- if (ret != NULL) {
+ if ((ret != NULL) && (ctxt->input->buf != NULL)) {
if (ctxt->input->buf->compressed > 0)
ret->compression = 9;
else
--
1.8.3.1

View File

@ -0,0 +1,42 @@
From 45da175c1431d69e74e05a115f0b14cc8c97d886 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 18 Dec 2020 12:14:52 +0100
Subject: [PATCH] Fix memory leak in xmlParseElementMixedContentDecl
Free parsed content if malloc fails to avoid a memory leak.
Found with libFuzzer.
---
parser.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/parser.c b/parser.c
index 85494df..43b8835 100644
--- a/parser.c
+++ b/parser.c
@@ -6082,14 +6082,20 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
NEXT;
if (elem == NULL) {
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
- if (ret == NULL) return(NULL);
+ if (ret == NULL) {
+ xmlFreeDocElementContent(ctxt->myDoc, cur);
+ return(NULL);
+ }
ret->c1 = cur;
if (cur != NULL)
cur->parent = ret;
cur = ret;
} else {
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
- if (n == NULL) return(NULL);
+ if (n == NULL) {
+ xmlFreeDocElementContent(ctxt->myDoc, ret);
+ return(NULL);
+ }
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
if (n->c1 != NULL)
n->c1->parent = n;
--
1.8.3.1

View File

@ -0,0 +1,30 @@
From 1d73f07d67e32d8eaccd85bc46c5d277a1dc00c9 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 18 Dec 2020 00:55:00 +0100
Subject: [PATCH] Fix null deref in xmlStringGetNodeList
Check for malloc failure to avoid null deref.
Found with libFuzzer.
---
tree.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tree.c b/tree.c
index 64572d9..2130d55 100644
--- a/tree.c
+++ b/tree.c
@@ -1649,6 +1649,10 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) {
if (!xmlBufIsEmpty(buf)) {
node = xmlNewDocText(doc, NULL);
+ if (node == NULL) {
+ xmlBufFree(buf);
+ return(NULL);
+ }
node->content = xmlBufDetach(buf);
if (last == NULL) {
--
1.8.3.1

View File

@ -0,0 +1,100 @@
From dcb80b92da0417bc5b3d97ab8a61381973f1711b Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sat, 20 Feb 2021 20:30:43 +0100
Subject: [PATCH] Fix slow parsing of HTML with encoding errors
Under certain circumstances, the HTML parser would try to guess and
switch input encodings multiple times, leading to slow processing of
documents with encoding errors. The repeated scanning of the input
buffer when guessing encodings could even lead to quadratic behavior.
The code in htmlCurrentChar probably assumed that if there's an encoding
handler, it is guaranteed to produce valid UTF-8. This holds true in
general, but if the detected encoding was "UTF-8", the UTF8ToUTF8
encoding handler simply invoked memcpy without checking for invalid
UTF-8. This still must be fixed, preferably by not using this handler
at all.
Also leave a note that switching encodings twice seems impossible to
implement correctly. Add a check when handling UTF-8 encoding errors
in htmlCurrentChar to avoid this situation, even if encoders produce
invalid UTF-8.
Found by OSS-Fuzz.
---
HTMLparser.c | 18 ++++++++++++++++--
encoding.c | 5 +++++
parserInternals.c | 5 +++++
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/HTMLparser.c b/HTMLparser.c
index 14cc56f..c9a64c7 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -457,7 +457,12 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
ctxt->input->encoding = guess;
handler = xmlFindCharEncodingHandler((const char *) guess);
if (handler != NULL) {
- xmlSwitchToEncoding(ctxt, handler);
+ /*
+ * Don't use UTF-8 encoder which isn't required and
+ * can produce invalid UTF-8.
+ */
+ if (!xmlStrEqual(BAD_CAST handler->name, BAD_CAST "UTF-8"))
+ xmlSwitchToEncoding(ctxt, handler);
} else {
htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
"Unsupported encoding %s", guess, NULL);
@@ -570,7 +575,16 @@ encoding_error:
BAD_CAST buffer, NULL);
}
- ctxt->charset = XML_CHAR_ENCODING_8859_1;
+ /*
+ * Don't switch encodings twice. Note that if there's an encoder, we
+ * shouldn't receive invalid UTF-8 anyway.
+ *
+ * Note that if ctxt->input->buf == NULL, switching encodings is
+ * impossible, see Gitlab issue #34.
+ */
+ if ((ctxt->input->buf != NULL) &&
+ (ctxt->input->buf->encoder == NULL))
+ xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
*len = 1;
return((int) *ctxt->input->cur);
}
diff --git a/encoding.c b/encoding.c
index d67c16d..cdff6ae 100644
--- a/encoding.c
+++ b/encoding.c
@@ -373,6 +373,11 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
if (len < 0)
return(-1);
+ /*
+ * FIXME: Conversion functions must assure valid UTF-8, so we have
+ * to check for UTF-8 validity. Preferably, this converter shouldn't
+ * be used at all.
+ */
memcpy(out, inb, len);
*outlen = len;
diff --git a/parserInternals.c b/parserInternals.c
index b0629ef..cbcfde0 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1153,6 +1153,11 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
* Note: this is a bit dangerous, but that's what it
* takes to use nearly compatible signature for different
* encodings.
+ *
+ * FIXME: Encoders might buffer partial byte sequences, so
+ * this probably can't work. We should return an error and
+ * make sure that callers never try to switch the encoding
+ * twice.
*/
xmlCharEncCloseFunc(input->buf->encoder);
input->buf->encoder = handler;
--
1.8.3.1

View File

@ -0,0 +1,39 @@
From 2f9382033e4c398dd1c9aae4d24fa9f649fbf23d Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 15 Jun 2020 15:45:47 +0200
Subject: [PATCH] Fix undefined behavior in UTF16LEToUTF8
Don't perform arithmetic on null pointer.
Found with libFuzzer and UBSan.
---
encoding.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/encoding.c b/encoding.c
index 8b6f349..1a6386a 100644
--- a/encoding.c
+++ b/encoding.c
@@ -496,13 +496,18 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
{
unsigned char* outstart = out;
const unsigned char* processed = inb;
- unsigned char* outend = out + *outlen;
+ unsigned char* outend;
unsigned short* in = (unsigned short*) inb;
unsigned short* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits;
+ if (*outlen == 0) {
+ *inlenb = 0;
+ return(0);
+ }
+ outend = out + *outlen;
if ((*inlenb % 2) == 1)
(*inlenb)--;
inlen = *inlenb / 2;
--
1.8.3.1

View File

@ -0,0 +1,79 @@
From 681f094e5bd1d0f6b38b27701d0d1bf1ca7a9a26 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 15 Jun 2020 15:23:05 +0200
Subject: [PATCH] Fix unsigned integer overflow in htmlParseTryOrFinish
Cast to signed type before subtraction to avoid unsigned integer
overflow. Also use ptrdiff_t to avoid potential integer truncation.
Found with libFuzzer and UBSan.
---
HTMLparser.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/HTMLparser.c b/HTMLparser.c
index be7e14f..9ade663 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5339,7 +5339,7 @@ static int
htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
int ret = 0;
htmlParserInputPtr in;
- int avail = 0;
+ ptrdiff_t avail = 0;
xmlChar cur, next;
htmlParserNodeInfo node_info;
@@ -5404,7 +5404,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if ((avail == 0) && (terminate)) {
htmlAutoCloseOnEnd(ctxt);
if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
@@ -5440,7 +5441,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
}
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
ctxt->sax->setDocumentLocator(ctxt->userData,
@@ -5482,7 +5484,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
/*
* no chars in buffer
*/
@@ -5555,7 +5558,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if (avail < 2)
goto done;
cur = in->cur[0];
@@ -5596,7 +5600,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
if (in->buf == NULL)
avail = in->length - (in->cur - in->base);
else
- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
+ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
+ (in->cur - in->base);
if (avail < 1)
goto done;
cur = in->cur[0];
--
1.8.3.1

View File

@ -0,0 +1,55 @@
From 649d02eaa419fa72ae6b131718a4ac77063d7a5a Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Mon, 7 Dec 2020 20:19:53 +0800
Subject: [PATCH] encoding: fix memleak in xmlRegisterCharEncodingHandler()
The return type of xmlRegisterCharEncodingHandler() is void, so the caller
cannot determine whether xmlRegisterCharEncodingHandler() executed
successfully. When nbCharEncodingHandler >= MAX_ENCODING_HANDLERS, the
"handler" is not added to the "handlers" array. As a result, nothing owns
the memory of "handler" and it is never released: a memory leak.
Fix this by freeing "handler" (and its name) with xmlFree() on the failure
branches of xmlRegisterCharEncodingHandler().
Reported-by: wuqing <wuqing30@huawei.com>
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
---
encoding.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/encoding.c b/encoding.c
index 264f60b..d67c16d 100644
--- a/encoding.c
+++ b/encoding.c
@@ -1488,16 +1488,25 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
if ((handler == NULL) || (handlers == NULL)) {
xmlEncodingErr(XML_I18N_NO_HANDLER,
"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
- return;
+ goto free_handler;
}
if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
"MAX_ENCODING_HANDLERS");
- return;
+ goto free_handler;
}
handlers[nbCharEncodingHandler++] = handler;
+ return;
+
+free_handler:
+ if (handler != NULL) {
+ if (handler->name != NULL) {
+ xmlFree(handler->name);
+ }
+ xmlFree(handler);
+ }
}
/**
--
1.8.3.1

View File

@ -1,7 +1,7 @@
Summary: Library providing XML and HTML support
Name: libxml2
Version: 2.9.10
Release: 21
Release: 22
License: MIT
Group: Development/Libraries
Source: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz
@ -103,6 +103,13 @@ Patch90: Fix-XPath-recursion-limit.patch
Patch91: Fix-Null-deref-in-xmlSchemaGetComponentTargetNs.patch
Patch92: Fix-memleaks-in-xmlXIncludeProcessFlags.patch
Patch93: Fix-heap-use-after-free-in-xmlAddNextSibling-and-xmlAddChild.patch
Patch94: Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch
Patch95: Fix-undefined-behavior-in-UTF16LEToUTF8.patch
Patch96: Fix-SEGV-in-xmlSAXParseFileWithData.patch
Patch97: encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch
Patch98: Fix-null-deref-in-xmlStringGetNodeList.patch
Patch99: Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch
Patch100: Fix-slow-parsing-of-HTML-with-encoding-errors.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-root
BuildRequires: python2-devel
@ -295,6 +302,19 @@ rm -fr %{buildroot}
%changelog
* Thu Nov 11 2021 panxiaohe <panxiaohe@huawei.com> - 2.9.10-22
- Type:bugfix
- ID:NA
- SUG:NA
- DESC:fix following issues:
fix unsigned integer overflow in htmlParseTryOrFinish
fix undefined behavior in UTF16LEToUTF8
fix SEGV in xmlSAXParseFileWithData
encoding: fix memleak in xmlRegisterCharEncodingHandler()
fix null deref in xmlStringGetNodeList
fix memory leak in xmlParseElementMixedContentDecl
fix slow parsing of HTML with encoding errors
* Thu Nov 11 2021 panxiaohe <panxiaohe@huawei.com> - 2.9.10-21
- Type:bugfix
- ID:NA