From 25b3fa9861b53746e27849591b71310e91dfc46f Mon Sep 17 00:00:00 2001
From: Andreas Heinisch
Date: Mon, 23 Jan 2023 09:18:08 +0000
Subject: [PATCH 1/2] Changed skip_bom in order to address tdf#150023 - - Double BOM (invalid XML) prevents importing some Excel 2003 XML (Excel reads them)(https://bugs.documentfoundation.org/show_bug.cgi?id=150023)

---
 src/parser/sax_parser_base.cpp | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp
index 95d8c6e77..6fbc647c0 100644
--- a/src/parser/sax_parser_base.cpp
+++ b/src/parser/sax_parser_base.cpp
@@ -168,20 +168,18 @@ void parser_base::skip_bom()
         // Stream too short to have a byte order mark.
         return;
 
-    if (is_blank(cur_char()))
-        // Allow leading whitespace in the XML stream.
-        // TODO : Make this configurable since strictly speaking such an XML
-        // sttream is invalid.
-        return;
-
     // 0xef 0xbb 0 xbf is the UTF-8 byte order mark
     unsigned char c = static_cast<unsigned char>(cur_char());
     if (c != '<')
     {
-        if (c != 0xef || static_cast<unsigned char>(next_and_char()) != 0xbb ||
-            static_cast<unsigned char>(next_and_char()) != 0xbf || next_and_char() != '<')
-            throw malformed_xml_error(
-                "unsupported encoding. only 8 bit encodings are supported", offset());
+        do
+        {
+            if (c != 0xef || static_cast<unsigned char>(next_and_char()) != 0xbb
+                || static_cast<unsigned char>(next_and_char()) != 0xbf)
+                throw malformed_xml_error(
+                    "unsupported encoding. only 8 bit encodings are supported", offset());
+            c = static_cast<unsigned char>(next_and_char());
+        } while (c == 0xef);
     }
 }
 
-- 
GitLab


From 5dd43bb9221272e273e0cb3f27611226c9304fd0 Mon Sep 17 00:00:00 2001
From: Andreas Heinisch
Date: Mon, 23 Jan 2023 09:18:43 +0000
Subject: [PATCH 2/2] Changed skip_bom in order to address tdf#150023 - - Double BOM (invalid XML) prevents importing some Excel 2003 XML (Excel reads them)(https://bugs.documentfoundation.org/show_bug.cgi?id=150023)

-- 
GitLab