Git commit 47f6e57b2fa3768feb4f1f4a2cd3ce46660d90f2 by Christoph Cullmann. Committed on 11/09/2016 at 17:14. Pushed by cullmann into branch 'master'. Improve epub extractor, less segfaults Improve epub extractor: 1) check for more nullpointers (e.g. data can be null for some fields, iter= ators, ...) 2) actually close the epub file again at all 3) iterator seems to handle clink as stated in docs, fix double free e.g. see bug 361727 could be the double freed clink in the last iterator BUG: 361727 REVIEW: 128888 M +47 -42 src/extractors/epubextractor.cpp http://commits.kde.org/kfilemetadata/47f6e57b2fa3768feb4f1f4a2cd3ce46660d90= f2 diff --git a/src/extractors/epubextractor.cpp b/src/extractors/epubextracto= r.cpp index 88a4caa..bca24a6 100644 --- a/src/extractors/epubextractor.cpp +++ b/src/extractors/epubextractor.cpp @@ -1,5 +1,6 @@ /* Copyright (C) 2013 Vishesh Handa + Copyright (C) 2016 Christoph Cullmann = This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -46,11 +47,14 @@ namespace QString fetchMetadata(struct epub* e, const epub_metadata& type) { int size =3D 0; - unsigned char** data =3D epub_get_metadata(e, type, &size); if (data) { QStringList strList; for (int i =3D 0; i < size; i++) { + // skip nullptr entries, can happen for broken xml files + if (!data[i]) + continue; + strList << QString::fromUtf8((char*)data[i]); free(data[i]); } @@ -65,7 +69,8 @@ QString fetchMetadata(struct epub* e, const epub_metadata= & type) = void EPubExtractor::extract(ExtractionResult* result) { - struct epub* ePubDoc =3D epub_open(result->inputUrl().toUtf8().constDa= ta(), 1); + // open epub, return on exit, file will be closed again at end of func= tion + auto ePubDoc =3D epub_open(result->inputUrl().toUtf8().constData(), 1); if (!ePubDoc) { qWarning() << "Invalid document"; return; @@ -138,48 +143,48 @@ void EPubExtractor::extract(ExtractionResult* result) // // Plain Text // - if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { - return; - } - - struct eiterator* iter =3D epub_get_iterator(ePubDoc, EITERATOR_SPINE,= 0); - do { - char* curr =3D epub_it_get_curr(iter); - if (!curr) - continue; - QString html =3D QString::fromUtf8(curr); - html.remove(QRegularExpression(QStringLiteral("<[^>]*>"))); - - result->append(html); - } while (epub_it_get_next(iter)); - - epub_free_iterator(iter); - - struct titerator* tit; - - tit =3D epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0); - if (!tit) { - tit =3D epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0); - } - - if (epub_tit_curr_valid(tit)) { - do { - char* clink =3D epub_tit_get_curr_link(tit); - - char* data; - int size =3D epub_get_data(ePubDoc, clink, &data); - free(clink); - - // epub_get_data returns -1 on failure - if (size > 0 && data) { - QString html =3D QString::fromUtf8(data, size); - // strip html tags + if (result->inputFlags() & ExtractionResult::ExtractPlainText) { + if (auto iter =3D epub_get_iterator(ePubDoc, EITERATOR_SPINE, 0)) { + do { + char* curr =3D epub_it_get_curr(iter); + if (!curr) + continue; + + QString html =3D QString::fromUtf8(curr); html.remove(QRegularExpression(QStringLiteral("<[^>]*>"))); - result->append(html); - free(data); + } while (epub_it_get_next(iter)); + + epub_free_iterator(iter); + } + + auto tit =3D epub_get_titerator(ePubDoc, TITERATOR_NAVMAP, 0); + if (!tit) { + tit =3D epub_get_titerator(ePubDoc, TITERATOR_GUIDE, 0); + } + if (tit) { + if (epub_tit_curr_valid(tit)) { + do { + // get link, iterator handles freeing of it + char* clink =3D epub_tit_get_curr_link(tit); + + // epub_get_data returns -1 on failure + char* data =3D nullptr; + const int size =3D epub_get_data(ePubDoc, clink, &data= ); + if (size >=3D 0 && data) { + QString html =3D QString::fromUtf8(data, size); + // strip html tags + html.remove(QRegularExpression(QStringLiteral("<[^= >]*>"))); + + result->append(html); + free(data); + } + } while (epub_tit_next(tit)); } - } while (epub_tit_next(tit)); + epub_free_titerator(tit); + } } - epub_free_titerator(tit); + + // close epub file again + epub_close(ePubDoc); }