From 5988fa86523ba3356f9da4cb4af757e2d27c8fa8 Mon Sep 17 00:00:00 2001
From: zarelaky
Date: Wed, 23 Aug 2017 21:21:50 +0800
Subject: [PATCH] format output of Powerword dicts

---
 stardict.cc | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 128 insertions(+), 2 deletions(-)

diff --git a/stardict.cc b/stardict.cc
index 8848ca4a..b5abb56a 100644
--- a/stardict.cc
+++ b/stardict.cc
@@ -40,7 +40,8 @@
 #include
 #include
 #include
-
+#include
+#include
 #include "ufile.hh"
 #include "qt4x5.hh"
 
@@ -329,6 +330,128 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
     headword = articleData;
 }
 
+/// Turns a KingSoft PowerWord ('k') resource into the markup returned by
+/// StardictDictionary::handleResource().
+///
+/// The payload is parsed as XML, the text of every text node is collected in
+/// document order, and PowerWord inline markup of the form "&X{...}" in each
+/// piece is rewritten by translatePW().
+///
+/// NOTE(review): the markup inside this class's string literals (the wrapper
+/// and separator strings in process(), the replacement strings in
+/// translatePW()) looks like it was stripped from this copy of the patch; the
+/// literals are kept exactly as found -- restore them from the original commit.
+class PowerWordDataProcessor{
+    /// One rewrite rule: a pattern plus the text that replaces each match.
+    class PWSyntaxTranslate{
+    public:
+        PWSyntaxTranslate(const char* re, const char* replacement)
+            : _re(re)
+            , _replacement(replacement)
+        {
+        }
+        const QRegExp& re() const {
+            return _re;
+        }
+        const QString & replacement() const {
+            return _replacement;
+        }
+    private:
+        QRegExp _re;
+        QString _replacement;
+    };
+public:
+    /// @param resource  UTF-8 payload; decoded with QString::fromUtf8().
+    /// @param size      Number of bytes to read from @p resource.
+    PowerWordDataProcessor(const char* resource, size_t size)
+        : _data(QString::fromUtf8(resource, static_cast<int>(size)))
+    {
+    }
+
+    /// Builds the output for the payload passed to the constructor.
+    /// @return The result encoded as UTF-8. A payload that does not parse as
+    ///         XML is emitted HTML-escaped, without any translation.
+    string process() {
+        QDomDocument doc;
+        QString ss;
+        ss = "
";
+        if (!doc.setContent(_data)) {
+            // Unparseable payload: escape it so that stray markup cannot
+            // leak into the generated page.
+            const QByteArray raw = _data.toUtf8();
+            const string escaped = Html::escape(string(raw.data(), raw.size()));
+            ss += QString::fromUtf8(escaped.data(), static_cast<int>(escaped.size()));
+        } else {
+            QStringList sl;
+            // Start at the document node, not at its first child: the first
+            // child can be something other than the root element (e.g. a
+            // processing instruction or a comment), and the text would then
+            // never be reached.
+            walkNode(doc, sl);
+
+            QStringListIterator itr(sl);
+            while (itr.hasNext()) {
+                QString s = itr.next();
+                translatePW(s);
+                // NOTE(review): node text is appended without HTML escaping.
+                ss += s;
+                ss += "
";
+            }
+        }
+        ss += "
";
+        QByteArray ba = ss.toUtf8();
+        return string(ba.data(), ba.size());
+    }
+private:
+    /// Appends the text of @p e, or of every text node below it, to @p sl in
+    /// document order.
+    void walkNode(const QDomNode& e, QStringList& sl) {
+        if (e.isNull()) {
+            return;
+        }
+        if (e.isText()) {
+            sl.append(e.toText().data());
+            return;
+        }
+        // Text children are picked up by the branch above on recursion.
+        const QDomNodeList children = e.childNodes();
+        for (int i = 0; i < children.size(); ++i) {
+            walkNode(children.at(i), sl);
+        }
+    }
+
+    /// Rewrites the PowerWord inline markup in @p s in place.
+    void translatePW(QString& s){
+        static const PWSyntaxTranslate rules[]={
+            PWSyntaxTranslate("&[bB]\\s*\\{([^\\{}&]+)\\}", "\\1"),
+            PWSyntaxTranslate("&[iI]\\s*\\{([^\\{}&]+)\\}", "\\1"),
+            PWSyntaxTranslate("&[uU]\\s*\\{([^\\{}&]+)\\}", "\\1"),
+            PWSyntaxTranslate("&[lL]\\s*\\{([^\\{}&]+)\\}", "\\1"),
+            PWSyntaxTranslate("&[2]\\s*\\{([^\\{}&]+)\\}", "\\1")
+        };
+        const int ruleCount = sizeof(rules) / sizeof(rules[0]);
+
+        // Apply the rules until a pass changes nothing. A rule only matches a
+        // group whose content has no braces or '&', so nested markup needs
+        // more than one pass. The snapshot must be taken before the pass;
+        // taken afterwards it always equals s and the loop stops after a
+        // single pass.
+        QString previous;
+        do {
+            previous = s;
+            for (int i = 0; i < ruleCount; ++i) {
+                s.replace(rules[i].re(), rules[i].replacement());
+            }
+        } while (s != previous);
+        // Remove what is left of the markup: any "&X{" opener and every "}".
+        s.replace(QRegExp("&.\\s*\\{"), "");
+        s.replace("}", "");
+    }
+private:
+    QString _data; ///< Payload decoded from UTF-8.
+};
+
+
 /// This function tries to make an html of the Stardict's resource typed
 /// 'type', contained in a block pointed to by 'resource', 'size' bytes long.
 string StardictDictionary::handleResource( char type, char const * resource, size_t size )
@@ -410,7 +533,10 @@ string StardictDictionary::handleResource( char type, char const * resource, siz
       // just output as pure escaped utf8.
       return "
" + Html::escape( string( resource, size ) ) + "
"; case 'k': // KingSoft PowerWord data. We don't know how to handle that. - return "
" + Html::escape( string( resource, size ) ) + "
"; + { + PowerWordDataProcessor pwdp(resource, size); + return pwdp.process(); + } case 'w': // MediaWiki markup. We don't handle this right now. return "
" + Html::escape( string( resource, size ) ) + "
"; case 'n': // WordNet data. We don't know anything about it.