mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
format output of Powerword dicts
This commit is contained in:
parent
4c1dd16ce2
commit
5988fa8652
100
stardict.cc
100
stardict.cc
|
@ -40,7 +40,8 @@
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
#include <QRegExp>
|
#include <QRegExp>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QDomDocument>
|
||||||
|
#include <QDomNode>
|
||||||
#include "ufile.hh"
|
#include "ufile.hh"
|
||||||
#include "qt4x5.hh"
|
#include "qt4x5.hh"
|
||||||
|
|
||||||
|
@ -329,6 +330,98 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
|
||||||
headword = articleData;
|
headword = articleData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class PowerWordDataProcessor{
|
||||||
|
class PWSyntaxTranslate{
|
||||||
|
public:
|
||||||
|
PWSyntaxTranslate(const char* re, const char* replacement)
|
||||||
|
: _re(re)
|
||||||
|
, _replacement(replacement)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
const QRegExp& re() const {
|
||||||
|
return _re;
|
||||||
|
}
|
||||||
|
const QString & replacement() const {
|
||||||
|
return _replacement;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
QRegExp _re;
|
||||||
|
QString _replacement;
|
||||||
|
};
|
||||||
|
public:
|
||||||
|
PowerWordDataProcessor(const char* resource, size_t size)
|
||||||
|
: _data(QString::fromUtf8(resource, size))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
string process() {
|
||||||
|
QDomDocument doc;
|
||||||
|
QString ss;
|
||||||
|
ss = "<div class=\"sdct_k\">";
|
||||||
|
if (!doc.setContent(_data)) {
|
||||||
|
ss += _data ;
|
||||||
|
} else {
|
||||||
|
QStringList sl;
|
||||||
|
walkNode(doc.firstChild(), sl);
|
||||||
|
|
||||||
|
QStringListIterator itr(sl);
|
||||||
|
while (itr.hasNext()) {
|
||||||
|
QString s = itr.next();
|
||||||
|
translatePW(s);
|
||||||
|
ss += s;
|
||||||
|
ss += "<br>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ss += "</div>";
|
||||||
|
QByteArray ba = ss.toUtf8();
|
||||||
|
return string(ba.data(), ba.size());
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
void walkNode(const QDomNode& e, QStringList& sl) {
|
||||||
|
if (e.isNull()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (e.isText()) {
|
||||||
|
sl.append(e.toText().data());
|
||||||
|
} else {
|
||||||
|
QDomNodeList l = e.childNodes();
|
||||||
|
for (int i = 0; i < l.size(); ++i) {
|
||||||
|
QDomNode n = l.at(i);
|
||||||
|
if (n.isText()) {
|
||||||
|
sl.append(n.toText().data());
|
||||||
|
} else {
|
||||||
|
walkNode(n, sl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void translatePW(QString& s){
|
||||||
|
const int TRANSLATE_TBL_SIZE=5;
|
||||||
|
static PWSyntaxTranslate t[TRANSLATE_TBL_SIZE]={
|
||||||
|
PWSyntaxTranslate("&[bB]\\s*\\{([^\\{}&]+)\\}", "<B>\\1</B>"),
|
||||||
|
PWSyntaxTranslate("&[iI]\\s*\\{([^\\{}&]+)\\}", "<I>\\1</I>"),
|
||||||
|
PWSyntaxTranslate("&[uU]\\s*\\{([^\\{}&]+)\\}", "<U>\\1</U>"),
|
||||||
|
PWSyntaxTranslate("&[lL]\\s*\\{([^\\{}&]+)\\}", "<SPAN style=\"color:#0000ff\">\\1</SPAN>"),
|
||||||
|
PWSyntaxTranslate("&[2]\\s*\\{([^\\{}&]+)\\}", "<SPAN style=\"color:#0000ff\">\\1</SPAN>")
|
||||||
|
};
|
||||||
|
|
||||||
|
QString old;
|
||||||
|
while (s.compare(old) != 0) {
|
||||||
|
for (int i = 0; i < TRANSLATE_TBL_SIZE; ++i) {
|
||||||
|
PWSyntaxTranslate& a = t[i];
|
||||||
|
s.replace(a.re(), a.replacement());
|
||||||
|
}
|
||||||
|
old = s;
|
||||||
|
}
|
||||||
|
s.replace(QRegExp("&.\\s*\\{"), "");
|
||||||
|
s.replace("}", "");
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
QString _data;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/// This function tries to make an html of the Stardict's resource typed
|
/// This function tries to make an html of the Stardict's resource typed
|
||||||
/// 'type', contained in a block pointed to by 'resource', 'size' bytes long.
|
/// 'type', contained in a block pointed to by 'resource', 'size' bytes long.
|
||||||
string StardictDictionary::handleResource( char type, char const * resource, size_t size )
|
string StardictDictionary::handleResource( char type, char const * resource, size_t size )
|
||||||
|
@ -410,7 +503,10 @@ string StardictDictionary::handleResource( char type, char const * resource, siz
|
||||||
// just output as pure escaped utf8.
|
// just output as pure escaped utf8.
|
||||||
return "<div class=\"sdct_y\">" + Html::escape( string( resource, size ) ) + "</div>";
|
return "<div class=\"sdct_y\">" + Html::escape( string( resource, size ) ) + "</div>";
|
||||||
case 'k': // KingSoft PowerWord data. We don't know how to handle that.
|
case 'k': // KingSoft PowerWord data. We don't know how to handle that.
|
||||||
return "<div class=\"sdct_k\">" + Html::escape( string( resource, size ) ) + "</div>";
|
{
|
||||||
|
PowerWordDataProcessor pwdp(resource, size);
|
||||||
|
return pwdp.process();
|
||||||
|
}
|
||||||
case 'w': // MediaWiki markup. We don't handle this right now.
|
case 'w': // MediaWiki markup. We don't handle this right now.
|
||||||
return "<div class=\"sdct_w\">" + Html::escape( string( resource, size ) ) + "</div>";
|
return "<div class=\"sdct_w\">" + Html::escape( string( resource, size ) ) + "</div>";
|
||||||
case 'n': // WordNet data. We don't know anything about it.
|
case 'n': // WordNet data. We don't know anything about it.
|
||||||
|
|
Loading…
Reference in a new issue