mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 08:34:08 +00:00
Merge pull request #322 from xiaoyifang/feature/cherry-pick-upstream
cherry pick upstream
This commit is contained in:
commit
069714e3a2
|
@ -199,7 +199,7 @@ InputPhrase Preferences::sanitizeInputPhrase( QString const & inputPhrase ) cons
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
const QString withPunct = _phase.simplified();
|
const QString withPunct = _phase.simplified().remove( QChar( 0xAD ) ); // Simplify whitespaces and remove soft hyphens;
|
||||||
result.phrase = gd::toQString( Folding::trimWhitespaceOrPunct( gd::toWString( withPunct ) ) );
|
result.phrase = gd::toQString( Folding::trimWhitespaceOrPunct( gd::toWString( withPunct ) ) );
|
||||||
if ( !result.isValid() )
|
if ( !result.isValid() )
|
||||||
return result; // The suffix of an invalid input phrase must be empty.
|
return result; // The suffix of an invalid input phrase must be empty.
|
||||||
|
|
73
xdxf.cc
73
xdxf.cc
|
@ -63,6 +63,22 @@ using BtreeIndexing::WordArticleLink;
|
||||||
using BtreeIndexing::IndexedWords;
|
using BtreeIndexing::IndexedWords;
|
||||||
using BtreeIndexing::IndexInfo;
|
using BtreeIndexing::IndexInfo;
|
||||||
|
|
||||||
|
quint32 getLanguageId( const QString & lang )
|
||||||
|
{
|
||||||
|
QString lstr = lang.left( 3 );
|
||||||
|
|
||||||
|
if( lstr.endsWith( QChar( '-' ) ) )
|
||||||
|
lstr.chop( 1 );
|
||||||
|
|
||||||
|
switch( lstr.size() )
|
||||||
|
{
|
||||||
|
case 2: return LangCoder::code2toInt( lstr.toLatin1().data() );
|
||||||
|
case 3: return LangCoder::findIdForLanguageCode3( lstr.toLatin1().data() );
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
||||||
|
@ -73,7 +89,7 @@ DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex )
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
Signature = 0x46584458, // XDXF on little-endian, FXDX on big-endian
|
Signature = 0x46584458, // XDXF on little-endian, FXDX on big-endian
|
||||||
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
|
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ArticleFormat
|
enum ArticleFormat
|
||||||
|
@ -1208,25 +1224,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
// Read the xdxf
|
// Read the xdxf
|
||||||
|
|
||||||
string str = stream.attributes().value( "lang_from" ).toString().toLatin1().data();
|
string str = stream.attributes().value( "lang_from" ).toString().toLatin1().data();
|
||||||
|
if( !str.empty() )
|
||||||
if ( str.size() > 3 )
|
idxHeader.langFrom = getLanguageId( str.c_str() );
|
||||||
str.resize( 3 );
|
|
||||||
|
|
||||||
idxHeader.langFrom = LangCoder::findIdForLanguageCode3( str.c_str() );
|
|
||||||
|
|
||||||
str = stream.attributes().value( "lang_to" ).toString().toLatin1().data();
|
str = stream.attributes().value( "lang_to" ).toString().toLatin1().data();
|
||||||
|
if( !str.empty() )
|
||||||
if ( str.size() > 3 )
|
idxHeader.langTo = getLanguageId( str.c_str() );
|
||||||
str.resize( 3 );
|
|
||||||
|
|
||||||
idxHeader.langTo = LangCoder::findIdForLanguageCode3( str.c_str() );
|
|
||||||
|
|
||||||
bool isLogical = ( stream.attributes().value( "format" ) == u"logical" );
|
|
||||||
|
|
||||||
QRegExp regNum( "\\d+" );
|
QRegExp regNum( "\\d+" );
|
||||||
regNum.indexIn( stream.attributes().value( "revision" ).toString() );
|
regNum.indexIn( stream.attributes().value( "revision" ).toString() );
|
||||||
idxHeader.revisionNumber = regNum.cap().toUInt();
|
idxHeader.revisionNumber = regNum.cap().toUInt();
|
||||||
|
|
||||||
|
bool isLogical = ( stream.attributes().value( "format" ) == u"logical" || idxHeader.revisionNumber >= 34 );
|
||||||
|
|
||||||
idxHeader.articleFormat = isLogical ? Logical : Visual;
|
idxHeader.articleFormat = isLogical ? Logical : Visual;
|
||||||
|
|
||||||
unsigned articleCount = 0, wordCount = 0;
|
unsigned articleCount = 0, wordCount = 0;
|
||||||
|
@ -1269,6 +1279,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
// todo implement adding other information to the description like <publisher>, <authors>, <file_ver>, <creation_date>, <last_edited_date>, <dict_edition>, <publishing_date>, <dict_src_url>
|
// todo implement adding other information to the description like <publisher>, <authors>, <file_ver>, <creation_date>, <last_edited_date>, <dict_edition>, <publishing_date>, <dict_src_url>
|
||||||
QString desc = readXhtmlData( stream );
|
QString desc = readXhtmlData( stream );
|
||||||
|
|
||||||
|
if( isLogical )
|
||||||
|
{
|
||||||
|
desc = desc.simplified();
|
||||||
|
QRegularExpression br( "<br\\s*>\\s*</br>" );
|
||||||
|
desc.replace( br, QString("\n") );
|
||||||
|
}
|
||||||
|
|
||||||
if ( dictionaryDescription.isEmpty() )
|
if ( dictionaryDescription.isEmpty() )
|
||||||
{
|
{
|
||||||
dictionaryDescription = desc;
|
dictionaryDescription = desc;
|
||||||
|
@ -1286,6 +1303,36 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
if( stream.name() == u"languages" )
|
||||||
|
{
|
||||||
|
while( !( stream.isEndElement() && stream.name() == u"languages" ) && !stream.atEnd() )
|
||||||
|
{
|
||||||
|
if( !stream.readNext() )
|
||||||
|
break;
|
||||||
|
if ( stream.isStartElement() )
|
||||||
|
{
|
||||||
|
if( stream.name() == u"from" )
|
||||||
|
{
|
||||||
|
if( idxHeader.langFrom == 0 )
|
||||||
|
{
|
||||||
|
QString lang = stream.attributes().value( "xml:lang" ).toString();
|
||||||
|
idxHeader.langFrom = getLanguageId( lang );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if( stream.name() == u"to" )
|
||||||
|
{
|
||||||
|
if( idxHeader.langTo == 0 )
|
||||||
|
{
|
||||||
|
QString lang = stream.attributes().value( "xml:lang" ).toString();
|
||||||
|
idxHeader.langTo = getLanguageId( lang );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( stream.isEndElement() && stream.name() == u"languages" )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
if ( stream.name() == u"abbreviations" )
|
if ( stream.name() == u"abbreviations" )
|
||||||
{
|
{
|
||||||
QString s;
|
QString s;
|
||||||
|
|
2
xdxf.hh
2
xdxf.hh
|
@ -12,6 +12,8 @@ namespace Xdxf {
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
|
quint32 getLanguageId( const QString & lang );
|
||||||
|
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
|
|
31
xdxf2html.cc
31
xdxf2html.cc
|
@ -14,6 +14,7 @@
|
||||||
#include "htmlescape.hh"
|
#include "htmlescape.hh"
|
||||||
#include "utils.hh"
|
#include "utils.hh"
|
||||||
#include <QDebug>
|
#include <QDebug>
|
||||||
|
#include "xdxf.hh"
|
||||||
|
|
||||||
#include <QRegularExpression>
|
#include <QRegularExpression>
|
||||||
|
|
||||||
|
@ -231,8 +232,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
|
||||||
|
|
||||||
el.setTagName( "div" );
|
el.setTagName( "div" );
|
||||||
el.setAttribute( "class", "xdxf_headwords" );
|
el.setAttribute( "class", "xdxf_headwords" );
|
||||||
if( dictPtr->isFromLanguageRTL() != dictPtr->isToLanguageRTL() )
|
bool isLanguageRtl = dictPtr->isFromLanguageRTL();
|
||||||
el.setAttribute( "dir", dictPtr->isFromLanguageRTL() ? "rtl" : "ltr" );
|
if( el.hasAttribute( "xml:lang" ) )
|
||||||
|
{
|
||||||
|
// Change xml-attribute "xml:lang" to html-attribute "lang"
|
||||||
|
QString lang = el.attribute( "xml:lang" );
|
||||||
|
el.removeAttribute( "xml:lang" );
|
||||||
|
el.setAttribute( "lang", lang );
|
||||||
|
|
||||||
|
quint32 langID = Xdxf::getLanguageId( lang );
|
||||||
|
if( langID )
|
||||||
|
isLanguageRtl = LangCoder::isLanguageRTL( langID );
|
||||||
|
}
|
||||||
|
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
|
||||||
|
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -327,6 +340,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
|
||||||
QDomElement el = nodes.at( 0 ).toElement();
|
QDomElement el = nodes.at( 0 ).toElement();
|
||||||
el.setTagName( "span" );
|
el.setTagName( "span" );
|
||||||
el.setAttribute( "class", "xdxf_def" );
|
el.setAttribute( "class", "xdxf_def" );
|
||||||
|
bool isLanguageRtl = dictPtr->isToLanguageRTL();
|
||||||
|
if( el.hasAttribute( "xml:lang" ) )
|
||||||
|
{
|
||||||
|
// Change xml-attribute "xml:lang" to html-attribute "lang"
|
||||||
|
QString lang = el.attribute( "xml:lang" );
|
||||||
|
el.removeAttribute( "xml:lang" );
|
||||||
|
el.setAttribute( "lang", lang );
|
||||||
|
|
||||||
|
quint32 langID = Xdxf::getLanguageId( lang );
|
||||||
|
if( langID )
|
||||||
|
isLanguageRtl = LangCoder::isLanguageRTL( langID );
|
||||||
|
}
|
||||||
|
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
|
||||||
|
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
10
zim.cc
10
zim.cc
|
@ -62,6 +62,7 @@ using BtreeIndexing::IndexInfo;
|
||||||
|
|
||||||
DEF_EX_STR( exNotZimFile, "Not an Zim file", Dictionary::Ex )
|
DEF_EX_STR( exNotZimFile, "Not an Zim file", Dictionary::Ex )
|
||||||
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
||||||
|
DEF_EX_STR( exInvalidZimHeader, "Invalid Zim header", Dictionary::Ex )
|
||||||
DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
|
DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
|
||||||
|
|
||||||
|
|
||||||
|
@ -287,6 +288,9 @@ bool ZimFile::open()
|
||||||
if( read( reinterpret_cast< char * >( &zimHeader ), sizeof( zimHeader ) ) != sizeof( zimHeader ) )
|
if( read( reinterpret_cast< char * >( &zimHeader ), sizeof( zimHeader ) ) != sizeof( zimHeader ) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if( zimHeader.magicNumber != 0x44D495A || zimHeader.mimeListPos != sizeof( zimHeader ) )
|
||||||
|
return false;
|
||||||
|
|
||||||
// Clusters in zim file may be placed in random order.
|
// Clusters in zim file may be placed in random order.
|
||||||
// We create sorted offsets list to calculate clusters size.
|
// We create sorted offsets list to calculate clusters size.
|
||||||
|
|
||||||
|
@ -1567,11 +1571,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
df.open();
|
df.open();
|
||||||
ZIM_header const & zh = df.header();
|
ZIM_header const & zh = df.header();
|
||||||
bool new_namespaces = ( zh.majorVersion >= 6 && zh.minorVersion >= 1 );
|
|
||||||
|
|
||||||
if( zh.magicNumber != 0x44D495A )
|
if( zh.magicNumber != 0x44D495A )
|
||||||
throw exNotZimFile( i->c_str() );
|
throw exNotZimFile( i->c_str() );
|
||||||
|
|
||||||
|
if( zh.mimeListPos != sizeof( ZIM_header ) )
|
||||||
|
throw exInvalidZimHeader( i->c_str() );
|
||||||
|
|
||||||
|
bool new_namespaces = ( zh.majorVersion >= 6 && zh.minorVersion >= 1 );
|
||||||
|
|
||||||
{
|
{
|
||||||
int n = firstName.lastIndexOf( '/' );
|
int n = firstName.lastIndexOf( '/' );
|
||||||
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );
|
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );
|
||||||
|
|
Loading…
Reference in a new issue