Merge branch 'staged' into dev

This commit is contained in:
Xiao YiFang 2023-01-25 21:30:51 +08:00
commit 0e1ce78216
7 changed files with 111 additions and 23 deletions

View file

@ -199,7 +199,7 @@ InputPhrase Preferences::sanitizeInputPhrase( QString const & inputPhrase ) cons
return result;
}
const QString withPunct = _phase.simplified();
const QString withPunct = _phase.simplified().remove( QChar( 0xAD ) ); // Simplify whitespaces and remove soft hyphens;
result.phrase = gd::toQString( Folding::trimWhitespaceOrPunct( gd::toWString( withPunct ) ) );
if ( !result.isValid() )
return result; // The suffix of an invalid input phrase must be empty.

View file

@ -62,6 +62,11 @@ DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x050F00
DEFINES += MAKE_FFMPEG_PLAYER
}
contains(DEFINES, MAKE_QTMULTIMEDIA_PLAYER|MAKE_FFMPEG_PLAYER) {
HEADERS += audiooutput.h
SOURCES += audiooutput.cpp
}
# On Windows, this only works in release builds.
CONFIG( use_xapian ) {
DEFINES += USE_XAPIAN
@ -267,7 +272,6 @@ HEADERS += folding.hh \
ankiconnector.h \
article_inspect.h \
articlewebpage.h \
audiooutput.h \
base/globalregex.hh \
base_type.h \
globalbroadcaster.h \
@ -413,7 +417,6 @@ SOURCES += folding.cc \
ankiconnector.cpp \
article_inspect.cpp \
articlewebpage.cpp \
audiooutput.cpp \
base/globalregex.cc \
globalbroadcaster.cpp \
headwordsmodel.cpp \
@ -673,8 +676,7 @@ TS_OUT = $$TRANSLATIONS
TS_OUT ~= s/.ts/.qm/g
PRE_TARGETDEPS += $$TS_OUT
equals(QT_VERSION,6.4.0)
{
equals(QT_VERSION,6.4.0) {
#QTBUG-105984
multimedia.files = $$[QT_PLUGIN_PATH]/multimedia/*
multimedia.path = plugins/multimedia

View file

@ -26,8 +26,10 @@ Iconv::~Iconv()
// Converts the raw bytes at inBuf (inBytesLeft of them) into a QString.
// When a codec is set it performs the decoding; otherwise the bytes are
// wrapped in a QByteArray and handed to QString's byte-array constructor.
// Neither inBuf nor inBytesLeft is modified here.
QString Iconv::convert(void const* & inBuf, size_t& inBytesLeft)
{
  const char * const bytes = static_cast< const char * >( inBuf );

  if( !codec )
  {
    // No codec available: fall back to QString's own byte-array conversion.
    QByteArray ba( bytes, inBytesLeft );
    return QString( ba );
  }

  return codec->toUnicode( bytes, inBytesLeft );
}
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,

73
xdxf.cc
View file

@ -63,6 +63,22 @@ using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
// Maps a language tag to a language id.
//
// Only the first three characters of `lang` are considered; if that prefix
// ends with '-', the dash is dropped (so a tag such as "xx-YY" is reduced to
// "xx"). A two-character result is resolved through LangCoder::code2toInt,
// a three-character result through LangCoder::findIdForLanguageCode3.
// Any other length (including an empty string) yields 0.
quint32 getLanguageId( const QString & lang )
{
  QString code = lang.left( 3 );
  if( code.endsWith( QChar( '-' ) ) )
    code.chop( 1 );

  const int length = code.size();

  if( length == 2 )
    return LangCoder::code2toInt( code.toLatin1().data() );

  if( length == 3 )
    return LangCoder::findIdForLanguageCode3( code.toLatin1().data() );

  // Neither a two- nor a three-character code: no language id.
  return 0;
}
namespace {
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
@ -73,7 +89,7 @@ DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex )
enum
{
Signature = 0x46584458, // XDXF on little-endian, FXDX on big-endian
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
};
enum ArticleFormat
@ -1208,25 +1224,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
// Read the xdxf
string str = stream.attributes().value( "lang_from" ).toString().toLatin1().data();
if ( str.size() > 3 )
str.resize( 3 );
idxHeader.langFrom = LangCoder::findIdForLanguageCode3( str.c_str() );
if( !str.empty() )
idxHeader.langFrom = getLanguageId( str.c_str() );
str = stream.attributes().value( "lang_to" ).toString().toLatin1().data();
if ( str.size() > 3 )
str.resize( 3 );
idxHeader.langTo = LangCoder::findIdForLanguageCode3( str.c_str() );
bool isLogical = ( stream.attributes().value( "format" ) == u"logical" );
if( !str.empty() )
idxHeader.langTo = getLanguageId( str.c_str() );
QRegExp regNum( "\\d+" );
regNum.indexIn( stream.attributes().value( "revision" ).toString() );
idxHeader.revisionNumber = regNum.cap().toUInt();
bool isLogical = ( stream.attributes().value( "format" ) == u"logical" || idxHeader.revisionNumber >= 34 );
idxHeader.articleFormat = isLogical ? Logical : Visual;
unsigned articleCount = 0, wordCount = 0;
@ -1269,6 +1279,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
// todo implement adding other information to the description like <publisher>, <authors>, <file_ver>, <creation_date>, <last_edited_date>, <dict_edition>, <publishing_date>, <dict_src_url>
QString desc = readXhtmlData( stream );
if( isLogical )
{
desc = desc.simplified();
QRegularExpression br( "<br\\s*>\\s*</br>" );
desc.replace( br, QString("\n") );
}
if ( dictionaryDescription.isEmpty() )
{
dictionaryDescription = desc;
@ -1286,6 +1303,36 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
}
}
else
if( stream.name() == u"languages" )
{
while( !( stream.isEndElement() && stream.name() == u"languages" ) && !stream.atEnd() )
{
if( !stream.readNext() )
break;
if ( stream.isStartElement() )
{
if( stream.name() == u"from" )
{
if( idxHeader.langFrom == 0 )
{
QString lang = stream.attributes().value( "xml:lang" ).toString();
idxHeader.langFrom = getLanguageId( lang );
}
}
else if( stream.name() == u"to" )
{
if( idxHeader.langTo == 0 )
{
QString lang = stream.attributes().value( "xml:lang" ).toString();
idxHeader.langTo = getLanguageId( lang );
}
}
}
else if ( stream.isEndElement() && stream.name() == u"languages" )
break;
}
}
else
if ( stream.name() == u"abbreviations" )
{
QString s;

View file

@ -12,6 +12,8 @@ namespace Xdxf {
using std::vector;
using std::string;
quint32 getLanguageId( const QString & lang );
vector< sptr< Dictionary::Class > > makeDictionaries(
vector< string > const & fileNames,
string const & indicesDir,

View file

@ -14,6 +14,7 @@
#include "htmlescape.hh"
#include "utils.hh"
#include <QDebug>
#include "xdxf.hh"
#include <QRegularExpression>
@ -231,8 +232,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
el.setTagName( "div" );
el.setAttribute( "class", "xdxf_headwords" );
if( dictPtr->isFromLanguageRTL() != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", dictPtr->isFromLanguageRTL() ? "rtl" : "ltr" );
bool isLanguageRtl = dictPtr->isFromLanguageRTL();
if( el.hasAttribute( "xml:lang" ) )
{
// Change xml-attribute "xml:lang" to html-attribute "lang"
QString lang = el.attribute( "xml:lang" );
el.removeAttribute( "xml:lang" );
el.setAttribute( "lang", lang );
quint32 langID = Xdxf::getLanguageId( lang );
if( langID )
isLanguageRtl = LangCoder::isLanguageRTL( langID );
}
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
}
}
@ -327,6 +340,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
QDomElement el = nodes.at( 0 ).toElement();
el.setTagName( "span" );
el.setAttribute( "class", "xdxf_def" );
bool isLanguageRtl = dictPtr->isToLanguageRTL();
if( el.hasAttribute( "xml:lang" ) )
{
// Change xml-attribute "xml:lang" to html-attribute "lang"
QString lang = el.attribute( "xml:lang" );
el.removeAttribute( "xml:lang" );
el.setAttribute( "lang", lang );
quint32 langID = Xdxf::getLanguageId( lang );
if( langID )
isLanguageRtl = LangCoder::isLanguageRTL( langID );
}
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
}
}

10
zim.cc
View file

@ -62,6 +62,7 @@ using BtreeIndexing::IndexInfo;
DEF_EX_STR( exNotZimFile, "Not an Zim file", Dictionary::Ex )
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
DEF_EX_STR( exInvalidZimHeader, "Invalid Zim header", Dictionary::Ex )
DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
@ -287,6 +288,9 @@ bool ZimFile::open()
if( read( reinterpret_cast< char * >( &zimHeader ), sizeof( zimHeader ) ) != sizeof( zimHeader ) )
return false;
if( zimHeader.magicNumber != 0x44D495A || zimHeader.mimeListPos != sizeof( zimHeader ) )
return false;
// Clusters in zim file may be placed in random order.
// We create sorted offsets list to calculate clusters size.
@ -1567,11 +1571,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
df.open();
ZIM_header const & zh = df.header();
bool new_namespaces = ( zh.majorVersion >= 6 && zh.minorVersion >= 1 );
if( zh.magicNumber != 0x44D495A )
throw exNotZimFile( i->c_str() );
if( zh.mimeListPos != sizeof( ZIM_header ) )
throw exInvalidZimHeader( i->c_str() );
bool new_namespaces = ( zh.majorVersion >= 6 && zh.minorVersion >= 1 );
{
int n = firstName.lastIndexOf( '/' );
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );