diff --git a/xdxf.cc b/xdxf.cc index bea69777..6ccb2e16 100644 --- a/xdxf.cc +++ b/xdxf.cc @@ -63,6 +63,22 @@ using BtreeIndexing::WordArticleLink; using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexInfo; +quint32 getLanguageId( const QString & lang ) /* Resolves a language tag to a LangCoder language id. Only the first three characters of lang are examined, and a trailing hyphen among them is dropped. A two-character result is passed to LangCoder::code2toInt, a three-character result to LangCoder::findIdForLanguageCode3; any other length returns 0. */ +{ + QString lstr = lang.left( 3 ); /* work on a copy limited to three characters; the rest of the tag is ignored */ + + if( lstr.endsWith( QChar( '-' ) ) ) + lstr.chop( 1 ); /* leaves a two-character code when the third character was a hyphen; presumably covers tags with a subtag such as en-US (confirm against real inputs) */ + + switch( lstr.size() ) + { + case 2: return LangCoder::code2toInt( lstr.toLatin1().data() ); + case 3: return LangCoder::findIdForLanguageCode3( lstr.toLatin1().data() ); + } + + return 0; /* neither two nor three characters; the callers visible in this patch treat 0 as language not resolved */ +} + namespace { DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex ) @@ -73,7 +89,7 @@ DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex ) enum { Signature = 0x46584458, // XDXF on little-endian, FXDX on big-endian - CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version + CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version }; enum ArticleFormat @@ -1208,25 +1224,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries( // Read the xdxf string str = stream.attributes().value( "lang_from" ).toString().toLatin1().data(); - - if ( str.size() > 3 ) - str.resize( 3 ); - - idxHeader.langFrom = LangCoder::findIdForLanguageCode3( str.c_str() ); + if( !str.empty() ) + idxHeader.langFrom = getLanguageId( str.c_str() ); str = stream.attributes().value( "lang_to" ).toString().toLatin1().data(); - - if ( str.size() > 3 ) - str.resize( 3 ); - - idxHeader.langTo = LangCoder::findIdForLanguageCode3( str.c_str() ); - - bool isLogical = ( stream.attributes().value( "format" ) == u"logical" ); + if( !str.empty() ) + idxHeader.langTo = getLanguageId( str.c_str() ); QRegExp regNum( "\\d+" ); regNum.indexIn( stream.attributes().value( "revision" ).toString() ); idxHeader.revisionNumber = regNum.cap().toUInt(); + bool isLogical = ( stream.attributes().value( "format" ) == "logical" || idxHeader.revisionNumber >= 34 ); + idxHeader.articleFormat = isLogical ? 
Logical : Visual; unsigned articleCount = 0, wordCount = 0; @@ -1269,6 +1279,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( // todo implement adding other information to the description like , , , , , , , QString desc = readXhtmlData( stream ); + if( isLogical ) + { + desc = desc.simplified(); + desc.replace( QRegExp( "\\s*
" ), QChar( '\n' ) ); + } + if ( dictionaryDescription.isEmpty() ) { dictionaryDescription = desc; @@ -1286,6 +1302,36 @@ vector< sptr< Dictionary::Class > > makeDictionaries( } } else + if( stream.name() == "languages" ) + { + while( !( stream.isEndElement() && stream.name() == "languages" ) && !stream.atEnd() ) + { + if( !stream.readNext() ) + break; + if ( stream.isStartElement() ) + { + if( stream.name() == "from" ) + { + if( idxHeader.langFrom == 0 ) + { + QString lang = stream.attributes().value( "xml:lang" ).toString(); + idxHeader.langFrom = getLanguageId( lang ); + } + } + else if( stream.name() == "to" ) + { + if( idxHeader.langTo == 0 ) + { + QString lang = stream.attributes().value( "xml:lang" ).toString(); + idxHeader.langTo = getLanguageId( lang ); + } + } + } + else if ( stream.isEndElement() && stream.name() == "languages" ) + break; + } + } + else if ( stream.name() == u"abbreviations" ) { QString s; diff --git a/xdxf.hh b/xdxf.hh index c562a56a..c6a9e502 100644 --- a/xdxf.hh +++ b/xdxf.hh @@ -12,6 +12,8 @@ namespace Xdxf { using std::vector; using std::string; +quint32 getLanguageId( const QString & lang ); + vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, string const & indicesDir, diff --git a/xdxf2html.cc b/xdxf2html.cc index b5da8b66..c2570518 100644 --- a/xdxf2html.cc +++ b/xdxf2html.cc @@ -14,6 +14,7 @@ #include "htmlescape.hh" #include "utils.hh" #include +#include "xdxf.hh" #include @@ -231,8 +232,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const el.setTagName( "div" ); el.setAttribute( "class", "xdxf_headwords" ); - if( dictPtr->isFromLanguageRTL() != dictPtr->isToLanguageRTL() ) - el.setAttribute( "dir", dictPtr->isFromLanguageRTL() ? 
"rtl" : "ltr" ); + bool isLanguageRtl = dictPtr->isFromLanguageRTL(); + if( el.hasAttribute( "xml:lang" ) ) + { + // Change xml-attribute "xml:lang" to html-attribute "lang" + QString lang = el.attribute( "xml:lang" ); + el.removeAttribute( "xml:lang" ); + el.setAttribute( "lang", lang ); + + quint32 langID = Xdxf::getLanguageId( lang ); + if( langID ) + isLanguageRtl = LangCoder::isLanguageRTL( langID ); + } + if( isLanguageRtl != dictPtr->isToLanguageRTL() ) + el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" ); } } @@ -327,6 +340,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const QDomElement el = nodes.at( 0 ).toElement(); el.setTagName( "span" ); el.setAttribute( "class", "xdxf_def" ); + bool isLanguageRtl = dictPtr->isToLanguageRTL(); + if( el.hasAttribute( "xml:lang" ) ) + { + // Change xml-attribute "xml:lang" to html-attribute "lang" + QString lang = el.attribute( "xml:lang" ); + el.removeAttribute( "xml:lang" ); + el.setAttribute( "lang", lang ); + + quint32 langID = Xdxf::getLanguageId( lang ); + if( langID ) + isLanguageRtl = LangCoder::isLanguageRTL( langID ); + } + if( isLanguageRtl != dictPtr->isToLanguageRTL() ) + el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" ); } }