XDXF: Add support for draft revision 034 (issue #1600)

This commit is contained in:
Abs62 2023-01-19 17:57:47 +03:00 committed by Xiao YiFang
parent 620346bfaa
commit cbdfa586d1
3 changed files with 90 additions and 15 deletions

72
xdxf.cc
View file

@ -63,6 +63,22 @@ using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
/// Map a language tag string to a numeric language id.
///
/// Only the first three characters of @p lang are examined. If that prefix
/// ends with '-', the dash is dropped (presumably so that tags of the form
/// "xx-YY" reduce to the two-letter code "xx" -- confirm against callers).
///
/// @param lang  Language tag, e.g. the value of an "xml:lang" attribute.
/// @return The result of LangCoder::code2toInt() for a two-character code,
///         the result of LangCoder::findIdForLanguageCode3() for a
///         three-character code, and 0 for any other length.
quint32 getLanguageId( const QString & lang )
{
  QString code = lang.left( 3 );
  if( code.endsWith( QChar( '-' ) ) )
    code.chop( 1 );

  const auto length = code.size();

  // Two-character codes are passed to code2toInt().
  if( length == 2 )
    return LangCoder::code2toInt( code.toLatin1().data() );

  // Three-character codes are resolved via findIdForLanguageCode3().
  if( length == 3 )
    return LangCoder::findIdForLanguageCode3( code.toLatin1().data() );

  // Anything else (empty or a single character) is not recognised.
  return 0;
}
namespace {
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
@ -73,7 +89,7 @@ DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex )
enum
{
Signature = 0x46584458, // XDXF on little-endian, FXDX on big-endian
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
};
enum ArticleFormat
@ -1208,25 +1224,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
// Read the xdxf
string str = stream.attributes().value( "lang_from" ).toString().toLatin1().data();
if ( str.size() > 3 )
str.resize( 3 );
idxHeader.langFrom = LangCoder::findIdForLanguageCode3( str.c_str() );
if( !str.empty() )
idxHeader.langFrom = getLanguageId( str.c_str() );
str = stream.attributes().value( "lang_to" ).toString().toLatin1().data();
if ( str.size() > 3 )
str.resize( 3 );
idxHeader.langTo = LangCoder::findIdForLanguageCode3( str.c_str() );
bool isLogical = ( stream.attributes().value( "format" ) == u"logical" );
if( !str.empty() )
idxHeader.langTo = getLanguageId( str.c_str() );
QRegExp regNum( "\\d+" );
regNum.indexIn( stream.attributes().value( "revision" ).toString() );
idxHeader.revisionNumber = regNum.cap().toUInt();
bool isLogical = ( stream.attributes().value( "format" ) == "logical" || idxHeader.revisionNumber >= 34 );
idxHeader.articleFormat = isLogical ? Logical : Visual;
unsigned articleCount = 0, wordCount = 0;
@ -1269,6 +1279,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
// TODO: add other information to the description, such as <publisher>, <authors>, <file_ver>, <creation_date>, <last_edited_date>, <dict_edition>, <publishing_date>, <dict_src_url>
QString desc = readXhtmlData( stream );
if( isLogical )
{
desc = desc.simplified();
desc.replace( QRegExp( "<br\\s*>\\s*</br>" ), QChar( '\n' ) );
}
if ( dictionaryDescription.isEmpty() )
{
dictionaryDescription = desc;
@ -1286,6 +1302,36 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
}
}
else
if( stream.name() == "languages" )
{
while( !( stream.isEndElement() && stream.name() == "languages" ) && !stream.atEnd() )
{
if( !stream.readNext() )
break;
if ( stream.isStartElement() )
{
if( stream.name() == "from" )
{
if( idxHeader.langFrom == 0 )
{
QString lang = stream.attributes().value( "xml:lang" ).toString();
idxHeader.langFrom = getLanguageId( lang );
}
}
else if( stream.name() == "to" )
{
if( idxHeader.langTo == 0 )
{
QString lang = stream.attributes().value( "xml:lang" ).toString();
idxHeader.langTo = getLanguageId( lang );
}
}
}
else if ( stream.isEndElement() && stream.name() == "languages" )
break;
}
}
else
if ( stream.name() == u"abbreviations" )
{
QString s;

View file

@ -12,6 +12,8 @@ namespace Xdxf {
using std::vector;
using std::string;
quint32 getLanguageId( const QString & lang );
vector< sptr< Dictionary::Class > > makeDictionaries(
vector< string > const & fileNames,
string const & indicesDir,

View file

@ -14,6 +14,7 @@
#include "htmlescape.hh"
#include "utils.hh"
#include <QDebug>
#include "xdxf.hh"
#include <QRegularExpression>
@ -231,8 +232,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
el.setTagName( "div" );
el.setAttribute( "class", "xdxf_headwords" );
if( dictPtr->isFromLanguageRTL() != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", dictPtr->isFromLanguageRTL() ? "rtl" : "ltr" );
bool isLanguageRtl = dictPtr->isFromLanguageRTL();
if( el.hasAttribute( "xml:lang" ) )
{
// Change xml-attribute "xml:lang" to html-attribute "lang"
QString lang = el.attribute( "xml:lang" );
el.removeAttribute( "xml:lang" );
el.setAttribute( "lang", lang );
quint32 langID = Xdxf::getLanguageId( lang );
if( langID )
isLanguageRtl = LangCoder::isLanguageRTL( langID );
}
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
}
}
@ -327,6 +340,20 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
QDomElement el = nodes.at( 0 ).toElement();
el.setTagName( "span" );
el.setAttribute( "class", "xdxf_def" );
bool isLanguageRtl = dictPtr->isToLanguageRTL();
if( el.hasAttribute( "xml:lang" ) )
{
// Change xml-attribute "xml:lang" to html-attribute "lang"
QString lang = el.attribute( "xml:lang" );
el.removeAttribute( "xml:lang" );
el.setAttribute( "lang", lang );
quint32 langID = Xdxf::getLanguageId( lang );
if( langID )
isLanguageRtl = LangCoder::isLanguageRTL( langID );
}
if( isLanguageRtl != dictPtr->isToLanguageRTL() )
el.setAttribute( "dir", isLanguageRtl ? "rtl" : "ltr" );
}
}