optimize: wchar use builtin char32_t

optimize: wchar use builtin char32_t
This commit is contained in:
yifang 2022-02-17 22:01:09 +08:00
parent 1d1e98b7ba
commit b652d50291
8 changed files with 95 additions and 159 deletions

View file

@ -450,8 +450,8 @@ QString const& DictdDictionary::getDescription()
if( !dictionaryDescription.isEmpty() ) if( !dictionaryDescription.isEmpty() )
return dictionaryDescription; return dictionaryDescription;
sptr< Dictionary::DataRequest > req = getArticle( GD_NATIVE_TO_WS( L"00databaseinfo" ), sptr< Dictionary::DataRequest > req =
vector< wstring >(), wstring(), false ); getArticle( U"00databaseinfo" , vector< wstring >(), wstring(), false );
if( req->dataSize() > 0 ) if( req->dataSize() > 0 )
dictionaryDescription = Html::unescape( QString::fromUtf8( req->getFullData().data(), req->getFullData().size() ), true ); dictionaryDescription = Html::unescape( QString::fromUtf8( req->getFullData().data(), req->getFullData().size() ), true );

118
dsl.cc
View file

@ -580,7 +580,7 @@ void DslDictionary::loadArticle( uint32_t address,
if ( !articleBody ) if ( !articleBody )
{ {
// throw exCantReadFile( getDictionaryFilenames()[ 0 ] ); // throw exCantReadFile( getDictionaryFilenames()[ 0 ] );
articleData = GD_NATIVE_TO_WS( L"\n\r\t" ) + gd::toWString( QString( "DICTZIP error: " ) + dict_error_str( dz ) ); articleData = U"\n\r\t" + gd::toWString( QString( "DICTZIP error: " ) + dict_error_str( dz ) );
} }
else else
{ {
@ -616,7 +616,7 @@ void DslDictionary::loadArticle( uint32_t address,
{ {
size_t begin = pos; size_t begin = pos;
pos = articleData.find_first_of( GD_NATIVE_TO_WS( L"\n\r" ), begin ); pos = articleData.find_first_of( U"\n\r" , begin );
if ( pos == wstring::npos ) if ( pos == wstring::npos )
pos = articleData.size(); pos = articleData.size();
@ -741,7 +741,7 @@ void DslDictionary::loadArticle( uint32_t address,
if( insidedCard ) if( insidedCard )
{ {
// Check for next insided headword // Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( GD_NATIVE_TO_WS( L"\n\r" ), pos ); wstring::size_type hpos = articleData.find_first_of( U"\n\r" , pos );
if ( hpos == wstring::npos ) if ( hpos == wstring::npos )
hpos = articleData.size(); hpos = articleData.size();
@ -820,13 +820,11 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
return result; return result;
} }
if ( node.tagName == GD_NATIVE_TO_WS( L"b" ) ) if( node.tagName == U"b" )
result += "<b class=\"dsl_b\">" + processNodeChildren( node ) + "</b>"; result += "<b class=\"dsl_b\">" + processNodeChildren( node ) + "</b>";
else else if( node.tagName == U"i" )
if ( node.tagName == GD_NATIVE_TO_WS( L"i" ) )
result += "<i class=\"dsl_i\">" + processNodeChildren( node ) + "</i>"; result += "<i class=\"dsl_i\">" + processNodeChildren( node ) + "</i>";
else else if( node.tagName == U"u" )
if ( node.tagName == GD_NATIVE_TO_WS( L"u" ) )
{ {
string nodeText = processNodeChildren( node ); string nodeText = processNodeChildren( node );
@ -836,39 +834,32 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"dsl_u\">" + nodeText + "</span>"; result += "<span class=\"dsl_u\">" + nodeText + "</span>";
} }
else else if( node.tagName == U"c" )
if ( node.tagName == GD_NATIVE_TO_WS( L"c" ) )
{ {
result += "<font color=\"" + ( node.tagAttrs.size() ? result += "<font color=\"" + ( node.tagAttrs.size() ?
Html::escape( Utf8::encode( node.tagAttrs ) ) : string( "c_default_color" ) ) Html::escape( Utf8::encode( node.tagAttrs ) ) : string( "c_default_color" ) )
+ "\">" + processNodeChildren( node ) + "</font>"; + "\">" + processNodeChildren( node ) + "</font>";
} }
else else if( node.tagName == U"*" )
if ( node.tagName == GD_NATIVE_TO_WS( L"*" ) )
{ {
string id = "O" + getId().substr( 0, 7 ) + "_" + string id = "O" + getId().substr( 0, 7 ) + "_" +
QString::number( articleNom ).toStdString() + QString::number( articleNom ).toStdString() +
"_opt_" + QString::number( optionalPartNom++ ).toStdString(); "_opt_" + QString::number( optionalPartNom++ ).toStdString();
result += "<span class=\"dsl_opt\" id=\"" + id + "\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_opt\" id=\"" + id + "\">" + processNodeChildren( node ) + "</span>";
} }
else else if( node.tagName == U"m" )
if ( node.tagName == GD_NATIVE_TO_WS( L"m" ) ) result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
else else
if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' &&
iswdigit( node.tagName[ 1 ] ) ) iswdigit( node.tagName[ 1 ] ) )
result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>"; result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
else else if( node.tagName == U"trn" )
if ( node.tagName == GD_NATIVE_TO_WS( L"trn" ) )
result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>";
else else if( node.tagName == U"ex" )
if ( node.tagName == GD_NATIVE_TO_WS( L"ex" ) )
result += "<span class=\"dsl_ex\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_ex\">" + processNodeChildren( node ) + "</span>";
else else if( node.tagName == U"com" )
if ( node.tagName == GD_NATIVE_TO_WS( L"com" ) )
result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>";
else else if( node.tagName == U"s" || node.tagName == U"video" )
if ( node.tagName == GD_NATIVE_TO_WS( L"s" ) || node.tagName == GD_NATIVE_TO_WS( L"video" ) )
{ {
string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ); string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false );
string n = resourceDir1 + FsEncoding::encode( filename ); string n = resourceDir1 + FsEncoding::encode( filename );
@ -1013,8 +1004,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
+ "\">" + processNodeChildren( node ) + "</a>"; + "\">" + processNodeChildren( node ) + "</a>";
} }
} }
else else if( node.tagName == U"url" )
if ( node.tagName == GD_NATIVE_TO_WS( L"url" ) )
{ {
string link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) ); string link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) );
if( QUrl::fromEncoded( link.c_str() ).scheme().isEmpty() ) if( QUrl::fromEncoded( link.c_str() ).scheme().isEmpty() )
@ -1037,13 +1027,11 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<a class=\"dsl_url\" href=\"" + link +"\">" + processNodeChildren( node ) + "</a>"; result += "<a class=\"dsl_url\" href=\"" + link +"\">" + processNodeChildren( node ) + "</a>";
} }
else else if( node.tagName == U"!trs" )
if ( node.tagName == GD_NATIVE_TO_WS( L"!trs" ) )
{ {
result += "<span class=\"dsl_trs\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_trs\">" + processNodeChildren( node ) + "</span>";
} }
else else if( node.tagName == U"p" )
if ( node.tagName == GD_NATIVE_TO_WS( L"p") )
{ {
result += "<span class=\"dsl_p\""; result += "<span class=\"dsl_p\"";
@ -1090,8 +1078,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += ">" + processNodeChildren( node ) + "</span>"; result += ">" + processNodeChildren( node ) + "</span>";
} }
else else if( node.tagName == U"'" )
if ( node.tagName == GD_NATIVE_TO_WS( L"'" ) )
{ {
// There are two ways to display the stress: by adding an accent sign or via font styles. // There are two ways to display the stress: by adding an accent sign or via font styles.
// We generate two spans, one with accented data and another one without it, so the // We generate two spans, one with accented data and another one without it, so the
@ -1101,8 +1088,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
+ "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) ) + "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) )
+ "</span></span>"; + "</span></span>";
} }
else else if( node.tagName == U"lang" )
if ( node.tagName == GD_NATIVE_TO_WS( L"lang" ) )
{ {
result += "<span class=\"dsl_lang\""; result += "<span class=\"dsl_lang\"";
if( !node.tagAttrs.empty() ) if( !node.tagAttrs.empty() )
@ -1135,8 +1121,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
} }
result += ">" + processNodeChildren( node ) + "</span>"; result += ">" + processNodeChildren( node ) + "</span>";
} }
else else if( node.tagName == U"ref" )
if ( node.tagName == GD_NATIVE_TO_WS( L"ref" ) )
{ {
QUrl url; QUrl url;
@ -1158,8 +1143,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += string( "<a class=\"dsl_ref\" href=\"" ) + url.toEncoded().data() +"\">" result += string( "<a class=\"dsl_ref\" href=\"" ) + url.toEncoded().data() +"\">"
+ processNodeChildren( node ) + "</a>"; + processNodeChildren( node ) + "</a>";
} }
else else if( node.tagName == U"@" )
if ( node.tagName == GD_NATIVE_TO_WS( L"@" ) )
{ {
// Special case - insided card header was not parsed // Special case - insided card header was not parsed
@ -1174,23 +1158,19 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += string( "<a class=\"dsl_ref\" href=\"" ) + url.toEncoded().data() +"\">" result += string( "<a class=\"dsl_ref\" href=\"" ) + url.toEncoded().data() +"\">"
+ processNodeChildren( node ) + "</a>"; + processNodeChildren( node ) + "</a>";
} }
else else if( node.tagName == U"sub" )
if ( node.tagName == GD_NATIVE_TO_WS( L"sub" ) )
{ {
result += "<sub>" + processNodeChildren( node ) + "</sub>"; result += "<sub>" + processNodeChildren( node ) + "</sub>";
} }
else else if( node.tagName == U"sup" )
if ( node.tagName == GD_NATIVE_TO_WS( L"sup" ) )
{ {
result += "<sup>" + processNodeChildren( node ) + "</sup>"; result += "<sup>" + processNodeChildren( node ) + "</sup>";
} }
else else if( node.tagName == U"t" )
if ( node.tagName == GD_NATIVE_TO_WS( L"t" ) )
{ {
result += "<span class=\"dsl_t\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_t\">" + processNodeChildren( node ) + "</span>";
} }
else else if( node.tagName == U"br" )
if ( node.tagName == GD_NATIVE_TO_WS( L"br" ) )
{ {
result += "<br />"; result += "<br />";
} }
@ -1377,7 +1357,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
{ {
size_t begin = pos; size_t begin = pos;
pos = articleData.find_first_of( GD_NATIVE_TO_WS( L"\n\r" ), begin ); pos = articleData.find_first_of( U"\n\r" , begin );
if ( articleHeadword.empty() ) if ( articleHeadword.empty() )
{ {
@ -1445,7 +1425,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
if( insidedCard ) if( insidedCard )
{ {
// Check for next insided headword // Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( GD_NATIVE_TO_WS( L"\n\r" ), pos ); wstring::size_type hpos = articleData.find_first_of( U"\n\r" , pos );
if ( hpos == wstring::npos ) if ( hpos == wstring::npos )
hpos = articleData.size(); hpos = articleData.size();
@ -2075,37 +2055,37 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
try { // Here we intercept any errors during the read to save line at try { // Here we intercept any errors during the read to save line at
// which the incident happened. We need alive scanner for that. // which the incident happened. We need alive scanner for that.
if ( scanner.getDictionaryName() == GD_NATIVE_TO_WS( L"Abbrev" ) ) if( scanner.getDictionaryName() == U"Abbrev" )
continue; // For now just skip abbreviations continue; // For now just skip abbreviations
// Building the index // Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) ); initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
gdDebug( "Dsl: Building the index for dictionary: %s\n", gdDebug( "Dsl: Building the index for dictionary: %s\n",
gd::toQString( scanner.getDictionaryName() ).toUtf8().data() ); gd::toQString( scanner.getDictionaryName() ).toUtf8().data() );
File::Class idx( indexFile, "wb" ); File::Class idx( indexFile, "wb" );
IdxHeader idxHeader; IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) ); memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header // We write a dummy header first. At the end of the process the header
// will be rewritten with the right values. // will be rewritten with the right values.
idx.write( idxHeader ); idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() ); string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
idx.write( (uint32_t) dictionaryName.size() ); idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() ); idx.write( dictionaryName.data(), dictionaryName.size() );
string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() ); string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() );
if( !soundDictName.empty() ) if( !soundDictName.empty() )
{ {
idxHeader.hasSoundDictionaryName = 1; idxHeader.hasSoundDictionaryName = 1;
idx.write( (uint32_t) soundDictName.size() ); idx.write( (uint32_t)soundDictName.size() );
idx.write( soundDictName.data(), soundDictName.size() ); idx.write( soundDictName.data(), soundDictName.size() );
} }
idxHeader.dslEncoding = scanner.getEncoding(); idxHeader.dslEncoding = scanner.getEncoding();
@ -2163,7 +2143,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
if ( eof ) if ( eof )
break; break;
curString.erase( 0, curString.find_first_not_of( GD_NATIVE_TO_WS( L" \t" ) ) ); curString.erase( 0, curString.find_first_not_of( U" \t" ) );
if ( keys.size() ) if ( keys.size() )
expandTildes( curString, keys.front() ); expandTildes( curString, keys.front() );

View file

@ -160,7 +160,7 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
wstring result; wstring result;
for( list< Node >::const_iterator i = begin(); i != end(); ++i ) for( list< Node >::const_iterator i = begin(); i != end(); ++i )
if( !stripTrsTag || i->tagName != GD_NATIVE_TO_WS( L"!trs" ) ) if( !stripTrsTag || i->tagName != U"!trs" )
result += i->renderAsText( stripTrsTag ); result += i->renderAsText( stripTrsTag );
return result; return result;
@ -169,8 +169,7 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
// Returns true if src == 'm' and dest is 'mX', where X is a digit // Returns true if src == 'm' and dest is 'mX', where X is a digit
static inline bool checkM( wstring const & dest, wstring const & src ) static inline bool checkM( wstring const & dest, wstring const & src )
{ {
return ( src == GD_NATIVE_TO_WS( L"m" ) && dest.size() == 2 && return ( src == U"m" && dest.size() == 2 && dest[ 0 ] == L'm' && iswdigit( dest[ 1 ] ) );
dest[ 0 ] == L'm' && iswdigit( dest[ 1 ] ) );
} }
ArticleDom::ArticleDom( wstring const & str, string const & dictName, ArticleDom::ArticleDom( wstring const & str, string const & dictName,
@ -252,7 +251,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
wstring linkText = Folding::trimWhitespace( *entry ); wstring linkText = Folding::trimWhitespace( *entry );
ArticleDom nodeDom( linkText, dictName, headword_ ); ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"@" ), wstring() ); Node link( Node::Tag(), U"@" , wstring() );
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n ) for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
link.push_back( *n ); link.push_back( *n );
@ -262,13 +261,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
{ {
root.push_back( link ); root.push_back( link );
if( entry != allLinkEntries.end() ) // Add line break before next entry if( entry != allLinkEntries.end() ) // Add line break before next entry
root.push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) ); root.push_back( Node( Node::Tag(), U"br" , wstring() ) );
} }
else else
{ {
stack.back()->push_back( link ); stack.back()->push_back( link );
if( entry != allLinkEntries.end() ) if( entry != allLinkEntries.end() )
stack.back()->push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) ); stack.back()->push_back( Node( Node::Tag(), U"br" , wstring() ) );
} }
} }
@ -349,7 +348,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
} }
// If the tag is [t], we update the transcriptionCount // If the tag is [t], we update the transcriptionCount
if ( name == GD_NATIVE_TO_WS( L"t" ) ) if( name == U"t" )
{ {
if ( isClosing ) if ( isClosing )
{ {
@ -361,7 +360,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
} }
// If the tag is [s], we update the mediaCount // If the tag is [s], we update the mediaCount
if ( name == GD_NATIVE_TO_WS( L"s" ) ) if( name == U"s" )
{ {
if ( isClosing ) if ( isClosing )
{ {
@ -374,14 +373,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
if ( !isClosing ) if ( !isClosing )
{ {
if ( name == GD_NATIVE_TO_WS( L"m" ) || if( name == U"m" || ( name.size() == 2 && name[ 0 ] == L'm' && iswdigit( name[ 1 ] ) ) )
( name.size() == 2 && name[ 0 ] == L'm' && iswdigit( name[ 1 ] ) ) )
{ {
// Opening an 'mX' or 'm' tag closes any previous 'm' tag // Opening an 'mX' or 'm' tag closes any previous 'm' tag
closeTag( GD_NATIVE_TO_WS( L"m" ), stack, false ); closeTag( U"m" , stack, false );
} }
openTag( name, attrs, stack ); openTag( name, attrs, stack );
if ( name == GD_NATIVE_TO_WS( L"br" ) ) if( name == U"br" )
{ {
// [br] tag don't have closing tag // [br] tag don't have closing tag
closeTag( name, stack ); closeTag( name, stack );
@ -465,7 +463,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
processUnsortedParts( linkText, true ); processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ ); ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"ref" ), wstring() ); Node link( Node::Tag(), U"ref" , wstring() );
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n ) for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
link.push_back( *n ); link.push_back( *n );
@ -640,7 +638,7 @@ void ArticleDom::openTag( wstring const & name,
{ {
list< Node > nodesToReopen; list< Node > nodesToReopen;
if( name == GD_NATIVE_TO_WS( L"m" ) || checkM( name, GD_NATIVE_TO_WS( L"m" ) ) ) if( name == U"m" || checkM( name, U"m" ) )
{ {
// All tags above [m] tag will be closed and reopened after // All tags above [m] tag will be closed and reopened after
// to avoid break this tag by closing some other tag. // to avoid break this tag by closing some other tag.
@ -734,7 +732,7 @@ void ArticleDom::closeTag( wstring const & name,
nodesToReopen.push_back( Node( Node::Tag(), stack.back()->tagName, nodesToReopen.push_back( Node( Node::Tag(), stack.back()->tagName,
stack.back()->tagAttrs ) ); stack.back()->tagAttrs ) );
if ( stack.back()->empty() && stack.back()->tagName != GD_NATIVE_TO_WS( L"br" ) ) if( stack.back()->empty() && stack.back()->tagName != U"br" )
{ {
// Empty nodes except [br] tag are deleted since they're no use // Empty nodes except [br] tag are deleted since they're no use
@ -917,19 +915,15 @@ DslScanner::DslScanner( string const & fileName ) :
bool isLangTo = false; bool isLangTo = false;
bool isSoundDict = false; bool isSoundDict = false;
if ( !str.compare( 0, 5, GD_NATIVE_TO_WS( L"#NAME" ), 5 ) ) if( !str.compare( 0, 5, U"#NAME" , 5 ) )
isName = true; isName = true;
else else if( !str.compare( 0, 15, U"#INDEX_LANGUAGE" , 15 ) )
if ( !str.compare( 0, 15, GD_NATIVE_TO_WS( L"#INDEX_LANGUAGE" ), 15 ) )
isLangFrom = true; isLangFrom = true;
else else if( !str.compare( 0, 18, U"#CONTENTS_LANGUAGE" , 18 ) )
if ( !str.compare( 0, 18, GD_NATIVE_TO_WS( L"#CONTENTS_LANGUAGE" ), 18 ) )
isLangTo = true; isLangTo = true;
else else if( !str.compare( 0, 17, U"#SOUND_DICTIONARY" , 17 ) )
if ( !str.compare( 0, 17, GD_NATIVE_TO_WS( L"#SOUND_DICTIONARY" ), 17 ) )
isSoundDict = true; isSoundDict = true;
else else if( str.compare( 0, 17, U"#SOURCE_CODE_PAGE" , 17 ) )
if ( str.compare( 0, 17, GD_NATIVE_TO_WS( L"#SOURCE_CODE_PAGE" ), 17 ) )
continue; continue;
// Locate the argument // Locate the argument
@ -962,14 +956,11 @@ DslScanner::DslScanner( string const & fileName ) :
// We don't need that! // We don't need that!
GD_FDPRINTF( stderr, "Warning: encoding was specified in a Unicode file, ignoring.\n" ); GD_FDPRINTF( stderr, "Warning: encoding was specified in a Unicode file, ignoring.\n" );
} }
else else if( !arg.compare( U"Latin" ) )
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Latin" ) ) )
encoding = Utf8::Windows1252; encoding = Utf8::Windows1252;
else else if( !arg.compare( U"Cyrillic" ) )
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Cyrillic" ) ) )
encoding = Utf8::Windows1251; encoding = Utf8::Windows1251;
else else if( !arg.compare( U"EasternEuropean" ) )
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"EasternEuropean" ) ) )
encoding = Utf8::Windows1250; encoding = Utf8::Windows1250;
else else
{ {
@ -1041,7 +1032,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
#ifdef __WIN32 #ifdef __WIN32
out=line.toStdU32String(); out=line.toStdU32String();
#else #else
out=line.toStdWString(); out=line.toStdU32String();
#endif #endif
return true; return true;
@ -1256,8 +1247,8 @@ void expandOptionalParts( wstring & str, list< wstring > * result,
} }
} }
static const wstring openBraces( GD_NATIVE_TO_WS( L"{{" ) ); static const wstring openBraces( U"{{" );
static const wstring closeBraces( GD_NATIVE_TO_WS( L"}}" ) ); static const wstring closeBraces( U"}}" );
void stripComments( wstring & str, bool & nextLine ) void stripComments( wstring & str, bool & nextLine )
{ {
@ -1356,11 +1347,11 @@ namespace
quint32 dslLanguageToId( wstring const & name ) quint32 dslLanguageToId( wstring const & name )
{ {
static wstring newSp( GD_NATIVE_TO_WS( L"newspelling" ) ); static wstring newSp( U"newspelling" );
static wstring st( GD_NATIVE_TO_WS( L"standard" ) ); static wstring st( U"standard" );
static wstring ms( GD_NATIVE_TO_WS( L"modernsort" ) ); static wstring ms( U"modernsort" );
static wstring ts( GD_NATIVE_TO_WS( L"traditionalsort" ) ); static wstring ts( U"traditionalsort" );
static wstring prc( GD_NATIVE_TO_WS( L"prc" ) ); static wstring prc( U"prc" );
// Any of those endings are to be removed // Any of those endings are to be removed

16
gls.cc
View file

@ -179,13 +179,13 @@ GlsScanner::GlsScanner( string const & fileName ) :
wstring str; wstring str;
wstring *currentField = 0; wstring *currentField = 0;
wstring mark = GD_NATIVE_TO_WS( L"###" ); wstring mark = U"###" ;
wstring titleMark = GD_NATIVE_TO_WS( L"### Glossary title:" ); wstring titleMark = U"### Glossary title:" ;
wstring authorMark = GD_NATIVE_TO_WS( L"### Author:" ); wstring authorMark = U"### Author:" ;
wstring descriptionMark = GD_NATIVE_TO_WS( L"### Description:" ); wstring descriptionMark = U"### Description:" ;
wstring langFromMark = GD_NATIVE_TO_WS( L"### Source language:" ); wstring langFromMark = U"### Source language:" ;
wstring langToMark = GD_NATIVE_TO_WS( L"### Target language:" ); wstring langToMark = U"### Target language:" ;
wstring endOfHeaderMark = GD_NATIVE_TO_WS( L"### Glossary section:" ); wstring endOfHeaderMark = U"### Glossary section:" ;
size_t offset; size_t offset;
for( ; ; ) for( ; ; )
@ -287,7 +287,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset )
#ifdef __WIN32 #ifdef __WIN32
out = line.toStdU32String(); out = line.toStdU32String();
#else #else
out = line.toStdWString(); out = line.toStdU32String();
#endif #endif
return true; return true;

View file

@ -413,7 +413,6 @@ SOURCES += folding.cc \
hunspell.cc \ hunspell.cc \
dictdfiles.cc \ dictdfiles.cc \
audiolink.cc \ audiolink.cc \
wstring.cc \
wstring_qt.cc \ wstring_qt.cc \
processwrapper.cc \ processwrapper.cc \
hotkeywrapper.cc \ hotkeywrapper.cc \

View file

@ -1,19 +0,0 @@
#ifdef __WIN32
#include "wstring.hh"
#include "iconv.hh"
#include <wchar.h>
#include <QString>
namespace gd
{
wstring __nativeToWs( wchar_t const * str )
{
QString qStr=QString::fromWCharArray(str);
return qStr.toStdU32String();
//return Iconv::toWstring( "WCHAR_T", str, wcslen( str ) * sizeof( wchar_t ) );
}
}
#endif

View file

@ -13,12 +13,12 @@
/// all Unicode chars were 2 bytes long. After the Unicode got expanded past /// all Unicode chars were 2 bytes long. After the Unicode got expanded past
/// two-byte representation, the guys at Microsoft had probably decided that /// two-byte representation, the guys at Microsoft had probably decided that
/// the least painful way to go is to just switch to UTF-16. Or so's the theory. /// the least painful way to go is to just switch to UTF-16. Or so's the theory.
/// ///
/// Now, the UTF family is an encoding, made for transit purposes -- is not a /// Now, the UTF family is an encoding, made for transit purposes -- is not a
/// representation. While it's good for passthrough, it's not directly /// representation. While it's good for passthrough, it's not directly
/// applicable for manipulation on Unicode symbols. It must be decoded first to /// applicable for manipulation on Unicode symbols. It must be decoded first to
/// a normal UCS. Think like this: UTF to UCS is something like Base64 to ASCII. /// a normal UCS. Think like this: UTF to UCS is something like Base64 to ASCII.
/// ///
/// The distinction between Microsoft platform and all other ones is that while /// The distinction between Microsoft platform and all other ones is that while
/// the latters are stuck in an 8-bit era and use UTF-8 to pass unicode around /// the latters are stuck in an 8-bit era and use UTF-8 to pass unicode around
/// through its venerable interfaces, the former one is stuck in a 16-bit era, /// through its venerable interfaces, the former one is stuck in a 16-bit era,
@ -27,7 +27,7 @@
/// solution is even more ugly than the 8-bit one, because it doesn't have a /// solution is even more ugly than the 8-bit one, because it doesn't have a
/// benefit of ASCII compatibility, having a much more useless UCS-2 /// benefit of ASCII compatibility, having a much more useless UCS-2
/// compatibility instead. It's stuck in the middle of nowhere, really. /// compatibility instead. It's stuck in the middle of nowhere, really.
/// ///
/// The question is, what are we going to do with all this? When we do Unicode /// The question is, what are we going to do with all this? When we do Unicode
/// processing in GoldenDict, we want to use real Unicode characters, not some /// processing in GoldenDict, we want to use real Unicode characters, not some
/// UTF-16 encoded ones. To that end, we have two options under Windows: first, /// UTF-16 encoded ones. To that end, we have two options under Windows: first,
@ -40,23 +40,14 @@
/// introduce our own gd::wstring and gd::wchar types here. On all systems but /// introduce our own gd::wstring and gd::wchar types here. On all systems but
/// Windows, they are equivalent to std::wstring and wchar_t. On Windows, they /// Windows, they are equivalent to std::wstring and wchar_t. On Windows, they
/// are basic_string< unsigned int > and unsigned int. /// are basic_string< unsigned int > and unsigned int.
///
///
/// Now we have a better built-in type as char32_t and std::u32string
namespace gd namespace gd
{ {
#ifdef __WIN32
typedef char32_t wchar; typedef char32_t wchar;
typedef std::u32string wstring; typedef std::u32string wstring;
// GD_NATIVE_TO_WS is used to convert L"" strings to a const pointer to
// wchar.
wstring __nativeToWs( wchar_t const * );
#define GD_NATIVE_TO_WS( str ) ( gd::__nativeToWs( ( str ) ).c_str() )
#else
typedef wchar_t wchar;
typedef std::basic_string<wchar> wstring;
#define GD_NATIVE_TO_WS( str ) ( str )
#endif
} }
#endif #endif

View file

@ -3,15 +3,9 @@
namespace gd namespace gd
{ {
QString toQString( wstring const & in ) QString toQString( wstring const & in )
{ {
#ifdef __WIN32 return QString::fromStdU32String( in );
return QString::fromUcs4( in.c_str() );
#else
return QString::fromStdWString(in);
#endif
} }
wstring toWString( QString const & in ) wstring toWString( QString const & in )