mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
Handle embedded ("insided") cards in DSL dictionaries
This commit is contained in:
parent
4ad847515e
commit
07cbdb3b02
72
dsl.cc
72
dsl.cc
|
@ -70,7 +70,7 @@ DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian
|
Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian
|
||||||
CurrentFormatVersion = 15 + BtreeIndexing::FormatVersion + Folding::Version,
|
CurrentFormatVersion = 16 + BtreeIndexing::FormatVersion + Folding::Version,
|
||||||
CurrentZipSupportVersion = 2
|
CurrentZipSupportVersion = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -102,6 +102,21 @@ __attribute__((packed))
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
|
struct InsidedCard
|
||||||
|
{
|
||||||
|
uint32_t offset;
|
||||||
|
uint32_t size;
|
||||||
|
wstring headword;
|
||||||
|
InsidedCard( uint32_t _offset, uint32_t _size, wstring const & word ) :
|
||||||
|
offset( _offset ), size( _size ), headword( word )
|
||||||
|
{}
|
||||||
|
InsidedCard( InsidedCard const & e ) :
|
||||||
|
offset( e.offset ), size( e.size ), headword( e.headword )
|
||||||
|
{}
|
||||||
|
InsidedCard() {}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
|
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
|
||||||
{
|
{
|
||||||
File::Class idx( indexFile, "rb" );
|
File::Class idx( indexFile, "rb" );
|
||||||
|
@ -1131,6 +1146,9 @@ void DslArticleRequest::run()
|
||||||
|
|
||||||
dict.articleNom += 1;
|
dict.articleNom += 1;
|
||||||
|
|
||||||
|
if( displayedHeadword.empty() || isDslWs( displayedHeadword[ 0 ] ) )
|
||||||
|
displayedHeadword = word; // Special case - insided card
|
||||||
|
|
||||||
string articleText, articleAfter;
|
string articleText, articleAfter;
|
||||||
|
|
||||||
articleText += "<span class=\"dsl_article\">";
|
articleText += "<span class=\"dsl_article\">";
|
||||||
|
@ -1701,14 +1719,45 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
++articleCount;
|
++articleCount;
|
||||||
wordCount += allEntryWords.size();
|
wordCount += allEntryWords.size();
|
||||||
|
|
||||||
|
int insideInsided = 0;
|
||||||
|
wstring headword;
|
||||||
|
QVector< InsidedCard > insidedCards;
|
||||||
|
uint32_t offset = curOffset;
|
||||||
|
|
||||||
// Skip the article's body
|
// Skip the article's body
|
||||||
for( ; ; )
|
for( ; ; )
|
||||||
{
|
{
|
||||||
if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) ) )
|
|
||||||
break;
|
|
||||||
|
|
||||||
if ( curString.size() && !isDslWs( curString[ 0 ] ) )
|
if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) )
|
||||||
|
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
|
||||||
|
{
|
||||||
|
if( insideInsided )
|
||||||
|
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find embedded cards
|
||||||
|
|
||||||
|
wstring::size_type n = curString.find( L'@' );
|
||||||
|
if( n == wstring::npos || curString[ n - 1 ] == L'\\' )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Handle embedded card
|
||||||
|
|
||||||
|
if( insideInsided )
|
||||||
|
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
|
||||||
|
|
||||||
|
offset = curOffset;
|
||||||
|
headword = Folding::trimWhitespace( curString.substr( n + 1 ) );
|
||||||
|
|
||||||
|
if( !headword.empty() )
|
||||||
|
{
|
||||||
|
processUnsortedParts( headword, true );
|
||||||
|
expandTildes( headword, allEntryWords.front() );
|
||||||
|
insideInsided = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
insideInsided = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now that we're having read the first string after the article
|
// Now that we're having read the first string after the article
|
||||||
|
@ -1719,6 +1768,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
chunks.addToBlock( &articleSize, sizeof( articleSize ) );
|
chunks.addToBlock( &articleSize, sizeof( articleSize ) );
|
||||||
|
|
||||||
|
for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i )
|
||||||
|
{
|
||||||
|
uint32_t descOffset = chunks.startNewBlock();
|
||||||
|
chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) );
|
||||||
|
chunks.addToBlock( &(*i).size, sizeof( (*i).size ) );
|
||||||
|
|
||||||
|
unescapeDsl( (*i).headword );
|
||||||
|
normalizeHeadword( (*i).headword );
|
||||||
|
|
||||||
|
indexedWords.addWord( (*i).headword, descOffset );
|
||||||
|
|
||||||
|
++articleCount;
|
||||||
|
++wordCount;
|
||||||
|
}
|
||||||
|
|
||||||
if ( !hasString )
|
if ( !hasString )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,66 @@ ArticleDom::ArticleDom( wstring const & str ):
|
||||||
{
|
{
|
||||||
nextChar();
|
nextChar();
|
||||||
|
|
||||||
|
if ( ch == L'@' && !escaped )
|
||||||
|
{
|
||||||
|
// Insided card
|
||||||
|
wstring linkTo;
|
||||||
|
nextChar();
|
||||||
|
for( ; ; nextChar() )
|
||||||
|
{
|
||||||
|
if( ch == L'\n' )
|
||||||
|
break;
|
||||||
|
if( ch != L'\r' )
|
||||||
|
linkTo.push_back( ch );
|
||||||
|
}
|
||||||
|
linkTo = Folding::trimWhitespace( linkTo );
|
||||||
|
|
||||||
|
if( !linkTo.empty() )
|
||||||
|
{
|
||||||
|
if ( !textNode )
|
||||||
|
{
|
||||||
|
Node text = Node( Node::Text(), wstring() );
|
||||||
|
|
||||||
|
if ( stack.empty() )
|
||||||
|
{
|
||||||
|
root.push_back( text );
|
||||||
|
stack.push_back( &root.back() );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stack.back()->push_back( text );
|
||||||
|
stack.push_back( &stack.back()->back() );
|
||||||
|
}
|
||||||
|
|
||||||
|
textNode = stack.back();
|
||||||
|
}
|
||||||
|
textNode->text.push_back( L'-' );
|
||||||
|
textNode->text.push_back( L' ' );
|
||||||
|
|
||||||
|
// Close the currently opened text node
|
||||||
|
stack.pop_back();
|
||||||
|
textNode = 0;
|
||||||
|
|
||||||
|
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"ref" ), wstring() );
|
||||||
|
link.push_back( Node( Node::Text(), linkTo ) );
|
||||||
|
|
||||||
|
if ( stack.empty() )
|
||||||
|
root.push_back( link );
|
||||||
|
else
|
||||||
|
stack.back()->push_back( link );
|
||||||
|
|
||||||
|
// Skip to next '@'
|
||||||
|
|
||||||
|
while( !( ch == L'@' && !escaped ) )
|
||||||
|
nextChar();
|
||||||
|
|
||||||
|
stringPos--;
|
||||||
|
ch = L'\n';
|
||||||
|
escaped = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // if ( ch == L'@' )
|
||||||
|
|
||||||
if ( ch == L'[' && !escaped )
|
if ( ch == L'[' && !escaped )
|
||||||
{
|
{
|
||||||
// Beginning of a tag.
|
// Beginning of a tag.
|
||||||
|
|
Loading…
Reference in a new issue