Handle embedded ("insided") cards in DSL dictionaries

This commit is contained in:
Abs62 2012-11-22 17:03:07 +04:00
parent 4ad847515e
commit 07cbdb3b02
2 changed files with 128 additions and 4 deletions

72
dsl.cc
View file

@ -70,7 +70,7 @@ DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
enum enum
{ {
Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian
CurrentFormatVersion = 15 + BtreeIndexing::FormatVersion + Folding::Version, CurrentFormatVersion = 16 + BtreeIndexing::FormatVersion + Folding::Version,
CurrentZipSupportVersion = 2 CurrentZipSupportVersion = 2
}; };
@ -102,6 +102,21 @@ __attribute__((packed))
#endif #endif
; ;
/// Describes one card embedded ("insided") within the body of a DSL article.
///
/// Plain value type: it is default-constructible and copyable so that it can
/// be stored in containers. Copying relies on the compiler-generated copy
/// operations, which copy every member.
struct InsidedCard
{
  /// Position at which the embedded card starts (same units as the offsets
  /// supplied by the code that constructs the record).
  uint32_t offset;
  /// Length of the embedded card, in the same units as `offset`.
  uint32_t size;
  /// Headword associated with the embedded card.
  wstring headword;

  /// Builds a fully specified record.
  InsidedCard( uint32_t _offset, uint32_t _size, wstring const & word ) :
    offset( _offset ), size( _size ), headword( word )
  {}

  /// Builds an empty record. The numeric members are zeroed explicitly so
  /// that a default-constructed element never carries indeterminate values.
  InsidedCard() :
    offset( 0 ), size( 0 )
  {}
};
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile ) bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
{ {
File::Class idx( indexFile, "rb" ); File::Class idx( indexFile, "rb" );
@ -1131,6 +1146,9 @@ void DslArticleRequest::run()
dict.articleNom += 1; dict.articleNom += 1;
if( displayedHeadword.empty() || isDslWs( displayedHeadword[ 0 ] ) )
displayedHeadword = word; // Special case - insided card
string articleText, articleAfter; string articleText, articleAfter;
articleText += "<span class=\"dsl_article\">"; articleText += "<span class=\"dsl_article\">";
@ -1701,14 +1719,45 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
++articleCount; ++articleCount;
wordCount += allEntryWords.size(); wordCount += allEntryWords.size();
int insideInsided = 0;
wstring headword;
QVector< InsidedCard > insidedCards;
uint32_t offset = curOffset;
// Skip the article's body // Skip the article's body
for( ; ; ) for( ; ; )
{ {
if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) ) )
break;
if ( curString.size() && !isDslWs( curString[ 0 ] ) ) if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) )
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
{
if( insideInsided )
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
break; break;
}
// Find embedded cards
wstring::size_type n = curString.find( L'@' );
if( n == wstring::npos || curString[ n - 1 ] == L'\\' )
continue;
// Handle embedded card
if( insideInsided )
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
offset = curOffset;
headword = Folding::trimWhitespace( curString.substr( n + 1 ) );
if( !headword.empty() )
{
processUnsortedParts( headword, true );
expandTildes( headword, allEntryWords.front() );
insideInsided = true;
}
else
insideInsided = false;
} }
// Now that we're having read the first string after the article // Now that we're having read the first string after the article
@ -1719,6 +1768,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &articleSize, sizeof( articleSize ) ); chunks.addToBlock( &articleSize, sizeof( articleSize ) );
for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i )
{
uint32_t descOffset = chunks.startNewBlock();
chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) );
chunks.addToBlock( &(*i).size, sizeof( (*i).size ) );
unescapeDsl( (*i).headword );
normalizeHeadword( (*i).headword );
indexedWords.addWord( (*i).headword, descOffset );
++articleCount;
++wordCount;
}
if ( !hasString ) if ( !hasString )
break; break;
} }

View file

@ -76,6 +76,66 @@ ArticleDom::ArticleDom( wstring const & str ):
{ {
nextChar(); nextChar();
if ( ch == L'@' && !escaped )
{
// Insided card
wstring linkTo;
nextChar();
for( ; ; nextChar() )
{
if( ch == L'\n' )
break;
if( ch != L'\r' )
linkTo.push_back( ch );
}
linkTo = Folding::trimWhitespace( linkTo );
if( !linkTo.empty() )
{
if ( !textNode )
{
Node text = Node( Node::Text(), wstring() );
if ( stack.empty() )
{
root.push_back( text );
stack.push_back( &root.back() );
}
else
{
stack.back()->push_back( text );
stack.push_back( &stack.back()->back() );
}
textNode = stack.back();
}
textNode->text.push_back( L'-' );
textNode->text.push_back( L' ' );
// Close the currently opened text node
stack.pop_back();
textNode = 0;
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"ref" ), wstring() );
link.push_back( Node( Node::Text(), linkTo ) );
if ( stack.empty() )
root.push_back( link );
else
stack.back()->push_back( link );
// Skip to next '@'
while( !( ch == L'@' && !escaped ) )
nextChar();
stringPos--;
ch = L'\n';
escaped = false;
}
} // if ( ch == L'@' )
if ( ch == L'[' && !escaped ) if ( ch == L'[' && !escaped )
{ {
// Beginning of a tag. // Beginning of a tag.