Handle embedded ("insided") cards in DSL dictionaries

This commit is contained in:
Abs62 2012-11-22 17:03:07 +04:00
parent 4ad847515e
commit 07cbdb3b02
2 changed files with 128 additions and 4 deletions

72
dsl.cc
View file

@ -70,7 +70,7 @@ DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
enum
{
Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian
CurrentFormatVersion = 15 + BtreeIndexing::FormatVersion + Folding::Version,
CurrentFormatVersion = 16 + BtreeIndexing::FormatVersion + Folding::Version,
CurrentZipSupportVersion = 2
};
@ -102,6 +102,21 @@ __attribute__((packed))
#endif
;
/// Describes one card embedded ("insided") in the body of another article.
///
/// Instances are collected in a QVector while an article body is scanned and
/// are later written to chunk storage and indexed under their own headword.
/// Copying relies on the compiler-generated member-wise copy operations.
struct InsidedCard
{
  uint32_t offset;   ///< Start position of the embedded card, as reported by the scanner
  uint32_t size;     ///< Extent of the embedded card (end position minus offset)
  wstring headword;  ///< Headword under which the embedded card gets indexed

  /// Builds a fully specified record.
  InsidedCard( uint32_t _offset, uint32_t _size, wstring const & word ) :
    offset( _offset ), size( _size ), headword( word )
  {}

  /// Default constructor, needed so the type can live in value containers.
  /// The numeric fields are zeroed explicitly: leaving them indeterminate
  /// would make any later read (e.g. when serialising them) undefined.
  InsidedCard() :
    offset( 0 ), size( 0 ), headword()
  {}
};
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
{
File::Class idx( indexFile, "rb" );
@ -1131,6 +1146,9 @@ void DslArticleRequest::run()
dict.articleNom += 1;
if( displayedHeadword.empty() || isDslWs( displayedHeadword[ 0 ] ) )
displayedHeadword = word; // Special case - insided card
string articleText, articleAfter;
articleText += "<span class=\"dsl_article\">";
@ -1701,16 +1719,47 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
++articleCount;
wordCount += allEntryWords.size();
int insideInsided = 0;
wstring headword;
QVector< InsidedCard > insidedCards;
uint32_t offset = curOffset;
// Skip the article's body
for( ; ; )
{
if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) ) )
break;
if ( curString.size() && !isDslWs( curString[ 0 ] ) )
if ( ! ( hasString = scanner.readNextLine( curString, curOffset ) )
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
{
if( insideInsided )
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
break;
}
// Find embedded cards
wstring::size_type n = curString.find( L'@' );
if( n == wstring::npos || curString[ n - 1 ] == L'\\' )
continue;
// Handle embedded card
if( insideInsided )
insidedCards.append( InsidedCard( offset, curOffset - offset, headword ) );
offset = curOffset;
headword = Folding::trimWhitespace( curString.substr( n + 1 ) );
if( !headword.empty() )
{
processUnsortedParts( headword, true );
expandTildes( headword, allEntryWords.front() );
insideInsided = true;
}
else
insideInsided = false;
}
// Now that we have read the first string after the article
// itself, we can use its offset to calculate the article's size.
// An end of file works here, too.
@ -1719,6 +1768,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &articleSize, sizeof( articleSize ) );
for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i )
{
uint32_t descOffset = chunks.startNewBlock();
chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) );
chunks.addToBlock( &(*i).size, sizeof( (*i).size ) );
unescapeDsl( (*i).headword );
normalizeHeadword( (*i).headword );
indexedWords.addWord( (*i).headword, descOffset );
++articleCount;
++wordCount;
}
if ( !hasString )
break;
}

View file

@ -76,6 +76,66 @@ ArticleDom::ArticleDom( wstring const & str ):
{
nextChar();
if ( ch == L'@' && !escaped )
{
// Insided card
wstring linkTo;
nextChar();
for( ; ; nextChar() )
{
if( ch == L'\n' )
break;
if( ch != L'\r' )
linkTo.push_back( ch );
}
linkTo = Folding::trimWhitespace( linkTo );
if( !linkTo.empty() )
{
if ( !textNode )
{
Node text = Node( Node::Text(), wstring() );
if ( stack.empty() )
{
root.push_back( text );
stack.push_back( &root.back() );
}
else
{
stack.back()->push_back( text );
stack.push_back( &stack.back()->back() );
}
textNode = stack.back();
}
textNode->text.push_back( L'-' );
textNode->text.push_back( L' ' );
// Close the currently opened text node
stack.pop_back();
textNode = 0;
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"ref" ), wstring() );
link.push_back( Node( Node::Text(), linkTo ) );
if ( stack.empty() )
root.push_back( link );
else
stack.back()->push_back( link );
// Skip to next '@'
while( !( ch == L'@' && !escaped ) )
nextChar();
stringPos--;
ch = L'\n';
escaped = false;
}
} // if ( ch == L'@' )
if ( ch == L'[' && !escaped )
{
// Beginning of a tag.