mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
Further optimization of headword retrieval
This commit is contained in:
parent
ce00f26367
commit
10fe58b745
131
btreeidx.cc
131
btreeidx.cc
|
@ -1079,7 +1079,7 @@ void BtreeIndex::getAllHeadwords( QSet< QString > & headwords )
|
|||
|
||||
Mutex::Lock _( *idxFileMutex );
|
||||
|
||||
findNodeArticleLinks( rootOffset, NULL, NULL, &headwords );
|
||||
findArticleLinks( NULL, NULL, &headwords );
|
||||
}
|
||||
|
||||
void BtreeIndex::findAllArticleLinks( QVector< FTSLink > & articleLinks )
|
||||
|
@ -1091,76 +1091,101 @@ void BtreeIndex::findAllArticleLinks( QVector< FTSLink > & articleLinks )
|
|||
|
||||
QSet< uint32_t > offsets;
|
||||
|
||||
findNodeArticleLinks( rootOffset, &articleLinks, &offsets, NULL );
|
||||
findArticleLinks( &articleLinks, &offsets, NULL );
|
||||
}
|
||||
|
||||
void BtreeIndex::findNodeArticleLinks( uint32_t currentNodeOffset,
|
||||
QVector< FTSLink > * articleLinks,
|
||||
QSet< uint32_t > * offsets,
|
||||
QSet< QString > * headwords )
|
||||
void BtreeIndex::findArticleLinks(QVector< FTSLink > * articleLinks,
|
||||
QSet< uint32_t > * offsets,
|
||||
QSet< QString > *headwords )
|
||||
{
|
||||
// Read a node
|
||||
uint32_t currentNodeOffset = rootOffset;
|
||||
uint32_t nextLeaf;
|
||||
uint32_t leafEntries;
|
||||
|
||||
if ( !rootNodeLoaded )
|
||||
{
|
||||
// Time to load our root node. We do it only once, at the first request.
|
||||
readNode( rootOffset, rootNode );
|
||||
rootNodeLoaded = true;
|
||||
}
|
||||
|
||||
char const * leaf = &rootNode.front();
|
||||
char const * leafEnd = leaf + rootNode.size();
|
||||
char const * chainPtr = 0;
|
||||
|
||||
vector< char > extLeaf;
|
||||
|
||||
if( rootNodeLoaded && currentNodeOffset == rootOffset )
|
||||
extLeaf = rootNode;
|
||||
else
|
||||
readNode( currentNodeOffset, extLeaf );
|
||||
// Find first leaf
|
||||
|
||||
char const * leaf = &extLeaf.front();
|
||||
|
||||
// Is it a leaf or a node?
|
||||
|
||||
uint32_t leafEntries = *(uint32_t *)leaf;
|
||||
|
||||
if ( leafEntries == 0xffffFFFF )
|
||||
for( ; ; )
|
||||
{
|
||||
// A node
|
||||
leafEntries = *(uint32_t *)leaf;
|
||||
|
||||
uint32_t const * offs = (uint32_t *)leaf + 1;
|
||||
|
||||
for( unsigned i = 0; i <= indexNodeSize; i++ )
|
||||
findNodeArticleLinks( offs[ i ], articleLinks, offsets, headwords );
|
||||
}
|
||||
else
|
||||
{
|
||||
// A leaf
|
||||
|
||||
if ( !leafEntries )
|
||||
if ( leafEntries == 0xffffFFFF )
|
||||
{
|
||||
// Empty leaf? This is only possible for an entirely empty tree.
|
||||
if ( currentNodeOffset != rootOffset )
|
||||
throw exCorruptedChainData();
|
||||
else
|
||||
return; // No match
|
||||
// A node
|
||||
currentNodeOffset = *( (uint32_t *)leaf + 1 );
|
||||
readNode( currentNodeOffset, extLeaf );
|
||||
leaf = &extLeaf.front();
|
||||
leafEnd = leaf + extLeaf.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
// A leaf
|
||||
chainPtr = leaf + sizeof( uint32_t );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nextLeaf = ( currentNodeOffset != rootOffset ? idxFile->read< uint32_t >() : 0 );
|
||||
|
||||
if ( !leafEntries )
|
||||
{
|
||||
// Empty leaf? This is only possible for an entirely empty tree.
|
||||
if ( currentNodeOffset != rootOffset )
|
||||
throw exCorruptedChainData();
|
||||
else
|
||||
return; // No match
|
||||
}
|
||||
|
||||
// Read all chains
|
||||
|
||||
for( ; ; )
|
||||
{
|
||||
vector< WordArticleLink > result = readChain( chainPtr );
|
||||
for( unsigned i = 0; i < result.size(); i++ )
|
||||
{
|
||||
if( headwords )
|
||||
headwords->insert( QString::fromUtf8( ( result[ i ].prefix + result[ i ].word ).c_str() ) );
|
||||
|
||||
if( !offsets || offsets->contains( result[ i ].articleOffset ) )
|
||||
continue;
|
||||
|
||||
offsets->insert( result[ i ].articleOffset );
|
||||
if( articleLinks )
|
||||
articleLinks->push_back( FTSLink( result[ i ].prefix + result[ i ].word, result[ i ].articleOffset ) );
|
||||
}
|
||||
|
||||
// Build an array containing all chain pointers
|
||||
char const * ptr = leaf + sizeof( uint32_t );
|
||||
|
||||
uint32_t chainSize;
|
||||
|
||||
while( leafEntries-- )
|
||||
if ( chainPtr >= leafEnd )
|
||||
{
|
||||
memcpy( &chainSize, ptr, sizeof( uint32_t ) );
|
||||
// We're past the current leaf, fetch the next one
|
||||
|
||||
char const * chainPtr = ptr;
|
||||
vector< WordArticleLink > result = readChain( chainPtr );
|
||||
for( unsigned i = 0; i < result.size(); i++ )
|
||||
if ( nextLeaf )
|
||||
{
|
||||
if( headwords )
|
||||
headwords->insert( QString::fromUtf8( ( result[ i ].prefix + result[ i ].word ).c_str() ) );
|
||||
readNode( nextLeaf, extLeaf );
|
||||
leaf = &extLeaf.front();
|
||||
leafEnd = leaf + extLeaf.size();
|
||||
|
||||
if( !offsets || offsets->contains( result[ i ].articleOffset ) )
|
||||
continue;
|
||||
nextLeaf = idxFile->read< uint32_t >();
|
||||
chainPtr = leaf + sizeof( uint32_t );
|
||||
|
||||
offsets->insert( result[ i ].articleOffset );
|
||||
if( articleLinks )
|
||||
articleLinks->push_back( FTSLink( result[ i ].prefix + result[ i ].word, result[ i ].articleOffset ) );
|
||||
leafEntries = *(uint32_t *)leaf;
|
||||
|
||||
if ( leafEntries == 0xffffFFFF )
|
||||
throw exCorruptedChainData();
|
||||
}
|
||||
|
||||
ptr += sizeof( uint32_t ) + chainSize;
|
||||
else
|
||||
break; // That was the last leaf
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -137,11 +137,10 @@ protected:
|
|||
/// are left.
|
||||
void antialias( wstring const &, vector< WordArticleLink > & );
|
||||
|
||||
/// Find all article links for node
|
||||
void findNodeArticleLinks( uint32_t currentNodeOffset,
|
||||
QVector< FTSLink > * articleLinks,
|
||||
QSet< uint32_t > * offsets,
|
||||
QSet< QString > * headwords );
|
||||
/// Find all article links
|
||||
void findArticleLinks( QVector< FTSLink > * articleLinks,
|
||||
QSet< uint32_t > * offsets,
|
||||
QSet< QString > * headwords );
|
||||
protected:
|
||||
|
||||
Mutex * idxFileMutex;
|
||||
|
|
Loading…
Reference in a new issue