Further optimization of headword retrieval

This commit is contained in:
Abs62 2014-03-11 20:28:52 +04:00
parent ce00f26367
commit 10fe58b745
2 changed files with 82 additions and 58 deletions

View file

@ -1079,7 +1079,7 @@ void BtreeIndex::getAllHeadwords( QSet< QString > & headwords )
Mutex::Lock _( *idxFileMutex ); Mutex::Lock _( *idxFileMutex );
findNodeArticleLinks( rootOffset, NULL, NULL, &headwords ); findArticleLinks( NULL, NULL, &headwords );
} }
void BtreeIndex::findAllArticleLinks( QVector< FTSLink > & articleLinks ) void BtreeIndex::findAllArticleLinks( QVector< FTSLink > & articleLinks )
@ -1091,41 +1091,53 @@ void BtreeIndex::findAllArticleLinks( QVector< FTSLink > & articleLinks )
QSet< uint32_t > offsets; QSet< uint32_t > offsets;
findNodeArticleLinks( rootOffset, &articleLinks, &offsets, NULL ); findArticleLinks( &articleLinks, &offsets, NULL );
} }
void BtreeIndex::findNodeArticleLinks( uint32_t currentNodeOffset, void BtreeIndex::findArticleLinks(QVector< FTSLink > * articleLinks,
QVector< FTSLink > * articleLinks,
QSet< uint32_t > * offsets, QSet< uint32_t > * offsets,
QSet< QString > * headwords ) QSet< QString > *headwords )
{ {
// Read a node uint32_t currentNodeOffset = rootOffset;
uint32_t nextLeaf;
uint32_t leafEntries;
if ( !rootNodeLoaded )
{
// Time to load our root node. We do it only once, at the first request.
readNode( rootOffset, rootNode );
rootNodeLoaded = true;
}
char const * leaf = &rootNode.front();
char const * leafEnd = leaf + rootNode.size();
char const * chainPtr = 0;
vector< char > extLeaf; vector< char > extLeaf;
if( rootNodeLoaded && currentNodeOffset == rootOffset ) // Find first leaf
extLeaf = rootNode;
else
readNode( currentNodeOffset, extLeaf );
char const * leaf = &extLeaf.front(); for( ; ; )
{
// Is it a leaf or a node? leafEntries = *(uint32_t *)leaf;
uint32_t leafEntries = *(uint32_t *)leaf;
if ( leafEntries == 0xffffFFFF ) if ( leafEntries == 0xffffFFFF )
{ {
// A node // A node
currentNodeOffset = *( (uint32_t *)leaf + 1 );
uint32_t const * offs = (uint32_t *)leaf + 1; readNode( currentNodeOffset, extLeaf );
leaf = &extLeaf.front();
for( unsigned i = 0; i <= indexNodeSize; i++ ) leafEnd = leaf + extLeaf.size();
findNodeArticleLinks( offs[ i ], articleLinks, offsets, headwords );
} }
else else
{ {
// A leaf // A leaf
chainPtr = leaf + sizeof( uint32_t );
break;
}
}
nextLeaf = ( currentNodeOffset != rootOffset ? idxFile->read< uint32_t >() : 0 );
if ( !leafEntries ) if ( !leafEntries )
{ {
@ -1136,16 +1148,10 @@ void BtreeIndex::findNodeArticleLinks( uint32_t currentNodeOffset,
return; // No match return; // No match
} }
// Build an array containing all chain pointers // Read all chains
char const * ptr = leaf + sizeof( uint32_t );
uint32_t chainSize; for( ; ; )
while( leafEntries-- )
{ {
memcpy( &chainSize, ptr, sizeof( uint32_t ) );
char const * chainPtr = ptr;
vector< WordArticleLink > result = readChain( chainPtr ); vector< WordArticleLink > result = readChain( chainPtr );
for( unsigned i = 0; i < result.size(); i++ ) for( unsigned i = 0; i < result.size(); i++ )
{ {
@ -1160,7 +1166,26 @@ void BtreeIndex::findNodeArticleLinks( uint32_t currentNodeOffset,
articleLinks->push_back( FTSLink( result[ i ].prefix + result[ i ].word, result[ i ].articleOffset ) ); articleLinks->push_back( FTSLink( result[ i ].prefix + result[ i ].word, result[ i ].articleOffset ) );
} }
ptr += sizeof( uint32_t ) + chainSize; if ( chainPtr >= leafEnd )
{
// We're past the current leaf, fetch the next one
if ( nextLeaf )
{
readNode( nextLeaf, extLeaf );
leaf = &extLeaf.front();
leafEnd = leaf + extLeaf.size();
nextLeaf = idxFile->read< uint32_t >();
chainPtr = leaf + sizeof( uint32_t );
leafEntries = *(uint32_t *)leaf;
if ( leafEntries == 0xffffFFFF )
throw exCorruptedChainData();
}
else
break; // That was the last leaf
} }
} }
} }

View file

@ -137,9 +137,8 @@ protected:
/// are left. /// are left.
void antialias( wstring const &, vector< WordArticleLink > & ); void antialias( wstring const &, vector< WordArticleLink > & );
/// Find all article links for node /// Find all article links
void findNodeArticleLinks( uint32_t currentNodeOffset, void findArticleLinks( QVector< FTSLink > * articleLinks,
QVector< FTSLink > * articleLinks,
QSet< uint32_t > * offsets, QSet< uint32_t > * offsets,
QSet< QString > * headwords ); QSet< QString > * headwords );
protected: protected: