goldendict-ng/sounddir.cc

496 lines
14 KiB
C++
Raw Normal View History

2012-02-20 21:47:14 +00:00
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "sounddir.hh"
#include "file.hh"
#include "folding.hh"
#include "utf8.hh"
#include "btreeidx.hh"
#include "chunkedstorage.hh"
#include "filetype.hh"
#include "htmlescape.hh"
#include "audiolink.hh"
#include "wstring_qt.hh"
#include "fsencoding.hh"
#include "utils.hh"
#include <set>
#include <QDir>
#include <QFileInfo>
#include <QDebug>
namespace SoundDir {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
namespace {
enum
{
Signature = 0x58524453, // SDRX on little-endian, XRDS on big-endian
CurrentFormatVersion = 1+ BtreeIndexing::FormatVersion + Folding::Version
};
struct IdxHeader
{
uint32_t signature; // First comes the signature, SDRX
uint32_t formatVersion; // File format version, is to be CurrentFormatVersion
uint32_t soundsCount; // Total number of sounds, for informative purposes only
uint32_t chunksOffset; // The offset to chunks' storage
uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
uint32_t indexRootOffset;
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
bool indexIsOldOrBad( string const & indexFile )
{
File::Class idx( indexFile, "rb" );
IdxHeader header;
return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
header.signature != Signature ||
header.formatVersion != CurrentFormatVersion;
}
class SoundDirDictionary: public BtreeIndexing::BtreeDictionary
{
string name;
Mutex idxMutex;
File::Class idx;
IdxHeader idxHeader;
ChunkedStorage::Reader chunks;
QString iconFilename;
public:
SoundDirDictionary( string const & id, string const & name,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ );
string getName() noexcept override
{ return name; }
map< Dictionary::Property, string > getProperties() noexcept override
{ return map< Dictionary::Property, string >(); }
unsigned long getArticleCount() noexcept override
{ return idxHeader.soundsCount; }
unsigned long getWordCount() noexcept override
{ return getArticleCount(); }
sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics ) override
2022-01-09 08:35:07 +00:00
;
sptr< Dictionary::DataRequest > getResource( string const & name ) override
2022-01-09 08:35:07 +00:00
;
protected:
void loadIcon() noexcept override;
};
SoundDirDictionary::SoundDirDictionary( string const & id,
string const & name_,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ ):
BtreeDictionary( id, dictionaryFiles ),
name( name_ ),
idx( indexFile, "rb" ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
iconFilename( iconFilename_ )
{
// Initialize the index
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
idxHeader.indexRootOffset ),
idx, idxMutex );
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
bool ignoreDiacritics )
2022-01-09 08:35:07 +00:00
{
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
/// Make an additional query for each alt
vector< WordArticleLink > altChain = findArticles( alts[ x ], ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
// maps to the chain number
multimap< wstring, unsigned > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
if( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
for( unsigned x = 0; x < chain.size(); ++x )
{
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body.
// Ok. Now, does it go to main articles, or to alternate ones? We list
// main ones first, and alternates after.
// We do the case-folded comparison here.
wstring headwordStripped =
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) );
if( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
2009-04-21 20:09:02 +00:00
multimap< wstring, unsigned > & mapToUse =
( wordCaseFolded == headwordStripped ) ?
mainArticles : alternateArticles;
mapToUse.insert( std::pair< wstring, uint32_t >(
Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) ), x ) );
articlesIncluded.insert( chain[ x ].articleOffset );
}
if ( mainArticles.empty() && alternateArticles.empty() )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such word
string result;
multimap< wstring, uint32_t >::const_iterator i;
string displayedName;
vector< char > chunk;
char * nameBlock;
result += "<table class=\"lsa_play\">";
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
{
if( mainArticles.size() + alternateArticles.size() <= 1 )
displayedName = chain[ i->second ].word;
else
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks.getBlock( chain[ i->second ].articleOffset, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
displayedName = string( nameBlock );
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
}
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::number( chain[ i->second ].articleOffset ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
{
if( mainArticles.size() + alternateArticles.size() <= 1 )
displayedName = chain[ i->second ].word;
else
{
try
{
Mutex::Lock _( idxMutex );
nameBlock = chunks.getBlock( chain[ i->second ].articleOffset, chunk );
if ( nameBlock >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
displayedName = string( nameBlock );
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
continue;
}
}
result += "<tr>";
QUrl url;
url.setScheme( "gdau" );
url.setHost( QString::fromUtf8( getId().c_str() ) );
url.setPath( Utils::Url::ensureLeadingSlash( QString::number( chain[ i->second ].articleOffset ) ) );
string ref = string( "\"" ) + url.toEncoded().data() + "\"";
result += addAudioLink( ref, getId() );
result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
result += "<td><a href=" + ref + ">" + Html::escape( displayedName ) + "</a></td>";
result += "</tr>";
}
result += "</table>";
Dictionary::DataRequestInstant * ret =
new Dictionary::DataRequestInstant( true );
ret->getData().resize( result.size() );
memcpy( &(ret->getData().front()), result.data(), result.size() );
return std::shared_ptr<Dictionary::DataRequestInstant>(ret);
}
void SoundDirDictionary::loadIcon() noexcept
{
if ( dictionaryIconLoaded )
return;
if( !iconFilename.isEmpty() )
{
QFileInfo fInfo( QDir( Config::getConfigDir() ), iconFilename );
if( fInfo.isFile() )
loadIconFromFile( fInfo.absoluteFilePath(), true );
}
if( dictionaryIcon.isNull() )
dictionaryIcon = dictionaryNativeIcon = QIcon(":/icons/playsound_full.png");
dictionaryIconLoaded = true;
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const & name )
2022-01-09 08:35:07 +00:00
{
bool isNumber = false;
uint32_t articleOffset = QString::fromUtf8( name.c_str() ).toULong( &isNumber );
if ( !isNumber )
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
vector< char > chunk;
char * articleData;
try
{
Mutex::Lock _( idxMutex );
2009-04-21 20:09:02 +00:00
articleData = chunks.getBlock( articleOffset, chunk );
if ( articleData >= &chunk.front() + chunk.size() )
{
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
}
catch( ChunkedStorage::exAddressOutOfRange & )
{
// Bad address
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
QDir dir( QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ) );
QString fileName = QDir::toNativeSeparators( dir.filePath( QString::fromUtf8( articleData ) ) );
// Now try loading that file
2009-04-21 20:09:02 +00:00
try
{
File::Class f( FsEncoding::encode( fileName ), "rb" );
2009-04-21 20:09:02 +00:00
sptr< Dictionary::DataRequestInstant > dr =
std::make_shared<Dictionary::DataRequestInstant>( true );
vector< char > & data = dr->getData();
f.seekEnd();
2009-04-21 20:09:02 +00:00
data.resize( f.tell() );
f.rewind();
f.read( &data.front(), data.size() );
return dr;
}
catch( File::Ex & )
{
return std::make_shared<Dictionary::DataRequestInstant>( false ); // No such resource
}
}
void addDir( QDir const & baseDir, QDir const & dir, IndexedWords & indexedWords,
uint32_t & soundsCount, ChunkedStorage::Writer & chunks )
{
QFileInfoList entries = dir.entryInfoList( QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot );
for( QFileInfoList::const_iterator i = entries.constBegin();
i != entries.constEnd(); ++i )
{
if ( i->isDir() )
addDir( baseDir, QDir( i->absoluteFilePath() ), indexedWords, soundsCount, chunks );
else
if ( Filetype::isNameOfSound( i->fileName().toUtf8().data() ) )
{
// Add this sound to index
string fileName = baseDir.relativeFilePath( i->filePath() ).toUtf8().data();
2009-04-21 20:09:02 +00:00
uint32_t articleOffset = chunks.startNewBlock();
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
wstring name = gd::toWString( i->fileName() );
wstring::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos )
name.erase( pos );
indexedWords.addWord( name, articleOffset );
++soundsCount;
}
}
}
}
vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const & soundDirs,
string const & indicesDir,
Dictionary::Initializing & initializing )
2022-01-09 08:35:07 +00:00
{
vector< sptr< Dictionary::Class > > dictionaries;
for( Config::SoundDirs::const_iterator i = soundDirs.begin(); i != soundDirs.end();
++i )
{
QDir dir( i->path );
if ( !dir.exists() )
continue; // No such dir, no dictionary then
vector< string > dictFiles( 1,
FsEncoding::encode( QDir::toNativeSeparators( dir.canonicalPath() ) ) );
dictFiles.push_back( "SoundDir" ); // A mixin
2009-04-21 20:09:02 +00:00
string dictId = Dictionary::makeDictionaryId( dictFiles );
dictFiles.pop_back(); // Remove mixin
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) )
{
// Building the index
qDebug() << "Sounds: Building the index for directory: " << i->path;
initializing.indexingDictionary( i->name.toUtf8().data() );
File::Class idx( indexFile, "wb" );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header
// will be rewritten with the right values.
idx.write( idxHeader );
IndexedWords indexedWords;
2009-04-21 20:09:02 +00:00
ChunkedStorage::Writer chunks( idx );
uint32_t soundsCount = 0; // Header's one is packed, we can't ref it
2009-04-21 20:09:02 +00:00
addDir( dir, dir, indexedWords, soundsCount, chunks );
2009-04-21 20:09:02 +00:00
idxHeader.soundsCount = soundsCount;
// Finish with the chunks
idxHeader.chunksOffset = chunks.finish();
2009-04-21 20:09:02 +00:00
// Build the index
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
idxHeader.indexRootOffset = idxInfo.rootOffset;
// That concludes it. Update the header.
idxHeader.signature = Signature;
idxHeader.formatVersion = CurrentFormatVersion;
idx.rewind();
idx.write( &idxHeader, sizeof( idxHeader ) );
}
dictionaries.push_back( std::make_shared<SoundDirDictionary>( dictId,
i->name.toUtf8().data(),
indexFile,
dictFiles,
i->iconFilename ) );
}
return dictionaries;
}
}