goldendict-ng/src/dict/sounddir.cc
2023-07-30 00:52:31 +08:00

502 lines
15 KiB
C++

/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "sounddir.hh"
#include "file.hh"
#include "folding.hh"
#include "utf8.hh"
#include "btreeidx.hh"
#include "chunkedstorage.hh"
#include "filetype.hh"
#include "htmlescape.hh"
#include "audiolink.hh"
#include "wstring_qt.hh"
#include "utils.hh"
#include <set>
#include <QDir>
#include <QFileInfo>
namespace SoundDir {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
namespace {
// Identification constants stored in the header of every sound-directory
// index file (see IdxHeader::signature and IdxHeader::formatVersion).
enum {
// Magic number expected in the first four bytes of the index file.
Signature = 0x58524453, // SDRX on little-endian, XRDS on big-endian
// Version number of the index format. It is derived from the B-tree and
// folding version constants, so a change to either of them changes this value
// too; indexIsOldOrBad() then reports previously written indices as outdated.
CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version
};
// Header located at the very beginning of a sound-directory index file.
// makeDictionaries() writes this structure straight into the file and the
// SoundDirDictionary constructor reads it back, so the order and the sizes of
// the fields are part of the file format.
struct IdxHeader
{
uint32_t signature; // First comes the signature, SDRX
uint32_t formatVersion; // File format version, is to be CurrentFormatVersion
uint32_t soundsCount; // Total number of sounds, for informative purposes only
uint32_t chunksOffset; // The offset to chunks' storage
uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
uint32_t indexRootOffset; // (this is the second of the two IndexInfo fields)
}
// The packed attribute is only applied when the compiler is not MSVC.
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;
/// Tells whether the index stored in @p indexFile must be regenerated.
///
/// The file is opened for reading and its header is inspected. The index is
/// considered unusable when a complete header cannot be read, when the
/// signature is not the expected one, or when it was written with a format
/// version other than CurrentFormatVersion.
///
/// @param indexFile path of the index file to examine
/// @return true if the index cannot be used as is, false otherwise
bool indexIsOldOrBad( string const & indexFile )
{
  File::Class indexStream( indexFile, "rb" );

  IdxHeader storedHeader;

  // A file too short to hold a whole header is of no use
  if ( indexStream.readRecords( &storedHeader, sizeof( storedHeader ), 1 ) != 1 )
    return true;

  if ( storedHeader.signature != Signature )
    return true;

  return storedHeader.formatVersion != CurrentFormatVersion;
}
// A dictionary made out of a directory with sound files. Headwords are looked
// up through the B-tree index of the base class; each index entry points to a
// block in the chunked storage which holds the name of the sound file.
//
// NOTE: the constructor initializes idxHeader from idx, and chunks from both
// idx and idxHeader. Members are initialized in declaration order, so the
// relative order of idx, idxHeader and chunks below must be preserved.
class SoundDirDictionary: public BtreeIndexing::BtreeDictionary
{
// Name returned by getName()
string name;
// Handed over to openIndex() and locked around every read from 'chunks'
QMutex idxMutex;
// The index file, opened for reading
File::Class idx;
// Header read from the beginning of the index file
IdxHeader idxHeader;
// Reader for the chunked storage found at idxHeader.chunksOffset
ChunkedStorage::Reader chunks;
// Optional icon file name; loadIcon() resolves it against the config directory
QString iconFilename;
public:
// Opens the index file and prepares the dictionary for lookups.
SoundDirDictionary( string const & id,
string const & name,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ );
// Returns the name passed to the constructor.
string getName() noexcept override
{
return name;
}
// This dictionary type exposes no properties: the map is always empty.
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
// Number of sounds as recorded in the index header.
unsigned long getArticleCount() noexcept override
{
return idxHeader.soundsCount;
}
// Reported as equal to the article count.
unsigned long getWordCount() noexcept override
{
return getArticleCount();
}
// Produces an HTML table with a playback link for every sound matching the
// word or any of its alternates.
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
// Returns the contents of a sound file. The resource name starts with the
// decimal offset of the sound's block, optionally followed by '/' and more text.
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
protected:
// Loads the configured icon if there is one, the built-in icon otherwise.
void loadIcon() noexcept override;
// Reads the file name stored in the block at articleOffset into file_name.
// Returns false if the offset does not address a valid block.
bool get_file_name( uint32_t articleOffset, QString & file_name );
};
// Opens an already built index and gets the dictionary ready for lookups.
//
// id              - dictionary id, forwarded to the base class
// name_           - name to be returned by getName()
// indexFile       - path to the index file; opened here for reading
// dictionaryFiles - forwarded to the base class; getResource() treats the
//                   first entry as the directory containing the sounds
// iconFilename_   - optional icon file name, used by loadIcon()
//
// The initializers depend on one another: the header is read from 'idx', and
// the chunk reader needs both 'idx' and the chunksOffset from that header.
SoundDirDictionary::SoundDirDictionary( string const & id,
string const & name_,
string const & indexFile,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ ):
BtreeDictionary( id, dictionaryFiles ),
name( name_ ),
idx( indexFile, "rb" ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
iconFilename( iconFilename_ )
{
// Initialize the index
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
}
/// Builds the article for @p word: an HTML table with one playable row for
/// every matching sound file.
///
/// The index is queried for the word itself and for each of the alternate
/// forms. Index entries pointing to the same sound are listed only once.
/// Sounds whose headword equals the requested word after case folding (and
/// after diacritics removal, when @p ignoreDiacritics is set) are listed
/// first, the remaining ones follow. Within each group the rows are ordered by
/// the case-folded headword.
///
/// When a single sound is listed, its row is labelled with the headword. When
/// there are several, each row is labelled with the file name stored in the
/// index instead; rows whose stored name cannot be read are left out.
///
/// @param word             the word to look up
/// @param alts             alternate forms of the word, each looked up too
/// @param ignoreDiacritics whether diacritics are to be disregarded
/// @return a finished request holding the HTML, or a finished "not found"
///         request if nothing matched
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
                                                                vector< wstring > const & alts,
                                                                wstring const &,
                                                                bool ignoreDiacritics )
{
  vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );

  for ( const auto & alt : alts ) {
    /// Make an additional query for each alt
    vector< WordArticleLink > altChain = findArticles( alt, ignoreDiacritics );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }

  // Both maps go from the case-folded headword to the entry's number in the chain
  multimap< wstring, unsigned > mainArticles, alternateArticles;

  set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
                                    // appear several times. We combat this
                                    // by only allowing them to appear once.

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
  if ( ignoreDiacritics )
    wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );

  for ( unsigned x = 0; x < chain.size(); ++x ) {
    if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
      continue; // We already have this article in the body.

    // Ok. Now, does it go to main articles, or to alternate ones? We list
    // main ones first, and alternates after.

    // We do the case-folded comparison here. The case-folded headword is
    // computed once and serves both as the comparison basis and as the map key.
    const wstring headwordFolded = Folding::applySimpleCaseOnly( chain[ x ].word );

    wstring headwordStripped = headwordFolded;
    if ( ignoreDiacritics )
      headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );

    multimap< wstring, unsigned > & mapToUse =
      ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;

    mapToUse.insert( std::pair( headwordFolded, x ) );

    articlesIncluded.insert( chain[ x ].articleOffset );
  }

  if ( mainArticles.empty() && alternateArticles.empty() )
    return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such word

  // With a single sound the headword is a good enough label. With several of
  // them the stored file names are shown, so that the rows can be told apart.
  const bool singleEntry = ( mainArticles.size() + alternateArticles.size() <= 1 );

  string result;
  vector< char > chunk; // Scratch buffer for the chunk reader, shared by all rows

  result += "<table class=\"lsa_play\">";

  // Appends one table row per entry of the given group to 'result'
  auto appendRows = [ & ]( multimap< wstring, unsigned > const & articles ) {
    for ( auto const & entry : articles ) {
      WordArticleLink const & link = chain[ entry.second ];
      const uint32_t address       = link.articleOffset;

      string displayedName;

      if ( singleEntry )
        displayedName = link.word;
      else {
        try {
          QMutexLocker _( &idxMutex );

          char * nameBlock = chunks.getBlock( address, chunk );

          if ( nameBlock >= &chunk.front() + chunk.size() ) {
            // chunks reader thinks it's okay since zero-sized records can exist,
            // but we don't allow that.
            throw ChunkedStorage::exAddressOutOfRange();
          }

          chunk.back() = 0; // It must end with 0 anyway, but just in case

          displayedName = string( nameBlock );
        }
        catch ( ChunkedStorage::exAddressOutOfRange & ) {
          // Bad address
          continue;
        }
      }

      result += "<tr>";

      auto _displayName = Html::escape( displayedName );

      QString file_name;
      if ( !get_file_name( address, file_name ) ) {
        // Bad address
        file_name = QString::fromStdString( _displayName );
      }

      // The link looks like gdau://<dictionary id>/<block offset>/<file name>
      QUrl url;
      url.setScheme( "gdau" );
      url.setHost( QString::fromUtf8( getId().c_str() ) );
      auto path = Utils::Url::ensureLeadingSlash( QString( "%1/%2" ).arg( QString::number( address ), file_name ) );
      url.setPath( path );

      string ref = string( "\"" ) + url.toEncoded().data() + "\"";

      result += addAudioLink( ref, getId() );

      result += "<td><a href=" + ref + R"(><img src="qrc:///icons/playsound.png" border="0" alt="Play"/></a></td>)";
      result += "<td><a href=" + ref + ">" + _displayName + "</a></td>";
      result += "</tr>";
    }
  };

  appendRows( mainArticles );
  appendRows( alternateArticles );

  result += "</table>";

  auto ret = std::make_shared< Dictionary::DataRequestInstant >( true );
  ret->appendString( result );
  return ret;
}
void SoundDirDictionary::loadIcon() noexcept
{
if ( dictionaryIconLoaded )
return;
if ( !iconFilename.isEmpty() ) {
const QFileInfo fInfo( QDir( Config::getConfigDir() ), iconFilename );
if ( fInfo.isFile() )
loadIconFromFile( fInfo.absoluteFilePath(), true );
}
if ( dictionaryIcon.isNull() )
dictionaryIcon = QIcon( ":/icons/sounddir.svg" );
dictionaryIconLoaded = true;
}
bool SoundDirDictionary::get_file_name( uint32_t articleOffset, QString & file_name )
{
vector< char > chunk;
char * articleData;
try {
QMutexLocker _( &idxMutex );
articleData = chunks.getBlock( articleOffset, chunk );
if ( articleData >= &chunk.front() + chunk.size() ) {
// chunks reader thinks it's okay since zero-sized records can exist,
// but we don't allow that.
throw ChunkedStorage::exAddressOutOfRange();
}
}
catch ( ChunkedStorage::exAddressOutOfRange & ) {
return false; // No such resource
}
chunk.back() = 0; // It must end with 0 anyway, but just in case
file_name = QString::fromUtf8( articleData );
return true;
}
/// Serves the contents of a sound file.
///
/// The resource name begins with the decimal offset of the sound's block in
/// the chunked storage. It may be followed by a '/' and arbitrary text (the
/// links made by getArticle() put the file name there); anything from the
/// first '/' on is disregarded. The name of the file to load is taken from the
/// index and resolved against the first of the dictionary's file names, which
/// is used as the sounds directory.
///
/// @param name resource name, "<offset>" or "<offset>/<anything>"
/// @return a finished request holding the file's bytes, or a finished
///         "not found" request if the name is malformed, the offset is not a
///         valid one, or the file cannot be read
sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const & name )
{
  const auto _name       = QString::fromStdString( name );
  const qint64 sep_index = _name.indexOf( '/' );

  // Only the part before the first separator, if there is one, is the offset
  const QString number = ( sep_index > 0 ) ? _name.left( sep_index ) : _name;

  // Block addresses are 32-bit. toUInt() reports values that don't fit as
  // malformed; a wider conversion would accept them and the assignment would
  // then silently truncate them into the address of some other block.
  bool isNumber                = false;
  const uint32_t articleOffset = number.toUInt( &isNumber );

  if ( !isNumber )
    return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource

  QString file_name;
  if ( !get_file_name( articleOffset, file_name ) ) {
    // Bad address
    return std::make_shared< Dictionary::DataRequestInstant >( false );
  }

  const QDir dir( QDir::fromNativeSeparators( getDictionaryFilenames()[ 0 ].c_str() ) );

  const QString fileName = QDir::toNativeSeparators( dir.filePath( file_name ) );

  // Now try loading that file

  try {
    File::Class f( fileName.toStdString(), "rb" );

    sptr< Dictionary::DataRequestInstant > dr = std::make_shared< Dictionary::DataRequestInstant >( true );

    vector< char > & data = dr->getData();

    f.seekEnd();

    data.resize( f.tell() );

    f.rewind();

    // An empty file leaves the buffer empty; there is nothing to read into then
    if ( !data.empty() )
      f.read( data.data(), data.size() );

    return dr;
  }
  catch ( File::Ex & ) {
    return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource
  }
}
/// Recursively registers the sound files found under @p dir.
///
/// For every file recognized as a sound by its name, the path relative to
/// @p baseDir is stored (with its terminating zero) in a new block of
/// @p chunks, and the file name with the part from the last dot on removed is
/// added to @p indexedWords as a headword pointing to that block.
/// Subdirectories are descended into.
///
/// @param baseDir      the directory stored paths are made relative to
/// @param dir          the directory to scan
/// @param indexedWords receives the headwords
/// @param soundsCount  incremented once per registered sound
/// @param chunks       storage receiving the relative paths
void addDir( QDir const & baseDir,
             QDir const & dir,
             IndexedWords & indexedWords,
             uint32_t & soundsCount,
             ChunkedStorage::Writer & chunks )
{
  const QFileInfoList entries = dir.entryInfoList( QDir::Dirs | QDir::Files | QDir::NoDotAndDotDot );

  for ( const QFileInfo & entry : entries ) {
    if ( entry.isDir() ) {
      addDir( baseDir, QDir( entry.absoluteFilePath() ), indexedWords, soundsCount, chunks );
      continue;
    }

    if ( !Filetype::isNameOfSound( entry.fileName().toUtf8().data() ) )
      continue; // Not a sound, of no interest to us

    // Add this sound to index

    const string relativePath = baseDir.relativeFilePath( entry.filePath() ).toUtf8().data();

    const uint32_t articleOffset = chunks.startNewBlock();
    chunks.addToBlock( relativePath.c_str(), relativePath.size() + 1 );

    // The headword is the file name sans extension
    wstring headword = gd::toWString( entry.fileName() );

    const wstring::size_type dotPos = headword.rfind( L'.' );
    if ( dotPos != wstring::npos )
      headword.erase( dotPos );

    indexedWords.addWord( headword, articleOffset );

    ++soundsCount;
  }
}
} // namespace
/// Creates a dictionary for every configured sound directory that exists.
///
/// The dictionary id is derived from the directory's canonical path. The
/// index file lives in @p indicesDir under that id; it is (re)built first if
/// it is found to need rebuilding or if its header is not acceptable.
///
/// @param soundDirs    the configured sound directories
/// @param indicesDir   prefix the dictionary id is appended to in order to
///                     form the index file name
/// @param initializing notified with the directory's name whenever an index
///                     is about to be built
/// @return the dictionaries, in configuration order
vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const & soundDirs,
                                                      string const & indicesDir,
                                                      Dictionary::Initializing & initializing )
{
  vector< sptr< Dictionary::Class > > dictionaries;

  for ( const auto & soundDir : soundDirs ) {
    QDir dir( soundDir.path );

    if ( !dir.exists() )
      continue; // No such dir, no dictionary then

    vector< string > dictFiles( 1, QDir::toNativeSeparators( dir.canonicalPath() ).toStdString() );

    // The id is computed with a mixin appended to the list of files
    vector< string > idSource( dictFiles );
    idSource.push_back( "SoundDir" );

    string dictId = Dictionary::makeDictionaryId( idSource );

    string indexFile = indicesDir + dictId;

    const bool mustBuild = Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile );

    if ( mustBuild ) {
      // Building the index

      qDebug() << "Sounds: Building the index for directory: " << soundDir.path;

      initializing.indexingDictionary( soundDir.name.toUtf8().data() );

      File::Class idx( indexFile, "wb" );

      IdxHeader idxHeader;

      memset( &idxHeader, 0, sizeof( idxHeader ) );

      // We write a dummy header first. At the end of the process the header
      // will be rewritten with the right values.

      idx.write( idxHeader );

      IndexedWords indexedWords;

      ChunkedStorage::Writer chunks( idx );

      uint32_t soundsCount = 0; // Header's one is packed, we can't ref it

      addDir( dir, dir, indexedWords, soundsCount, chunks );

      // Finish with the chunks
      const uint32_t chunksOffset = chunks.finish();

      // Build the index
      IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );

      // That concludes it. Update the header.
      idxHeader.signature             = Signature;
      idxHeader.formatVersion         = CurrentFormatVersion;
      idxHeader.soundsCount           = soundsCount;
      idxHeader.chunksOffset          = chunksOffset;
      idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
      idxHeader.indexRootOffset       = idxInfo.rootOffset;

      idx.rewind();

      idx.write( &idxHeader, sizeof( idxHeader ) );
    }

    dictionaries.push_back( std::make_shared< SoundDirDictionary >( dictId,
                                                                    soundDir.name.toUtf8().data(),
                                                                    indexFile,
                                                                    dictFiles,
                                                                    soundDir.iconFilename ) );
  }

  return dictionaries;
}
} // namespace SoundDir