Merge remote-tracking branch 'origin/staged' into dev

This commit is contained in:
Xiao YiFang 2022-06-03 11:12:55 +08:00
commit 6a18eb9dac
12 changed files with 295 additions and 114 deletions

View file

@ -198,9 +198,6 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
QUrl refererUrl = QUrl::fromEncoded( referer ); QUrl refererUrl = QUrl::fromEncoded( referer );
//GD_DPRINTF( "Considering %s vs %s\n", getHostBase( req.url() ).toUtf8().data(),
// getHostBase( refererUrl ).toUtf8().data() );
if ( !url.host().endsWith( refererUrl.host() ) && if ( !url.host().endsWith( refererUrl.host() ) &&
getHostBaseFromUrl( url ) != getHostBaseFromUrl( refererUrl ) && !url.scheme().startsWith("data") ) getHostBaseFromUrl( url ) != getHostBaseFromUrl( refererUrl ) && !url.scheme().startsWith("data") )
{ {
@ -449,7 +446,7 @@ qint64 ArticleResourceReply::readData( char * out, qint64 maxSize )
qint64 left = avail - alreadyRead; qint64 left = avail - alreadyRead;
qint64 toRead = maxSize < left ? maxSize : left; qint64 toRead = maxSize < left ? maxSize : left;
GD_DPRINTF( "====reading %d bytes\n", (int)toRead ); GD_DPRINTF( "====reading %d bytes", (int)toRead );
try try
{ {
@ -457,7 +454,7 @@ qint64 ArticleResourceReply::readData( char * out, qint64 maxSize )
} }
catch( std::exception & e ) catch( std::exception & e )
{ {
qWarning( "getDataSlice error: %s\n", e.what() ); qWarning( "getDataSlice error: %s", e.what() );
} }
alreadyRead += toRead; alreadyRead += toRead;

View file

@ -402,6 +402,9 @@ void ArticleView::showDefinition( Config::InputPhrase const & phrase, unsigned g
if ( scrollTo.size() ) if ( scrollTo.size() )
Utils::Url::addQueryItem( req, "scrollto", scrollTo ); Utils::Url::addQueryItem( req, "scrollto", scrollTo );
if(delayedHighlightText.size())
Utils::Url::addQueryItem( req, "regexp", delayedHighlightText );
Contexts::Iterator pos = contexts.find( "gdanchor" ); Contexts::Iterator pos = contexts.find( "gdanchor" );
if( pos != contexts.end() ) if( pos != contexts.end() )
{ {
@ -579,6 +582,12 @@ void ArticleView::loadFinished( bool result )
} }
if( Utils::Url::hasQueryItem( ui.definition->url(), "regexp" ) ) if( Utils::Url::hasQueryItem( ui.definition->url(), "regexp" ) )
highlightFTSResults(); highlightFTSResults();
if( !delayedHighlightText.isEmpty() )
{
// findText( delayedHighlightText, QWebEnginePage::FindCaseSensitively ,[](bool){});
delayedHighlightText.clear();
}
} }
void ArticleView::loadProgress(int ){ void ArticleView::loadProgress(int ){
@ -1592,6 +1601,11 @@ void ArticleView::setSelectionBySingleClick( bool set )
ui.definition->setSelectionBySingleClick( set ); ui.definition->setSelectionBySingleClick( set );
} }
/// Stores @p text in delayedHighlightText for the next article load.
/// showDefinition() appends a non-empty value to the request URL as the
/// "regexp" query item, and loadFinished() clears it once the page has loaded.
void ArticleView::setDelayedHighlightText(QString const & text)
{
delayedHighlightText = text;
}
void ArticleView::back() void ArticleView::back()
{ {
// Don't allow navigating back to page 0, which is usually the initial // Don't allow navigating back to page 0, which is usually the initial
@ -1712,6 +1726,7 @@ void ArticleView::contextMenuRequested( QPoint const & pos )
QAction * sendWordToInputLineAction = 0; QAction * sendWordToInputLineAction = 0;
QAction * saveImageAction = 0; QAction * saveImageAction = 0;
QAction * saveSoundAction = 0; QAction * saveSoundAction = 0;
QAction * saveBookmark = 0;
#if( QT_VERSION < QT_VERSION_CHECK( 6, 0, 0 ) ) #if( QT_VERSION < QT_VERSION_CHECK( 6, 0, 0 ) )
const QWebEngineContextMenuData * menuData = &(r->contextMenuData()); const QWebEngineContextMenuData * menuData = &(r->contextMenuData());
@ -1832,6 +1847,13 @@ void ArticleView::contextMenuRequested( QPoint const & pos )
} }
} }
if(text.size())
{
// avoid too long in the menu ,use left 30 characters.
saveBookmark = new QAction( tr( "Save &Bookmark \"%1...\"" ).arg( text.left( 30 ) ), &menu );
menu.addAction( saveBookmark );
}
// add anki menu // add anki menu
if( !text.isEmpty() && cfg.preferences.ankiConnectServer.enabled ) if( !text.isEmpty() && cfg.preferences.ankiConnectServer.enabled )
{ {
@ -1931,7 +1953,11 @@ void ArticleView::contextMenuRequested( QPoint const & pos )
QDesktopServices::openUrl( targetUrl ); QDesktopServices::openUrl( targetUrl );
else else
if ( result == lookupSelection ) if ( result == lookupSelection )
showDefinition( selectedText, getGroup( ui.definition->url() ), getCurrentArticle() ); showDefinition( text, getGroup( ui.definition->url() ), getCurrentArticle() );
else if( result == saveBookmark )
{
emit saveBookmarkSignal( text.left( 60 ) );
}
else if( result == sendToAnkiAction ) else if( result == sendToAnkiAction )
{ {
sendToAnki( ui.definition->title(), ui.definition->selectedText() ); sendToAnki( ui.definition->title(), ui.definition->selectedText() );
@ -2333,42 +2359,44 @@ void ArticleView::performFindOperation( bool restart, bool backwards, bool check
if ( backwards ) if ( backwards )
f |= QWebEnginePage::FindBackward; f |= QWebEnginePage::FindBackward;
bool setMark = text.size() && !findText(text, f); findText( text,
f,
[ &text, this ]( bool match )
{
bool setMark = !text.isEmpty() && !match;
if ( ui.searchText->property( "noResults" ).toBool() != setMark ) if( ui.searchText->property( "noResults" ).toBool() != setMark )
{ {
ui.searchText->setProperty( "noResults", setMark ); ui.searchText->setProperty( "noResults", setMark );
// Reload stylesheet // Reload stylesheet
reloadStyleSheet(); reloadStyleSheet();
} }
} );
} }
bool ArticleView::findText(QString& text, const QWebEnginePage::FindFlags& f) void ArticleView::findText( QString & text,
const QWebEnginePage::FindFlags & f,
const std::function< void( bool match ) > & callback )
{ {
bool r; #if( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) )
// turn async to sync invoke. ui.definition->findText( text,
QSharedPointer<QEventLoop> loop = QSharedPointer<QEventLoop>(new QEventLoop()); f,
QTimer::singleShot(1000, loop.data(), &QEventLoop::quit); [ callback ]( const QWebEngineFindTextResult & result )
#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0))
ui.definition->findText(text, f, [&](const QWebEngineFindTextResult& result)
{ {
if(loop->isRunning()){ auto r = result.numberOfMatches() > 0;
r = result.numberOfMatches()>0; if( callback )
loop->quit(); callback( r );
} }); } );
#else #else
ui.definition->findText(text, f, [&](bool result) ui.definition->findText( text,
f,
[ callback ]( bool result )
{ {
if(loop->isRunning()){ if( callback )
r = result; callback( result );
loop->quit(); } );
} });
#endif #endif
loop->exec();
return r;
} }
void ArticleView::reloadStyleSheet() void ArticleView::reloadStyleSheet()

View file

@ -79,6 +79,8 @@ class ArticleView: public QFrame
bool ftsSearchIsOpened, ftsSearchMatchCase; bool ftsSearchIsOpened, ftsSearchMatchCase;
int ftsPosition; int ftsPosition;
QString delayedHighlightText;
void highlightFTSResults(); void highlightFTSResults();
void highlightAllFtsOccurences( QWebEnginePage::FindFlags flags ); void highlightAllFtsOccurences( QWebEnginePage::FindFlags flags );
void performFtsFindOperation( bool backwards ); void performFtsFindOperation( bool backwards );
@ -157,6 +159,8 @@ public:
/// Called when preference changes /// Called when preference changes
void setSelectionBySingleClick( bool set ); void setSelectionBySingleClick( bool set );
void setDelayedHighlightText(QString const & text);
public slots: public slots:
/// Goes back in history /// Goes back in history
@ -227,6 +231,10 @@ public:
ResourceToSaveHandler * saveResource( const QUrl & url, const QString & fileName ); ResourceToSaveHandler * saveResource( const QUrl & url, const QString & fileName );
ResourceToSaveHandler * saveResource( const QUrl & url, const QUrl & ref, const QString & fileName ); ResourceToSaveHandler * saveResource( const QUrl & url, const QUrl & ref, const QString & fileName );
void findText( QString & text,
const QWebEnginePage::FindFlags & f,
const std::function< void( bool match ) > & callback = nullptr );
signals: signals:
void iconChanged( ArticleView *, QIcon const & icon ); void iconChanged( ArticleView *, QIcon const & icon );
@ -285,6 +293,8 @@ signals:
void inspectSignal(QWebEngineView * view); void inspectSignal(QWebEngineView * view);
void saveBookmarkSignal( const QString & bookmark );
public slots: public slots:
void on_searchPrevious_clicked(); void on_searchPrevious_clicked();
@ -391,7 +401,6 @@ private:
void performFindOperation( bool restart, bool backwards, bool checkHighlight = false ); void performFindOperation( bool restart, bool backwards, bool checkHighlight = false );
bool findText(QString& text, const QWebEnginePage::FindFlags& f);
void reloadStyleSheet(); void reloadStyleSheet();

View file

@ -857,7 +857,7 @@ bool EpwingBook::getNextHeadword( EpwingHeadword & head )
{ {
EB_Position pos; EB_Position pos;
QRegularExpression badLinks( "#(v|n)\\d" ); QRegularExpression badLinks( "#(v|n)\\d", QRegularExpression::UseUnicodePropertiesOption);
// At first we check references queue // At first we check references queue
while( !LinksQueue.isEmpty() ) while( !LinksQueue.isEmpty() )

View file

@ -71,8 +71,8 @@ bool parseSearchString( QString const & str, QStringList & indexWords,
{ {
searchWords.clear(); searchWords.clear();
indexWords.clear(); indexWords.clear();
QRegularExpression spacesRegExp( "\\W+" ); QRegularExpression spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}" ); QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption ); QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption );
QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption); QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption);

View file

@ -4,61 +4,30 @@
#include <QString> #include <QString>
#include "gddebug.hh" #include "gddebug.hh"
#include <QDebug> #include <QDebug>
#if(QT_VERSION >= QT_VERSION_CHECK(6,0,0)) #if( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) )
#include <QtCore5Compat/QTextCodec> #include <QtCore5Compat/QTextCodec>
#else #else
#include <QTextCodec> #include <QTextCodec>
#endif #endif
QFile * logFilePtr; QFile * logFilePtr;
static QTextCodec * utf8Codec;
void gdWarning(const char *msg, ...) void gdWarning( const char * msg, ... )
{ {
va_list ap; va_list ap;
va_start(ap, msg); va_start( ap, msg );
QTextCodec *localeCodec = 0;
if( logFilePtr && logFilePtr->isOpen() )
{
if( utf8Codec == 0 )
utf8Codec = QTextCodec::codecForName( "UTF8" );
localeCodec = QTextCodec::codecForLocale();
QTextCodec::setCodecForLocale( utf8Codec );
}
qWarning() << QString().vasprintf( msg, ap ); qWarning() << QString().vasprintf( msg, ap );
if( logFilePtr && logFilePtr->isOpen() ) va_end( ap );
{
QTextCodec::setCodecForLocale( localeCodec );
}
va_end(ap);
} }
void gdDebug(const char *msg, ...) void gdDebug( const char * msg, ... )
{ {
va_list ap; va_list ap;
va_start(ap, msg); va_start( ap, msg );
// QTextCodec *localeCodec = 0;
// if( logFilePtr && logFilePtr->isOpen() )
// {
// if( utf8Codec == 0 )
// utf8Codec = QTextCodec::codecForName( "UTF8" );
// localeCodec = QTextCodec::codecForLocale();
// QTextCodec::setCodecForLocale( utf8Codec );
// }
qDebug().noquote() << QString().vasprintf( msg, ap ); qDebug().noquote() << QString().vasprintf( msg, ap );
// if( logFilePtr && logFilePtr->isOpen() ) va_end( ap );
// {
// QTextCodec::setCodecForLocale( localeCodec );
// }
va_end(ap);
} }

View file

@ -12,7 +12,29 @@ system(git describe --tags --always --dirty): hasGit=1
!isEmpty(hasGit){ !isEmpty(hasGit){
GIT_HASH=$$system(git rev-parse --short=8 HEAD ) GIT_HASH=$$system(git rev-parse --short=8 HEAD )
} }
system(echo $${VERSION}.$${GIT_HASH} > version.txt)
win32{
# date /T output is locale aware.
DD=$$system(date /T)
DATE =$$replace(DD, / , )
}
else{
DATE=$$system(date '+%y%m%d')
}
system(echo $${VERSION}.$${GIT_HASH} on $${DATE} > version.txt)
!CONFIG( verbose_build_output ) {
!win32|*-msvc* {
# Reduce build log verbosity except for MinGW builds (mingw-make cannot
# execute "@echo ..." commands inserted by qmake).
CONFIG += silent
}
}
CONFIG( release, debug|release ) {
DEFINES += NDEBUG
}
# DEPENDPATH += . generators # DEPENDPATH += . generators
INCLUDEPATH += . INCLUDEPATH += .

View file

@ -64,7 +64,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
if ( !ZipFile::readLocalHeader( zip, header ) ) if ( !ZipFile::readLocalHeader( zip, header ) )
{ {
GD_DPRINTF( "Failed to load header\n" ); GD_DPRINTF( "Failed to load header" );
return false; return false;
} }
@ -73,13 +73,13 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
switch( header.compressionMethod ) switch( header.compressionMethod )
{ {
case ZipFile::Uncompressed: case ZipFile::Uncompressed:
GD_DPRINTF( "Uncompressed\n" ); GD_DPRINTF( "Uncompressed" );
data.resize( header.uncompressedSize ); data.resize( header.uncompressedSize );
return (size_t) zip.read( &data.front(), data.size() ) == data.size(); return (size_t) zip.read( &data.front(), data.size() ) == data.size();
case ZipFile::Deflated: case ZipFile::Deflated:
{ {
GD_DPRINTF( "Deflated\n" ); GD_DPRINTF( "Deflated" );
// Now do the deflation // Now do the deflation

View file

@ -329,7 +329,12 @@
<translation></translation> <translation></translation>
</message> </message>
<message> <message>
<location filename="../articleview.cc" line="1867"/> <location filename="../articleview.cc" line="1853"/>
<source>Save &amp;Bookmark &quot;%1...&quot;</source>
<translation>&amp;S%1...</translation>
</message>
<message>
<location filename="../articleview.cc" line="1861"/>
<source>&amp;Send &quot;%1&quot; to anki with selected text.</source> <source>&amp;Send &quot;%1&quot; to anki with selected text.</source>
<translation>%1anki并附带选择的文本</translation> <translation>%1anki并附带选择的文本</translation>
</message> </message>

View file

@ -1690,6 +1690,7 @@ ArticleView * MainWindow::createNewTab( bool switchToIt,
connect( view, SIGNAL( zoomIn()), this, SLOT( zoomin() ) ); connect( view, SIGNAL( zoomIn()), this, SLOT( zoomin() ) );
connect( view, SIGNAL( zoomOut()), this, SLOT( zoomout() ) ); connect( view, SIGNAL( zoomOut()), this, SLOT( zoomout() ) );
connect( view, &ArticleView::saveBookmarkSignal, this, &MainWindow::addBookmarkToFavorite );
view->setSelectionBySingleClick( cfg.preferences.selectWordBySingleClick ); view->setSelectionBySingleClick( cfg.preferences.selectWordBySingleClick );
@ -3503,7 +3504,7 @@ void MainWindow::on_saveArticle_triggered()
// MDict anchors // MDict anchors
QRegularExpression anchorLinkRe( QRegularExpression anchorLinkRe(
"(<\\s*a\\s+[^>]*\\b(?:name|id)\\b\\s*=\\s*[\"']*g[0-9a-f]{32}_)([0-9a-f]+_)(?=[^\"'])", "(<\\s*a\\s+[^>]*\\b(?:name|id)\\b\\s*=\\s*[\"']*g[0-9a-f]{32}_)([0-9a-f]+_)(?=[^\"'])",
QRegularExpression::PatternOption::CaseInsensitiveOption ); QRegularExpression::PatternOption::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption );
html.replace( anchorLinkRe, "\\1" ); html.replace( anchorLinkRe, "\\1" );
if( complete ) if( complete )
@ -4656,6 +4657,15 @@ void MainWindow::addWordToFavorites( QString const & word, unsigned groupId )
ui.favoritesPaneWidget->addHeadword( folder, word ); ui.favoritesPaneWidget->addHeadword( folder, word );
} }
/// Adds a bookmark entry to the favorites pane.
/// The entry has the form "<word>~~~<text>", where <word> is the unescaped
/// header of the currently selected tab and <text> is the bookmarked text.
void MainWindow::addBookmarkToFavorite( QString const & text )
{
  // The word being bookmarked is taken from the header of the active tab.
  const int activeTab = ui.tabWidget->currentIndex();
  QString tabWord = unescapeTabHeader( ui.tabWidget->tabText( activeTab ) );

  // Both placeholders are substituted in a single arg() call.
  ui.favoritesPaneWidget->addHeadword( nullptr, QString( "%1~~~%2" ).arg( tabWord, text ) );
}
void MainWindow::addAllTabsToFavorites() void MainWindow::addAllTabsToFavorites()
{ {
QString folder; QString folder;
@ -4728,8 +4738,22 @@ void MainWindow::headwordFromFavorites( QString const & headword,
} }
// Show headword without lost of focus on Favorites tree // Show headword without lost of focus on Favorites tree
setTranslateBoxTextAndClearSuffix( headword, EscapeWildcards, DisablePopup ); // bookmark cases: the favorite item may like this "word~~~selectedtext"
showTranslationFor(headword ); auto words = headword.split( "~~~" );
setTranslateBoxTextAndClearSuffix( words[0], EscapeWildcards, DisablePopup );
//must be a bookmark.
if(words.size()>1)
{
auto view = getCurrentArticleView();
if(view)
{
view->setDelayedHighlightText(words[1]);// findText( words[ 1 ], QWebEnginePage::FindCaseSensitively );
}
}
showTranslationFor( words[ 0 ] );
} }
#ifdef Q_OS_WIN32 #ifdef Q_OS_WIN32

View file

@ -462,6 +462,8 @@ private slots:
void addWordToFavorites( QString const & word, unsigned groupId ); void addWordToFavorites( QString const & word, unsigned groupId );
void addBookmarkToFavorite( QString const & text );
bool isWordPresentedInFavorites( QString const & word, unsigned groupId ); bool isWordPresentedInFavorites( QString const & word, unsigned groupId );
void sendWordToInputLine( QString const & word ); void sendWordToInputLine( QString const & word );

183
zim.cc
View file

@ -78,7 +78,8 @@ enum CompressionType
struct ZIM_header struct ZIM_header
{ {
quint32 magicNumber; quint32 magicNumber;
quint32 version; quint16 majorVersion;
quint16 minorVersion;
quint8 uuid[ 16 ]; quint8 uuid[ 16 ];
quint32 articleCount; quint32 articleCount;
quint32 clusterCount; quint32 clusterCount;
@ -125,7 +126,7 @@ __attribute__((packed))
enum enum
{ {
Signature = 0x584D495A, // ZIMX on little-endian, XMIZ on big-endian Signature = 0x584D495A, // ZIMX on little-endian, XMIZ on big-endian
CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version CurrentFormatVersion = 3 + BtreeIndexing::FormatVersion + Folding::Version
}; };
struct IdxHeader struct IdxHeader
@ -158,13 +159,15 @@ struct Cache
quint32 clusterNumber; quint32 clusterNumber;
int stamp; int stamp;
int count, size; int count, size;
unsigned blobs_offset_size;
Cache() : Cache() :
data( 0 ), data( 0 ),
clusterNumber( 0 ), clusterNumber( 0 ),
stamp( -1 ), stamp( -1 ),
count( 0 ), count( 0 ),
size( 0 ) size( 0 ),
blobs_offset_size( 0 )
{} {}
}; };
@ -184,13 +187,25 @@ public:
} }
const ZIM_header & header() const const ZIM_header & header() const
{ return zimHeader; } { return zimHeader; }
string getClusterData( quint32 cluster_nom );
string getClusterData( quint32 cluster_nom, unsigned & blob_offset_size );
/// Returns the entry of mimeTypes at index @p nom (the list is filled by
/// open() from the data at zimHeader.mimeListPos).
/// NOTE(review): relies on QStringList::value() returning an empty string
/// for an out-of-range index — confirm against the Qt documentation.
const QString getMimeType( quint16 nom )
{ return mimeTypes.value( nom ); }
/// Tells whether the MIME type registered under index @p mime_type denotes
/// article text, i.e. its name starts with "text/html" or "text/plain"
/// (compared case-insensitively).
bool isArticleMime( quint16 mime_type )
{
  const QString mimeName = getMimeType( mime_type );
  if( mimeName.startsWith( "text/html", Qt::CaseInsensitive ) )
    return true;
  return mimeName.startsWith( "text/plain", Qt::CaseInsensitive );
}
quint16 redirectedMimeType( RedirectEntry const & redEntry );
private: private:
ZIM_header zimHeader; ZIM_header zimHeader;
Cache cache[ CACHE_SIZE ]; Cache cache[ CACHE_SIZE ];
int stamp; int stamp;
QVector< QPair< quint64, quint32 > > clusterOffsets; QVector< QPair< quint64, quint32 > > clusterOffsets;
QStringList mimeTypes;
void clearCache(); void clearCache();
}; };
@ -291,10 +306,33 @@ bool ZimFile::open()
std::sort( clusterOffsets.begin(), clusterOffsets.end() ); std::sort( clusterOffsets.begin(), clusterOffsets.end() );
// Read mime types
string type;
char ch;
seek( zimHeader.mimeListPos );
for( ; ; )
{
type.clear();
while( getChar( &ch ) )
{
if( ch == 0 )
break;
type.push_back( ch );
}
if( type.empty() )
break;
QString s = QString::fromUtf8( type.c_str(), type.size() );
mimeTypes.append( s );
}
return true; return true;
} }
string ZimFile::getClusterData( quint32 cluster_nom ) string ZimFile::getClusterData( quint32 cluster_nom, unsigned & blobs_offset_size )
{ {
// Check cache // Check cache
int target = 0; int target = 0;
@ -328,6 +366,7 @@ string ZimFile::getClusterData( quint32 cluster_nom )
if( found ) if( found )
{ {
// Cache hit // Cache hit
blobs_offset_size = cache[ target ].blobs_offset_size;
return string( cache[ target ].data, cache[ target ].count ); return string( cache[ target ].data, cache[ target ].count );
} }
@ -353,9 +392,11 @@ string ZimFile::getClusterData( quint32 cluster_nom )
seek( clusterOffsets.at( nom ).first ); seek( clusterOffsets.at( nom ).first );
char compressionType; char compressionType, cluster_info;
if( !getChar( &compressionType ) ) if( !getChar( &cluster_info ) )
return string(); return string();
compressionType = cluster_info & 0x0F;
blobs_offset_size = cluster_info & 0x10 && zimHeader.majorVersion >= 6 ? 8 : 4;
string decompressedData; string decompressedData;
@ -384,9 +425,16 @@ string ZimFile::getClusterData( quint32 cluster_nom )
// Check BLOBs number in the cluster // Check BLOBs number in the cluster
// We cache multi-element clusters only // We cache multi-element clusters only
quint32 firstOffset; quint32 firstOffset32;
memcpy( &firstOffset, decompressedData.data(), sizeof(firstOffset) ); quint64 firstOffset;
quint32 blobCount = ( firstOffset - 4 ) / 4; if( blobs_offset_size == 8 )
memcpy( &firstOffset, decompressedData.data(), sizeof(firstOffset) );
else
{
memcpy( &firstOffset32, decompressedData.data(), sizeof(firstOffset32) );
firstOffset = firstOffset32;
}
quint32 blobCount = ( firstOffset - blobs_offset_size ) / blobs_offset_size;
if( blobCount > 1 ) if( blobCount > 1 )
{ {
@ -410,12 +458,52 @@ string ZimFile::getClusterData( quint32 cluster_nom )
memcpy( cache[ target ].data, decompressedData.c_str(), size ); memcpy( cache[ target ].data, decompressedData.c_str(), size );
cache[ target ].count = size; cache[ target ].count = size;
cache[ target ].clusterNumber = cluster_nom; cache[ target ].clusterNumber = cluster_nom;
cache[ target ].blobs_offset_size = blobs_offset_size;
} }
} }
return decompressedData; return decompressedData;
} }
/// Resolves the MIME type index of the entry a redirect finally points to.
///
/// Starting from @p redEntry, repeatedly looks up the redirect target in the
/// URL pointer table (at zimHeader.urlPtrPos, 8 bytes per slot), seeks to the
/// target directory entry and reads its 2-byte MIME type field. A value of
/// 0xFFFF means the target is itself a redirect, so the chain is followed
/// further.
///
/// The file position is saved on entry and restored before returning.
///
/// @param redEntry  redirect entry to start from
/// @return the MIME type index of the first non-redirect target, or 0xFFFF if
///         it cannot be resolved (read failure, a redirect pointing to itself,
///         or a chain longer than the hop limit, e.g. a redirect cycle).
quint16 ZimFile::redirectedMimeType( RedirectEntry const & redEntry )
{
  // Upper bound on followed redirects. Without it a malformed file with a
  // redirect cycle (A -> B -> A) would keep this loop running forever.
  const unsigned maxRedirectHops = 256;

  RedirectEntry current_entry = redEntry;
  const quint64 current_pos = pos();
  quint16 mimetype = 0xFFFF;

  for( unsigned hop = 0; hop < maxRedirectHops; ++hop )
  {
    const quint32 current_nom = current_entry.redirectIndex;

    // Fetch the file offset of the target directory entry.
    seek( zimHeader.urlPtrPos + (quint64)current_nom * 8 );
    quint64 new_pos;
    if( read( reinterpret_cast< char * >( &new_pos ), sizeof(new_pos) ) != sizeof(new_pos) )
      break;

    // The directory entry begins with its MIME type field.
    seek( new_pos );
    quint16 new_mimetype;
    if( read( reinterpret_cast< char * >( &new_mimetype ), sizeof(new_mimetype) ) != sizeof(new_mimetype) )
      break;

    if( new_mimetype != 0xFFFF )
    {
      // Reached a real (non-redirect) entry.
      mimetype = new_mimetype;
      break;
    }

    // Redirect to other article: load the remainder of the entry; its first
    // two bytes (the MIME type field) have already been consumed above.
    if( read( reinterpret_cast< char * >( &current_entry ) + 2, sizeof( current_entry ) - 2 ) != sizeof( current_entry ) - 2 )
      break;

    // An entry redirecting to itself can never be resolved.
    if( current_nom == current_entry.redirectIndex )
      break;
  }

  seek( current_pos );
  return mimetype;
}
// Some supporting functions // Some supporting functions
bool indexIsOldOrBad( string const & indexFile ) bool indexIsOldOrBad( string const & indexFile )
@ -516,23 +604,42 @@ quint32 readArticle( ZimFile & file, quint32 articleNumber, string & result,
// Read cluster data // Read cluster data
string decompressedData = file.getClusterData( artEntry.clusterNumber ); unsigned offset_size = 0;
string decompressedData = file.getClusterData( artEntry.clusterNumber, offset_size );
if( decompressedData.empty() ) if( decompressedData.empty() )
break; break;
// Take article data from cluster // Take article data from cluster
quint32 firstOffset; quint32 firstOffset32;
memcpy( &firstOffset, decompressedData.data(), sizeof(firstOffset) ); quint64 firstOffset;
quint32 blobCount = ( firstOffset - 4 ) / 4;
if( offset_size == 8 )
memcpy( &firstOffset, decompressedData.data(), sizeof(firstOffset) );
else
{
memcpy( &firstOffset32, decompressedData.data(), sizeof(firstOffset32) );
firstOffset = firstOffset32;
}
quint32 blobCount = ( firstOffset - offset_size ) / offset_size;
if( artEntry.blobNumber > blobCount ) if( artEntry.blobNumber > blobCount )
break; break;
quint32 offsets[ 2 ]; quint32 size;
memcpy( offsets, decompressedData.data() + artEntry.blobNumber * 4, sizeof(offsets) ); if( offset_size == 8 )
quint32 size = offsets[ 1 ] - offsets[ 0 ]; {
quint64 offsets[ 2 ];
result.append( decompressedData, offsets[ 0 ], size ); memcpy( offsets, decompressedData.data() + artEntry.blobNumber * 8, sizeof(offsets) );
size = offsets[ 1 ] - offsets[ 0 ];
result.append( decompressedData, offsets[ 0 ], size );
}
else
{
quint32 offsets[ 2 ];
memcpy( offsets, decompressedData.data() + artEntry.blobNumber * 4, sizeof(offsets) );
size = offsets[ 1 ] - offsets[ 0 ];
result.append( decompressedData, offsets[ 0 ], size );
}
return articleNumber; return articleNumber;
} }
@ -1437,6 +1544,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
df.open(); df.open();
ZIM_header const & zh = df.header(); ZIM_header const & zh = df.header();
bool new_namespaces = ( zh.majorVersion >= 6 && zh.minorVersion >= 1 );
if( zh.magicNumber != 0x44D495A ) if( zh.magicNumber != 0x44D495A )
throw exNotZimFile( i->c_str() ); throw exNotZimFile( i->c_str() );
@ -1473,7 +1581,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
} }
const quint64 * ptr; const quint64 * ptr;
quint16 mimetype; quint16 mimetype, redirected_mime = 0xFFFF;
ArticleEntry artEntry; ArticleEntry artEntry;
RedirectEntry redEntry; RedirectEntry redEntry;
string url, title; string url, title;
@ -1490,6 +1598,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
if( ret != sizeof(RedirectEntry) - 2 ) if( ret != sizeof(RedirectEntry) - 2 )
throw exCantReadFile( i->c_str() ); throw exCantReadFile( i->c_str() );
redirected_mime = df.redirectedMimeType( redEntry );
nameSpace = redEntry.nameSpace; nameSpace = redEntry.nameSpace;
} }
else else
@ -1501,7 +1610,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
nameSpace = artEntry.nameSpace; nameSpace = artEntry.nameSpace;
if( nameSpace == 'A' ) if( ( nameSpace == 'A' || ( nameSpace == 'C' && new_namespaces ) ) && df.isArticleMime( mimetype ) )
articleCount++; articleCount++;
} }
@ -1524,7 +1633,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
title.push_back( ch ); title.push_back( ch );
} }
if( nameSpace == 'A' ) if( nameSpace == 'A' || ( nameSpace == 'C' && new_namespaces && ( df.isArticleMime( mimetype )
|| ( mimetype == 0xFFFF && df.isArticleMime( redirected_mime ) ) ) ) )
{ {
wstring word; wstring word;
if( !title.empty() ) if( !title.empty() )
@ -1532,16 +1642,26 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
else else
word = Utf8::decode( url ); word = Utf8::decode( url );
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand ) if( df.isArticleMime( mimetype )
indexedWords.addSingleWord( word, n ); || ( mimetype == 0xFFFF && df.isArticleMime( redirected_mime ) ) )
{
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
indexedWords.addSingleWord( word, n );
else
indexedWords.addWord( word, n );
wordCount++;
}
else else
indexedWords.addWord( word, n ); {
wordCount++; url.insert( url.begin(), '/' );
url.insert( url.begin(), nameSpace );
indexedResources.addSingleWord( Utf8::decode( url ), n );
}
} }
else else
if( nameSpace == 'M' ) if( nameSpace == 'M' )
{ {
if( url.compare( "Title") == 0 ) if( url.compare( "Title" ) == 0 )
{ {
idxHeader.namePtr = n; idxHeader.namePtr = n;
string name; string name;
@ -1549,10 +1669,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
initializing.indexingDictionary( name ); initializing.indexingDictionary( name );
} }
else else
if( url.compare( "Description") == 0 ) if( url.compare( "Description" ) == 0 )
idxHeader.descriptionPtr = n; idxHeader.descriptionPtr = n;
else else
if( url.compare( "Language") == 0 ) if( url.compare( "Language" ) == 0 )
{ {
string lang; string lang;
readArticle( df, n, lang ); readArticle( df, n, lang );
@ -1565,6 +1685,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
} }
} }
else else
if( nameSpace == 'X' )
{
continue;
}
else
{ {
url.insert( url.begin(), '/' ); url.insert( url.begin(), '/' );
url.insert( url.begin(), nameSpace ); url.insert( url.begin(), nameSpace );