/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#ifndef __DICTIONARY_HH_INCLUDED__
#define __DICTIONARY_HH_INCLUDED__

#include <map>
#include <string>
#include <vector>

#include <QMutex>
#include <QObject>
#include <QString>
#include <QWaitCondition>

#include "config.hh"
#include "ex.hh"
#include "globalbroadcaster.hh"
#include "langcoder.hh"
#include "sptr.hh"
#include "utils.hh"
#include "wstring.hh"

/// Abstract dictionary-related stuff
namespace Dictionary {

using std::vector;
using std::string;
using gd::wstring;
using std::map;

/// Keys identifying the pieces of descriptive metadata a dictionary can
/// expose through Class::getProperties().
enum Property
{
  Author      = 0, ///< The author's name.
  Copyright   = 1, ///< Copyright information.
  Description = 2, ///< Description of the dictionary.
  Email       = 3  ///< E-mail entry (presumably a contact address -- confirm with users).
};

// Exception types of the dictionary subsystem, declared through the DEF_EX
// macro (presumably provided by ex.hh -- confirm there). As used here, the
// arguments appear to be: type name, human-readable description, base type.
// Ex is rooted at std::exception; the remaining types name Ex as their base.
DEF_EX( Ex, "Dictionary error", std::exception )
DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex )
DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex )
DEF_EX( exRequestUnfinished, "The request hasn't yet finished", Ex )

/// When you request a search to be performed in a dictionary, you get
/// this structure in return. It accumulates search results over time.
/// The finished() signal is emitted when the search has finished and there's
/// no more matches to be expected. Note that before connecting to it, check
/// the result of isFinished() -- if it's 'true', the search was instantaneous.
/// Destroy the object when you are not interested in results anymore.
///
/// Creating, destroying and calling member functions of the requests is done
/// in the GUI thread, however. Therefore, it is important to make sure those
/// operations are fast (this is most important for word searches, where
/// new requests are created and old ones deleted immediately upon a user
/// changing query).
class Request: public QObject
{
  Q_OBJECT

public:
  Request( QObject * parent = nullptr ) : QObject( parent )
  {
  }
  /// Returns whether the request has been processed in full and finished.
  /// This means that the data accumulated is final and won't change anymore.
  bool isFinished();

  /// Either returns an empty string in case there was no error processing
  /// the request, or otherwise a human-readable string describing the problem.
  /// Note that an empty result, such as a lack of word or of an article isn't
  /// an error -- but any kind of failure to connect to, or read the dictionary
  /// is.
  QString getErrorString();

  /// Cancels the ongoing request. This may make Request destruct faster some
  /// time in the future, Use this in preparation to destruct many Requests,
  /// so that they'd be cancelling in parallel. When the request was fully
  /// cancelled, it must emit the finished() signal, either as a result of an
  /// actual finish which has happened just before the cancellation, or solely as
  /// a result of a request being cancelled (in the latter case, the actual
  /// request result may be empty or incomplete). That is, finish() must be
  /// called by a derivative at least once if cancel() was called, either after
  /// or before it was called.
  virtual void cancel()=0;

  virtual ~Request()
  {}

signals:

  /// This signal is emitted when more data becomes available. Local
  /// dictionaries typically don't call this, since it is preferred that all
  /// data would be available from them at once, but network dictionaries
  /// might call that.
  void updated();

  /// This signal is emitted when the request has been processed in full and
  /// finished. That is, it's emitted when isFinished() turns true.
  void finished();

  void matchCount(int);

protected:

  /// Called by derivatives to signal update().
  void update();

  /// Called by derivatives to set isFinished() flag and signal finished().
  void finish();

  /// Sets the error string to be returned by getErrorString().
  void setErrorString( QString const & );

private:

  QAtomicInt isFinishedFlag;

  QMutex errorStringMutex;
  QString errorString;
};

/// A single word found by a search, together with its weight. The weight is
/// 0 unless stated otherwise. Negative values are meant to carry the distance
/// reported by Levenshtein-like matching algorithms, positive values are used
/// by morphology matches.
struct WordMatch
{
  wstring word;
  int weight = 0;

  /// An empty word of weight 0.
  WordMatch() = default;

  /// The given word with weight 0. Deliberately usable as an implicit
  /// conversion from a plain word.
  WordMatch( wstring const & word_ ):
    word( word_ )
  {}

  /// The given word with the given weight.
  WordMatch( wstring const & word_, int weight_ ):
    word( word_ ),
    weight( weight_ )
  {}
};

/// The request type shared by all kinds of word searching operations.
class WordSearchRequest: public Request
{
  Q_OBJECT

public:

  /// Starts out with no matches and 'uncertain' cleared.
  WordSearchRequest() = default;

  /// The number of matches found so far. Until isFinished() becomes true the
  /// value may still grow.
  size_t matchesCount();

  /// The match at the given zero-based index; the index has to be less than
  /// matchesCount().
  WordMatch operator[]( size_t index );

  /// All the matches found. No further locking can or would be done, so this
  /// may only be called once the request has finished.
  vector< WordMatch > & getAllMatches();

  /// True if the match was uncertain -- meaning the dictionary itself may
  /// hold more results, but its index isn't good enough to tell.
  bool isUncertain() const
  {
    return uncertain;
  }

  /// Adds the match unless it is already present in the list of matches.
  void addMatch( WordMatch const & match );

protected:

  // Subclasses fill up 'matches' and are expected to lock this mutex
  // whenever they work with it.
  QMutex dataMutex;

  vector< WordMatch > matches;
  bool uncertain = false;
};

/// This request type corresponds to any kind of data response where a
/// single large blob of binary data is returned. It is currently used for
/// article bodies and resources.
class DataRequest: public Request
{
  Q_OBJECT

public:
  /// Returns the number of bytes read, with a -1 meaning that so far it's
  /// uncertain whether the resource even exists or not, and any non-negative
  /// value meaning that that amount of bytes is now available.
  /// If -1 is still being returned after the request has finished, that means
  /// the resource wasn't found.
  long dataSize();

  /// Writes "size" bytes starting from "offset" of the data read to the given
  /// buffer. "size + offset" must be <= dataSize().
  void getDataSlice( size_t offset, size_t size, void * buffer );

  /// NOTE(review): the definition is not visible in this header; presumably
  /// appends "size" bytes taken from "buffer" to the data read so far --
  /// confirm in the implementation file.
  void appendDataSlice( const void * buffer, size_t size );

  /// Returns all the data read. Since no further locking can or would be
  /// done, this can only be called after the request has finished.
  vector< char > & getFullData() ;

  /// Constructs a request without any data yet; 'parent' is handed over to
  /// Request.
  DataRequest( QObject * parent = nullptr ) : Request( parent ), hasAnyData( false )
  {
  }

protected:

  // Subclasses should be filling up the 'data' array, locking the mutex when
  // they work with it.
  QMutex dataMutex;

  bool hasAnyData; // With this being false, dataSize() always returns -1
  vector< char > data;
};

/// Convenience request for word searches whose results are produced
/// synchronously.
class WordSearchRequestInstant: public WordSearchRequest
{
public:

  /// The request calls finish() right away, so it counts as finished from
  /// the moment it has been constructed.
  WordSearchRequestInstant()
  { finish(); }

  /// Intentionally empty.
  void cancel() override
  {}

  /// Direct, unlocked access to the list of matches, for filling it in.
  vector< WordMatch > & getMatches()
  { return matches; }

  /// Sets the value reported by isUncertain().
  void setUncertain( bool value )
  { uncertain = value; }
};

/// A helper class for synchronous data read implementations.
class DataRequestInstant: public DataRequest
{
public:

  DataRequestInstant( bool succeeded )
  { hasAnyData = succeeded; finish(); }

  DataRequestInstant( QString const & errorString )
  { setErrorString( errorString ); finish(); }

  virtual void cancel()
  {}

  vector< char > & getData()
  { return data; }
};

/// Feature bits a dictionary may advertise. Dictionaries differ in which
/// features they possess; each feature hints at some aspect of the
/// dictionary.
enum Feature
{
  /// The dictionary possesses no features.
  NoFeatures = 0x0,
  /// The dictionary is suitable to query when searching for compound
  /// expressions.
  SuitableForCompoundSearching = 0x1
};

/// Features is the QFlags-based type holding a combination of Feature values;
/// the second macro provides operator| for combining Feature values.
Q_DECLARE_FLAGS( Features, Feature )
Q_DECLARE_OPERATORS_FOR_FLAGS( Features )

/// A dictionary. Can be used to query words.
class Class: public QObject
{
  Q_OBJECT

  string id;
  vector< string > dictionaryFiles;

  // Value last passed to setIndexedFtsDoc(); numerator of
  // getIndexingFtsProgress(). Starts at zero like lastProgress.
  long indexedFtsDoc = 0;

  // Last progress percentage that was broadcast, used to suppress duplicates.
  long lastProgress = 0;

protected:
  QString dictionaryDescription;
  QIcon dictionaryIcon, dictionaryNativeIcon;
  bool dictionaryIconLoaded;
  // Value reported by canFTS().
  bool can_FTS;
  // Non-zero once haveFTSIndex() should report true.
  QAtomicInt FTS_index_completed;
  // Written by setSynonymSearchEnabled().
  bool synonymSearchEnabled;
  // Returned by the default getName(), written by the default setName().
  string dictionaryName;

  // Load user icon if it exist
  // By default set icon to empty
  virtual void loadIcon() noexcept;

  // Load icon from filename directly if isFullName == true
  // else treat filename as name without extension
  bool loadIconFromFile( QString const & filename, bool isFullName = false );
  bool loadIconFromText( QString iconUrl, QString const & text );

  QString getAbbrName( QString const & text );

  /// Make css content usable only for articles from this dictionary
  void isolateCSS( QString & css, QString const & wrapperSelector = QString() );

public:

  /// Creates a dictionary. The id should be made using
  /// Format::makeDictionaryId(), the dictionaryFiles is the file names the
  /// dictionary consists of.
  Class( string const & id, vector< string > const & dictionaryFiles );

  /// Called once after the dictionary is constructed. Usually called for each
  /// dictionaries once all dictionaries were made. The implementation should
  /// queue any initialization tasks the dictionary decided to postpone to
  /// threadpools, network requests etc, so the system could complete them
  /// in background.
  /// The default implementation does nothing.
  virtual void deferredInit();

  /// Returns the dictionary's id.
  string getId() noexcept
  { return id; }

  /// Returns the list of file names the dictionary consists of.
  vector< string > const & getDictionaryFilenames() noexcept
  { return dictionaryFiles; }

  /// Get the main folder that contains the dictionary, without the ending separator .
  QString getContainingFolder() const;

  /// Returns the dictionary's full name, utf8.
  virtual string getName()
  {
    return dictionaryName;
  }

  /// Sets the name returned by the default getName().
  virtual void setName( string _dictionaryName )
  {
    dictionaryName = _dictionaryName;
  }

  /// Returns all the available properties, like the author's name, copyright,
  /// description etc. All strings are in utf8.
  virtual map< Property, string > getProperties() noexcept=0;

  /// Returns the features the dictionary possess. See the Feature enum for
  /// their list.
  virtual Features getFeatures() const noexcept
  { return NoFeatures; }

  /// Returns the number of articles in the dictionary.
  virtual unsigned long getArticleCount() noexcept=0;

  /// Records the number of documents indexed for full-text search so far
  /// and, whenever the resulting percentage changes, broadcasts a progress
  /// message through GlobalBroadcaster.
  void setIndexedFtsDoc( long _indexedFtsDoc )
  {
    indexedFtsDoc = _indexedFtsDoc;

    auto const newProgress = getIndexingFtsProgress();
    if ( newProgress != lastProgress ) {
      lastProgress = newProgress;
      // Both placeholders are substituted in a single pass, so a dictionary
      // name that itself contains "%2" cannot be mistaken for a placeholder.
      // NOTE(review): the literal renders as "<name>......%<progress>", with
      // the percent sign in front of the number; left unchanged.
      emit GlobalBroadcaster::instance()->indexingDictionary(
        QString( "%1......%%2" ).arg( QString::fromStdString( getName() ), QString::number( newProgress ) ) );
    }
  }

  /// Returns the full-text indexing progress as a percentage of
  /// getArticleCount(). Yields 0 when there are no articles or when nothing
  /// has been indexed yet.
  int getIndexingFtsProgress()
  {
    auto const total = getArticleCount();
    if ( total == 0 || indexedFtsDoc <= 0 )
      return 0;
    // Widen before multiplying: 'long' is 32 bits wide on some platforms and
    // 'indexedFtsDoc * 100' could overflow there.
    return static_cast< int >( static_cast< unsigned long long >( indexedFtsDoc ) * 100 / total );
  }

  /// Returns the number of words in the dictionary. This can be equal to
  /// the number of articles, or can be larger if some synonyms are present.
  virtual unsigned long getWordCount() noexcept=0;

  /// Returns the dictionary's icon.
  virtual QIcon const & getIcon() noexcept;

  /// Returns the dictionary's native icon. Dsl icons are usually rectangular,
  /// and are adapted by getIcon() to be square. This function allows getting
  /// the original icon with no geometry transformations applied.
  virtual QIcon const & getNativeIcon() noexcept;

  /// Returns the dictionary's source language.
  virtual quint32 getLangFrom() const
  { return 0; }

  /// Returns the dictionary's target language.
  virtual quint32 getLangTo() const
  { return 0; }

  /// Looks up a given word in the dictionary, aiming for exact matches and
  /// prefix matches. If it's not possible to locate any prefix matches, no
  /// prefix results should be added. Not more than maxResults results should
  /// be stored. The whole operation is supposed to be fast, though some
  /// dictionaries, the network ones particularly, may of course be slow.
  virtual sptr< WordSearchRequest > prefixMatch( wstring const &,
                                                 unsigned long maxResults ) =0;

  /// Looks up a given word in the dictionary, aiming to find different forms
  /// of the given word by allowing suffix variations. This means allowing words
  /// which can be as short as the input word size minus maxSuffixVariation, or as
  /// long as the input word size plus maxSuffixVariation, which share at least
  /// the input word size minus maxSuffixVariation initial symbols.
  /// Since the goal is to find forms of the words, no matches where a word
  /// in the middle of a phrase got matched should be returned.
  /// The default implementation does nothing, returning an empty result.
  virtual sptr< WordSearchRequest > stemmedMatch( wstring const &,
                                                  unsigned minLength,
                                                  unsigned maxSuffixVariation,
                                                  unsigned long maxResults ) ;

  /// Finds known headwords for the given word, that is, the words for which
  /// the given word is a synonym. If a dictionary can't perform this operation,
  /// it should leave the default implementation which always returns an empty
  /// result.
  virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & )
    ;

  /// For a given word, provides alternate writings of it which are to be looked
  /// up alongside with it. Transliteration dictionaries implement this. The
  /// default implementation returns an empty list. Note that this function is
  /// supposed to be very fast and simple, and the results are thus returned
  /// synchronously.
  virtual vector< wstring > getAlternateWritings( wstring const & )
    noexcept;

  /// Returns a definition for the given word. The definition should
  /// be an html fragment (without html/head/body tags) in an utf8 encoding.
  /// The 'alts' vector could contain a list of words the definitions of which
  /// should be included in the output as well, being treated as additional
  /// synonyms for the main word.
  /// context is a dictionary-specific data, currently only used for the
  /// 'Websites' feature.
  virtual sptr< DataRequest > getArticle( wstring const &,
                                          vector< wstring > const & alts,
                                          wstring const & context = wstring(),
                                          bool ignoreDiacritics = false )
    =0;

  /// Loads contents of a resource named 'name' into the 'data' vector. This is
  /// usually a picture file referenced in the article or something like that.
  /// The default implementation always returns the non-existing resource
  /// response.
  virtual sptr< DataRequest > getResource( string const & /*name*/ )
    ;

  /// Returns the results of a full-text search for the given string, similar
  /// to getArticle().
  virtual sptr< DataRequest >
  getSearchResults( QString const & searchString, int searchMode, bool matchCase, bool ignoreDiacritics );

  // Return dictionary description if presented
  virtual QString const& getDescription();

  // Return dictionary main file name
  virtual QString getMainFilename();

  /// Check text direction
  bool isFromLanguageRTL()
  { return LangCoder::isLanguageRTL( getLangFrom() ); }
  bool isToLanguageRTL()
  { return LangCoder::isLanguageRTL( getLangTo() ); }

  /// Return true if dictionary is local dictionary
  virtual bool isLocalDictionary()
  { return false; }

  /// Dictionary can full-text search
  bool canFTS()
  { return can_FTS; }

  /// Dictionary have index for full-text search
  bool haveFTSIndex()
  { return Utils::AtomicInt::loadAcquire( FTS_index_completed ) != 0; }

  /// Make index for full-text search. The default implementation does
  /// nothing.
  virtual void makeFTSIndex( QAtomicInt &, bool )
  {}

  /// Set full-text search parameters. The default implementation does
  /// nothing.
  virtual void setFTSParameters( Config::FullTextSearch const & )
  {}

  /// Retrieve all dictionary headwords. The default implementation retrieves
  /// nothing and returns false.
  virtual bool getHeadwords( QStringList & )
  { return false; }

  /// The default implementation does nothing.
  /// NOTE(review): the meaning of the int and uint32_t parameters isn't
  /// visible in this header -- confirm with the overriding dictionaries.
  virtual  void findHeadWordsWithLenth( int &, QSet< QString > * /*headwords*/, uint32_t ){}

  /// Enable/disable search via synonyms
  void setSynonymSearchEnabled( bool enabled )
  { synonymSearchEnabled = enabled; }

  virtual ~Class() = default;
};

/// Callback interface used while the dictionaries are being initialized.
class Initializing
{
public:

  /// Invoked by the Format instance to tell the caller that the given
  /// dictionary is currently being indexed. Indexing can take a while, so
  /// this is handy for displaying on some kind of a splash screen.
  /// The dictionaryName is utf8-encoded.
  virtual void indexingDictionary( string const & dictionaryName ) noexcept = 0;

  virtual ~Initializing() = default;
};

/// Generates an id based on the set of file names which the dictionary
/// consists of. The resulting id is an alphanumeric hex value made by
/// hashing the file names. This id should be used to identify the dictionary
/// and for the index file name, if one is needed.
/// This function is supposed to be used by dictionary implementations.
string makeDictionaryId( vector< string > const & dictionaryFiles ) noexcept;

/// Checks whether the index file needs to be regenerated, based on its
/// timestamp and the timestamps of the dictionary files. If some files are
/// newer than the index file, or the index file doesn't exist, returns true.
/// If some dictionary files don't exist, returns true, too.
/// This function is supposed to be used by dictionary implementations.
bool needToRebuildIndex( vector< string > const & dictionaryFiles,
                         string const & indexFile ) noexcept;

/// NOTE(review): not documented here and the definition is not visible;
/// presumably the suffix used for full-text search index file names --
/// confirm in the implementation file.
string getFtsSuffix();
/// Returns a random dictionary id useful for interactively created
/// dictionaries.
QString generateRandomDictionaryId();

/// Builds a map out of the given list of dictionaries.
/// NOTE(review): the std::string keys are presumably the dictionaries' ids
/// (Class::getId()) -- confirm in the implementation file.
QMap< std::string, sptr< Dictionary::Class > >
dictToMap( std::vector< sptr< Dictionary::Class > > const & dicts );

}

#endif