Compare commits

...

12 commits

Author SHA1 Message Date
xiaoyifang 0dc58685ac
Merge 15b918eb6a into 0c42c300e1 2024-11-23 09:14:56 -05:00
shenleban tongying 0c42c300e1
Merge pull request #1987 from shenlebantongying/refactor/gd-text
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
refactor: use standard string types and merge wstring(-qt)/utf8/ namespaces to Text
2024-11-23 08:26:21 -05:00
shenleban tongying 1471bc3926 ignore last commit 2024-11-23 08:19:03 -05:00
shenleban tongying f1e158578f refactor: use standard string types and merge string namespaces to Text 2024-11-23 08:15:43 -05:00
shenleban tongying abeacef13d
clean: delete unused and empty builtin qt-style.css for macOS/Linux
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-23 08:41:00 +00:00
atauzki 1fb1c5c9de
feat: auto dark reader mode for Windows
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-22 22:47:19 -05:00
shenleban tongying 5406b3022a
dev: generally improve cmake build script
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-22 16:50:55 -05:00
shenleban tongying f446ad358f clean: delete Dictionary::getProperties which is unused since 2009 2024-11-22 14:51:36 -05:00
shenleban tongying 3c5b76f77a
fix startdict index file reading caused by wrong order of reading
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
partially revert  https://github.com/xiaoyifang/goldendict-ng/pull/1972
2024-11-22 04:53:10 +00:00
autofix-ci[bot] 15b918eb6a
[autofix.ci] apply automated fixes 2024-11-08 01:47:29 +00:00
xiaoyifang 27cbb7351b opt: add option about 2024-11-06 13:35:22 +08:00
xiaoyifang c787a08d2f opt: add option about 2024-11-06 12:07:23 +08:00
85 changed files with 1306 additions and 1495 deletions

View file

@ -21,3 +21,6 @@ c8af0450f1f7f8188004db96e3f53e7e33e2ccad
# remove gddebug.hh and associated functions
76aaed116bdc3aeb53fd61553aedb877baf9b510
# wstring & wchar -> std::u32string & char32_t
f1e158578f62c96059bef1a616b75495adb6e2c6

View file

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.25) # ubuntu 23.04 Fedora 36
cmake_minimum_required(VERSION 3.25) # Debian 11 Ubuntu 24.04 Fedora 36
option(WITH_FFMPEG_PLAYER "Enable support for FFMPEG player" ON)
option(WITH_EPWING_SUPPORT "Enable epwing support" ON)
@ -9,20 +9,12 @@ option(WITH_TTS "enable QTexttoSpeech support" OFF)
option(USE_SYSTEM_FMT "use system fmt instead of bundled one" OFF)
option(USE_SYSTEM_TOML "use system toml++ instead of bundled one" OFF)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
## Change binary & resources folder to parallel install with original GD.
## This flag should be avoided because it leads to small regressions:
## 1. There are personal scripts assuming the binary name to be "goldendict" -> require everyone to change the name in their script
## 2. There are icon themes that assuming the icon name to be "goldendict" -> invalidate the GD icon when using a icon theme
## 3. There are dictionary packages that install files to "/usr/share/goldendict/content" -> nullify the auto dict discovery
option(USE_ALTERNATIVE_NAME "Force the name goldendict-ng " OFF)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") # to put staff in the ./cmake folder
## This should be avoided because it causes small regressions: some scripts and icon themes assume the binary name and resource folder to be `goldendict`.
option(USE_ALTERNATIVE_NAME "For Linux, change the binary name and resource folder to goldendict-ng to parallel install with the original GD" OFF)
# vcpkg handling code, must be placed before project()
if (WIN32)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
if (DEFINED CMAKE_TOOLCHAIN_FILE)
message(STATUS "Using toolchain file: ${CMAKE_TOOLCHAIN_FILE}")
else ()
@ -37,11 +29,9 @@ if (WIN32)
set(VCPKG_MANIFEST_MODE OFF CACHE BOOL "disable existing manifest mode caused by the existrance of vcpkg.json" FORCE)
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/_deps/vcpkg-export-src/scripts/buildsystems/vcpkg.cmake")
endif ()
endif ()
if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
endif ()
endif ()
include(FeatureSummary)
@ -49,7 +39,7 @@ include(FeatureSummary)
project(goldendict-ng
VERSION 24.11.0
LANGUAGES CXX C)
if (APPLE)
enable_language(OBJCXX)
set(CMAKE_OBJCXX_STANDARD 17)
@ -60,13 +50,12 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(GOLDENDICT "goldendict") # binary/executable name
if (USE_ALTERNATIVE_NAME )
if (USE_ALTERNATIVE_NAME)
set(GOLDENDICT "goldendict-ng")
endif ()
if (APPLE)
set(GOLDENDICT "GoldenDict-ng")
endif()
endif ()
#### Qt
@ -78,11 +67,10 @@ endif ()
find_package(Qt6 REQUIRED COMPONENTS ${GD_QT_COMPONENTS})
qt_standard_project_setup() # availiable after find_package(Qt6 .... Core
qt_standard_project_setup()
set(CMAKE_AUTORCC ON) # not included in the qt_standard_project_setup
#### Things required during configuration
block() # generate version.txt
string(TIMESTAMP build_time UTC)
find_package(Git)
@ -163,11 +151,8 @@ target_link_libraries(${GOLDENDICT} PRIVATE
Qt6::WebEngineWidgets
Qt6::Widgets
Qt6::Svg
)
if (WITH_TTS)
target_link_libraries(${GOLDENDICT} PRIVATE Qt6::TextToSpeech)
endif ()
$<$<BOOL:${WITH_TTS}>:Qt6::TextToSpeech>
)
target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/thirdparty/qtsingleapplication/src
@ -176,11 +161,7 @@ target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/src/dict
${PROJECT_SOURCE_DIR}/src/dict/utils
${PROJECT_SOURCE_DIR}/src/ui
)
if (WIN32)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/src/windows)
endif ()
)
if (NOT USE_SYSTEM_TOML)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty/tomlplusplus)
@ -199,45 +180,22 @@ target_compile_definitions(${GOLDENDICT} PRIVATE
)
target_compile_definitions(${GOLDENDICT} PUBLIC
CMAKE_USED_HACK # temporal hack to avoid breaking qmake build
MAKE_QTMULTIMEDIA_PLAYER
MAKE_CHINESE_CONVERSION_SUPPORT
)
if (WIN32)
target_compile_definitions(${GOLDENDICT} PUBLIC
__WIN32
INCLUDE_LIBRARY_PATH
)
endif ()
if (WITH_FFMPEG_PLAYER)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_FFMPEG_PLAYER)
endif ()
if(NOT WITH_TTS)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_TTS_SUPPORT)
endif()
if (NOT WITH_EPWING_SUPPORT)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_EPWING_SUPPORT)
endif ()
if (WITH_ZIM)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_ZIM_SUPPORT)
endif ()
if (WITH_VCPKG_BREAKPAD)
target_compile_definitions(${GOLDENDICT} PUBLIC USE_BREAKPAD)
endif ()
$<$<BOOL:${WIN32}>:__WIN32>
$<$<BOOL:${WITH_FFMPEG_PLAYER}>:MAKE_FFMPEG_PLAYER>
$<$<BOOL:${WITH_TTS}>:TTS_SUPPORT>
$<$<BOOL:${WITH_EPWING_SUPPORT}>:EPWING_SUPPORT>
$<$<BOOL:${WITH_ZIM}>:MAKE_ZIM_SUPPORT>
$<$<BOOL:${WITH_VCPKG_BREAKPAD}>:USE_BREAKPAD>
)
#### libraries linking && includes for Win or Unix
if (WIN32)
include(Deps_Vcpkg)
include(cmake/Deps_Vcpkg.cmake)
else ()
include(Deps_Unix)
include(cmake/Deps_Unix.cmake)
endif ()
#### add translations
@ -261,156 +219,11 @@ add_dependencies(${GOLDENDICT} "release_translations")
#### installation or assemble redistribution
if (APPLE)
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)
set_target_properties(${GOLDENDICT} PROPERTIES
MACOSX_BUNDLE TRUE
MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)
set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")
# if anything wrong, delete this and affect lines, and see what's Qt will generate by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "
set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
qt_deploy_runtime_dependencies(
EXECUTABLE \"${Redistributable_APP}\"
ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
GENERATE_QT_CONF
NO_APP_STORE_COMPLIANCE)
qt_deploy_translations()
qt_deploy_qt_conf(\"${QtConfPath}\"
PLUGINS_DIR PlugIns
TRANSLATIONS_DIR Resources/translations)
"
)
install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")
if (IS_READABLE "/opt/homebrew/share/opencc/")
set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_READABLE "/usr/local/share/opencc/")
set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")
install(SCRIPT ${deploy_script})
install(CODE "execute_process(COMMAND codesign --force --deep -s - ${Redistributable_APP})")
find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
install(CODE "
execute_process(COMMAND ${CREATE-DMG} \
--skip-jenkins \
--format \"ULMO\"
--volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
--volicon ${CMAKE_SOURCE_DIR}/icons/macicon.icns \
--icon \"${App_Name}\" 100 100
--app-drop-link 300 100 \
\"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
\"${Assembling_Dir}\")"
)
else ()
message(WARNING "create-dmg not found. No .dmg will be created")
endif ()
endif ()
if (LINUX OR BSD)
install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)
if (NOT USE_ALTERNATIVE_NAME)
# see: config.cc -> getProgramDataDir
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
RENAME goldendict-ng.png)
install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)
block() # patch the desktop file to adapt the binary & icon file's name change
file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
file(WRITE "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" "${DESKTOP_FILE_CONTENT}")
endblock()
endif ()
endif ()
if (WIN32)
set_target_properties(${GOLDENDICT}
PROPERTIES
WIN32_EXECUTABLE TRUE
RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "qt_deploy_runtime_dependencies(
EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/goldendict.exe\"
BIN_DIR .
LIB_DIR .
)"
)
install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)
# TODO: do we really need to carry a copy of openSSL?
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)
# trick CPack to make the output folder as NSIS installer
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
DESTINATION .
FILES_MATCHING
PATTERN "*"
PATTERN "*.pdb" EXCLUDE
PATTERN "*.ilk" EXCLUDE)
set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")
# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")
# NSIS specificS
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_URL_INFO_ABOUT [=[https://xiaoyifang.github.io/goldendict-ng/]=])
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")
include(CPack)
include(cmake/Package_macOS.cmake)
elseif (LINUX OR BSD)
include(cmake/Package_Linux.cmake)
elseif (WIN32)
include(cmake/Package_Windows.cmake)
endif ()
feature_summary(WHAT ALL DESCRIPTION "Build configuration:")

View file

@ -1,5 +1,4 @@
#### Various workarounds
if (APPLE)
# old & new homebrew's include paths
target_include_directories(${GOLDENDICT} PRIVATE /usr/local/include /opt/homebrew/include)
@ -29,25 +28,20 @@ endif ()
##### Finding packages from package manager
find_package(PkgConfig REQUIRED)
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED)
# Consider all PkgConfig dependencies as one
pkg_check_modules(PKGCONFIG_DEPS IMPORTED_TARGET
# Import all PkgConfig dependencies as one
pkg_check_modules(DEPS REQUIRED IMPORTED_TARGET
hunspell
liblzma
lzo2
opencc
vorbis # .ogg
vorbisfile
liblzma
xapian-core
zlib
)
target_link_libraries(${GOLDENDICT} PRIVATE
PkgConfig::PKGCONFIG_DEPS
BZip2::BZip2
ZLIB::ZLIB
)
target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::DEPS BZip2::BZip2)
# On FreeBSD, there are two iconv, libc iconv & GNU libiconv.
# The system one is good enough, the following is a workaround to use libc iconv on freeBSD.
@ -88,7 +82,7 @@ if (WITH_ZIM)
COMMAND_ERROR_IS_FATAL ANY)
message(STATUS "Found correct homebrew icu path -> ${ICU_REQUIRED_BY_ZIM_PREFIX}")
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig")
message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig")
message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}")
# icu4c as transitive dependency of libzim may not be automatically copied into app bundle
# so we manually discover the icu4c from homebrew, then find the relevant dylibs

22
cmake/Package_Linux.cmake Normal file
View file

@ -0,0 +1,22 @@
# Install rules for Linux/BSD.
#
# Installs the executable, the AppStream metainfo, the .desktop entry, the
# icon and the compiled translations (${qm_files}, defined by the including
# CMakeLists.txt). When USE_ALTERNATIVE_NAME is ON, the data folder and icon
# are named `goldendict-ng` instead of `goldendict`, and the .desktop entry is
# patched to match.

install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)

if (NOT USE_ALTERNATIVE_NAME)
    # see: config.cc -> getProgramDataDir
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
    install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
            RENAME goldendict-ng.png)
    install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)

    block() # patch the desktop file to adapt to the binary & icon file's name change
        file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
        string(REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
        string(REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")

        # Write the patched copy into the build tree, never back into the source
        # tree: overwriting the tracked file would dirty the checkout and make a
        # later configure without USE_ALTERNATIVE_NAME install the patched entry.
        set(PATCHED_DESKTOP_FILE "${CMAKE_BINARY_DIR}/io.github.xiaoyifang.goldendict_ng.desktop")
        file(WRITE "${PATCHED_DESKTOP_FILE}" "${DESKTOP_FILE_CONTENT}")
        install(FILES "${PATCHED_DESKTOP_FILE}" DESTINATION share/applications)
    endblock()
endif ()

View file

@ -0,0 +1,55 @@
# Windows packaging: build the GUI executable into GD_WIN_OUTPUT_DIR, deploy
# the Qt runtime and extra DLLs next to it, then let CPack turn that folder
# into a 7z archive and an NSIS installer.
# NOTE(review): GD_WIN_OUTPUT_DIR is not set in this file; presumably it is
# defined by the vcpkg dependency script included earlier -- confirm.

set_target_properties(${GOLDENDICT}
        PROPERTIES
        WIN32_EXECUTABLE TRUE
        RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
        LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)

# TODO: this breaks "Multi-Config" build systems like VisualStudio.
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)

# Use the real target name for the executable: ${GOLDENDICT} becomes
# "goldendict-ng" when USE_ALTERNATIVE_NAME is ON, so a hard-coded
# "goldendict.exe" would point the deploy step at a file that does not exist.
qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "qt_deploy_runtime_dependencies(
        EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/${GOLDENDICT}.exe\"
        BIN_DIR .
        LIB_DIR .
)"
)

install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)

# Note: these are runtime dependencies that are not copied automatically.
# See Qt's network -> SSL documentation https://doc.qt.io/qt-6/ssl.html#considerations-while-packaging-your-application
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)

# Trick CPack into packaging the whole output folder (minus debug/linker
# artifacts) as the NSIS installer payload.
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
        DESTINATION .
        FILES_MATCHING
        PATTERN "*"
        PATTERN "*.pdb" EXCLUDE
        PATTERN "*.ilk" EXCLUDE)

set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")

# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")

# NSIS specifics
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")

# Copied from https://crascit.com/2015/08/07/cmake_cpack_nsis_shortcuts_with_parameters/
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")

include(CPack)

69
cmake/Package_macOS.cmake Normal file
View file

@ -0,0 +1,69 @@
# macOS packaging: turn the target into an app bundle, assemble a
# redistributable copy under <build>/redist, deploy the Qt runtime and
# translations into it, bundle opencc's data, sign the bundle, and (when the
# `create-dmg` tool is available) wrap everything in a .dmg.

set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)

set_target_properties(${GOLDENDICT} PROPERTIES
        MACOSX_BUNDLE TRUE
        MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)

set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")

# If anything goes wrong, delete this and the affected lines below, and see what Qt generates by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")

# BREW_ICU_ADDITIONAL_DYLIBS is not set in this file.
# NOTE(review): presumably filled in by the Unix dependency script when ZIM support is enabled -- confirm.
qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "
        set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
        set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
        qt_deploy_runtime_dependencies(
                EXECUTABLE \"${Redistributable_APP}\"
                ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
                GENERATE_QT_CONF
                NO_APP_STORE_COMPLIANCE)
        qt_deploy_translations()
        qt_deploy_qt_conf(\"${QtConfPath}\"
                PLUGINS_DIR PlugIns
                TRANSLATIONS_DIR Resources/translations)
"
)

install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")

# Locate opencc's data folder in the new (/opt/homebrew) or old (/usr/local)
# Homebrew prefix. IS_DIRECTORY is used instead of IS_READABLE because the
# latter only exists since CMake 3.29, while this project declares 3.25 as its
# minimum version.
if (IS_DIRECTORY "/opt/homebrew/share/opencc/")
    set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_DIRECTORY "/usr/local/share/opencc/")
    set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
    message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()

# Resolve symlinks so the real directory is what gets copied into the bundle.
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)

message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")

install(SCRIPT ${deploy_script})

# Sign the assembled bundle with identity "-". The path is quoted so that a
# build directory containing spaces is still passed as a single argument.
install(CODE "execute_process(COMMAND codesign --force --deep -s - \"${Redistributable_APP}\")")

find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
    # Paths are quoted for the same reason as above; arguments are separated by
    # plain newlines, which the generated install script treats as whitespace.
    install(CODE "
        execute_process(COMMAND \"${CREATE-DMG}\"
            --skip-jenkins
            --format \"ULMO\"
            --volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR}
            --volicon \"${CMAKE_SOURCE_DIR}/icons/macicon.icns\"
            --icon \"${App_Name}\" 100 100
            --app-drop-link 300 100
            \"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\"
            \"${Assembling_Dir}\")"
    )
else ()
    message(WARNING "create-dmg not found. No .dmg will be created")
endif ()

View file

@ -9,7 +9,6 @@
#include "htmlescape.hh"
#include "langcoder.hh"
#include "utils.hh"
#include "wstring_qt.hh"
#include <QDir>
#include <QFile>
#include <QTextDocumentFragment>
@ -21,7 +20,6 @@
using std::vector;
using std::string;
using gd::wstring;
using std::set;
using std::list;
@ -161,7 +159,10 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
#if QT_VERSION >= QT_VERSION_CHECK( 6, 5, 0 )
if ( GlobalBroadcaster::instance()->getPreference()->darkReaderMode == Config::Dark::Auto
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark ) {
#if !defined( Q_OS_WINDOWS ) // not properly works on Windows.
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark
#endif
&& GlobalBroadcaster::instance()->getPreference()->darkMode == Config::Dark::On ) {
darkReaderModeEnabled = true;
}
#endif
@ -481,7 +482,7 @@ ArticleRequest::ArticleRequest( QString const & word,
// Accumulate main forms
for ( const auto & activeDict : activeDicts ) {
auto const s = activeDict->findHeadwordsForSynonym( gd::removeTrailingZero( word ) );
auto const s = activeDict->findHeadwordsForSynonym( Text::removeTrailingZero( word ) );
connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );
@ -518,9 +519,9 @@ void ArticleRequest::altSearchFinished()
altsDone = true; // So any pending signals in queued mode won't mess us up
vector< wstring > altsVector( alts.begin(), alts.end() );
vector< std::u32string > altsVector( alts.begin(), alts.end() );
wstring wordStd = word.toStdU32String();
std::u32string wordStd = word.toStdU32String();
if ( activeDicts.size() <= 1 ) {
articleSizeLimit = -1; // Don't collapse article if only one dictionary presented
@ -531,7 +532,7 @@ void ArticleRequest::altSearchFinished()
sptr< Dictionary::DataRequest > r = activeDict->getArticle(
wordStd,
altsVector,
gd::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
Text::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
ignoreDiacritics );
connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );
@ -1005,7 +1006,7 @@ void ArticleRequest::individualWordFinished()
WordFinder::SearchResults const & results = stemmedWordFinder->getResults();
if ( results.size() ) {
wstring source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
std::u32string source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
bool hadSomething = false;
@ -1019,7 +1020,7 @@ void ArticleRequest::individualWordFinished()
// Prefix match found. Check if the aliases are acceptable.
wstring result( Folding::applySimpleCaseOnly( results[ x ].first ) );
std::u32string result( Folding::applySimpleCaseOnly( results[ x ].first ) );
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) {
// The resulting string begins with the source one

View file

@ -88,7 +88,7 @@ class ArticleRequest: public Dictionary::DataRequest
QMap< QString, QString > contexts;
std::vector< sptr< Dictionary::Class > > activeDicts;
std::set< gd::wstring, std::less<> > alts; // Accumulated main forms
std::set< std::u32string, std::less<> > alts; // Accumulated main forms
std::list< sptr< Dictionary::WordSearchRequest > > altSearches;
std::list< sptr< Dictionary::DataRequest > > bodyRequests;
bool altsDone{ false };

View file

@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "filetype.hh"
#include "utf8.hh"
#include "text.hh"
#include <ctype.h>
namespace Filetype {
@ -26,13 +26,13 @@ string simplifyString( string const & str, bool lowercase )
size_t beginPos = 0;
while ( beginPos < str.size() && Utf8::isspace( str[ beginPos ] ) ) {
while ( beginPos < str.size() && Text::isspace( str[ beginPos ] ) ) {
++beginPos;
}
size_t endPos = str.size();
while ( endPos && Utf8::isspace( str[ endPos - 1 ] ) ) {
while ( endPos && Text::isspace( str[ endPos - 1 ] ) ) {
--endPos;
}

View file

@ -3,7 +3,7 @@
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "globalregex.hh"
#include "inc_case_folding.hh"
@ -13,12 +13,12 @@ namespace Folding {
/// caught by the diacritics folding table, but they are only handled there
/// when they come with their main characters, not by themselves. The rest
/// are caught here.
bool isCombiningMark( wchar ch )
bool isCombiningMark( char32_t ch )
{
return QChar::isMark( ch );
}
wstring apply( wstring const & in, bool preserveWildcards )
std::u32string apply( std::u32string const & in, bool preserveWildcards )
{
// remove diacritics (normalization), whitespace, punctuation,
auto temp = QString::fromStdU32String( in )
@ -32,7 +32,7 @@ wstring apply( wstring const & in, bool preserveWildcards )
// case folding
std::u32string caseFolded;
caseFolded.reserve( temp.size() );
wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];
for ( const char32_t ch : temp ) {
auto n = foldCase( ch, buf );
caseFolded.append( buf, n );
@ -40,11 +40,11 @@ wstring apply( wstring const & in, bool preserveWildcards )
return caseFolded;
}
wstring applySimpleCaseOnly( wstring const & in )
std::u32string applySimpleCaseOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -55,27 +55,27 @@ wstring applySimpleCaseOnly( wstring const & in )
return out;
}
wstring applySimpleCaseOnly( QString const & in )
std::u32string applySimpleCaseOnly( QString const & in )
{
// Qt only supports simple case folding.
return in.toCaseFolded().toStdU32String();
}
wstring applySimpleCaseOnly( std::string const & in )
std::u32string applySimpleCaseOnly( std::string const & in )
{
return applySimpleCaseOnly( Utf8::decode( in ) );
return applySimpleCaseOnly( Text::toUtf32( in ) );
// return QString::fromStdString( in ).toCaseFolded().toStdU32String();
}
wstring applyFullCaseOnly( wstring const & in )
std::u32string applyFullCaseOnly( std::u32string const & in )
{
wstring caseFolded;
std::u32string caseFolded;
caseFolded.reserve( in.size() * foldCaseMaxOut );
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];
for ( size_t left = in.size(); left--; ) {
caseFolded.append( buf, foldCase( *nextChar++, buf ) );
@ -84,17 +84,17 @@ wstring applyFullCaseOnly( wstring const & in )
return caseFolded;
}
wstring applyDiacriticsOnly( wstring const & in )
std::u32string applyDiacriticsOnly( std::u32string const & in )
{
auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark );
return noAccent.toStdU32String();
}
wstring applyPunctOnly( wstring const & in )
std::u32string applyPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -119,11 +119,11 @@ QString applyPunctOnly( QString const & in )
return out;
}
wstring applyWhitespaceOnly( wstring const & in )
std::u32string applyWhitespaceOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -136,11 +136,11 @@ wstring applyWhitespaceOnly( wstring const & in )
return out;
}
wstring applyWhitespaceAndPunctOnly( wstring const & in )
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -153,26 +153,26 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
return out;
}
bool isWhitespace( wchar ch )
bool isWhitespace( char32_t ch )
{
//invisible character should be treated as whitespace as well.
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
}
bool isWhitespaceOrPunct( wchar ch )
bool isWhitespaceOrPunct( char32_t ch )
{
return isWhitespace( ch ) || QChar::isPunct( ch );
}
bool isPunct( wchar ch )
bool isPunct( char32_t ch )
{
return QChar::isPunct( ch );
}
wstring trimWhitespaceOrPunct( wstring const & in )
std::u32string trimWhitespaceOrPunct( std::u32string const & in )
{
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
@ -185,7 +185,7 @@ wstring trimWhitespaceOrPunct( wstring const & in )
--wordSize;
}
return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}
QString trimWhitespaceOrPunct( QString const & in )
@ -209,13 +209,13 @@ QString trimWhitespaceOrPunct( QString const & in )
return in.mid( wordBegin, wordSize );
}
wstring trimWhitespace( wstring const & in )
std::u32string trimWhitespace( std::u32string const & in )
{
if ( in.empty() ) {
return in;
}
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -228,7 +228,7 @@ wstring trimWhitespace( wstring const & in )
--wordSize;
}
return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}
QString trimWhitespace( QString const & in )

View file

@ -3,7 +3,7 @@
#pragma once
#include "wstring.hh"
#include "text.hh"
#include <QString>
/// Folding provides means to translate several possible ways to write a
@ -17,8 +17,6 @@
namespace Folding {
using gd::wstring;
using gd::wchar;
/// The algorithm's version.
enum {
@ -27,48 +25,48 @@ enum {
/// Applies the folding algorithm to each character in the given string,
/// making another one as a result.
wstring apply( wstring const &, bool preserveWildcards = false );
std::u32string apply( std::u32string const &, bool preserveWildcards = false );
/// Applies only simple case folding algorithm. Since many dictionaries have
/// different case style, we interpret words differing only by case as synonyms.
wstring applySimpleCaseOnly( wstring const & );
wstring applySimpleCaseOnly( QString const & in );
wstring applySimpleCaseOnly( std::string const & in );
std::u32string applySimpleCaseOnly( std::u32string const & );
std::u32string applySimpleCaseOnly( QString const & in );
std::u32string applySimpleCaseOnly( std::string const & in );
/// Applies only full case folding algorithm. This includes simple case, but also
/// decomposing ligatures and complex letters.
wstring applyFullCaseOnly( wstring const & );
std::u32string applyFullCaseOnly( std::u32string const & );
/// Applies only diacritics folding algorithm.
wstring applyDiacriticsOnly( wstring const & );
std::u32string applyDiacriticsOnly( std::u32string const & );
/// Applies only punctuation folding algorithm.
wstring applyPunctOnly( wstring const & );
std::u32string applyPunctOnly( std::u32string const & );
QString applyPunctOnly( QString const & in );
/// Applies only whitespace folding algorithm.
wstring applyWhitespaceOnly( wstring const & );
std::u32string applyWhitespaceOnly( std::u32string const & );
/// Applies only whitespace&punctuation folding algorithm.
wstring applyWhitespaceAndPunctOnly( wstring const & );
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & );
/// Returns true if the given character is any form of whitespace, false
/// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also
/// includes \n, \r and \t.
bool isWhitespace( wchar ch );
bool isWhitespaceOrPunct( wchar ch );
bool isWhitespace( char32_t ch );
bool isWhitespaceOrPunct( char32_t ch );
/// Returns true if the given character is any form of punctuation, false
/// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes.
bool isPunct( wchar ch );
bool isPunct( char32_t ch );
/// Removes any whitespace or punctuation from the beginning and the end of
/// the word.
wstring trimWhitespaceOrPunct( wstring const & );
std::u32string trimWhitespaceOrPunct( std::u32string const & );
QString trimWhitespaceOrPunct( QString const & in );
/// Removes any whitespace from the beginning and the end of
/// the word.
wstring trimWhitespace( wstring const & );
std::u32string trimWhitespace( std::u32string const & );
QString trimWhitespace( QString const & in );
/// Same as apply( std::u32string ), but without any heap operations, therefore
@ -86,6 +84,6 @@ QString unescapeWildcardSymbols( QString const & );
QString escapeWildcardSymbols( QString const & );
/// Tests if the given char is one of the Unicode combining marks.
bool isCombiningMark( wchar ch );
bool isCombiningMark( char32_t ch );
} // namespace Folding

View file

@ -5,7 +5,6 @@
#include <vector>
#include <errno.h>
#include <string.h>
#include "wstring_qt.hh"
char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE";
@ -80,7 +79,7 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
return QString::fromUtf8( &outBuf.front(), datasize );
}
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
std::u32string Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
{
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific

View file

@ -5,7 +5,7 @@
#include <QString>
#include "wstring.hh"
#include "text.hh"
#include "ex.hh"
#include <iconv.h>
@ -35,7 +35,7 @@ public:
QString convert( void const *& inBuf, size_t & inBytesLeft );
// Converts a given block of data from the given encoding to a wide string.
static gd::wstring toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
static std::u32string toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
// Converts a given block of data from the given encoding to an utf8-encoded
// string.

View file

@ -1,15 +1,21 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "utf8.hh"
#include "text.hh"
#include <vector>
#include <algorithm>
#include <QByteArray>
#include <QString>
#include <QList>
namespace Utf8 {
namespace Text {
size_t encode( wchar const * in, size_t inSize, char * out_ )
/// Encodes the given UTF-32 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( char32_t const * in, size_t inSize, char * out_ )
{
unsigned char * out = (unsigned char *)out_;
@ -37,13 +43,18 @@ size_t encode( wchar const * in, size_t inSize, char * out_ )
return out - (unsigned char *)out_;
}
long decode( char const * in_, size_t inSize, wchar * out_ )
/// Decodes the given UTF-8 into UTF-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in_, size_t inSize, char32_t * out_ )
{
unsigned char const * in = (unsigned char const *)in_;
wchar * out = out_;
char32_t * out = out_;
while ( inSize-- ) {
wchar result;
char32_t result;
if ( *in & 0x80 ) {
if ( *in & 0x40 ) {
@ -61,22 +72,22 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
inSize -= 3;
result = ( (wchar)*in++ & 7 ) << 18;
result = ( (char32_t)*in++ & 7 ) << 18;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 12;
result |= ( (char32_t)*in++ & 0x3F ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 6;
result |= ( (char32_t)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
result |= (char32_t)*in++ & 0x3F;
}
else {
// Three-byte sequence
@ -87,17 +98,17 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
inSize -= 2;
result = ( (wchar)*in++ & 0xF ) << 12;
result = ( (char32_t)*in++ & 0xF ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 6;
result |= ( (char32_t)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
result |= (char32_t)*in++ & 0x3F;
}
}
else {
@ -108,12 +119,12 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
--inSize;
result = ( (wchar)*in++ & 0x1F ) << 6;
result = ( (char32_t)*in++ & 0x1F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
result |= (char32_t)*in++ & 0x3F;
}
}
else {
@ -132,7 +143,7 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
return out - out_;
}
string encode( wstring const & in ) noexcept
std::string toUtf8( std::u32string const & in ) noexcept
{
if ( in.empty() ) {
return {};
@ -140,16 +151,16 @@ string encode( wstring const & in ) noexcept
std::vector< char > buffer( in.size() * 4 );
return string( &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) );
return { &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) };
}
wstring decode( string const & in )
std::u32string toUtf32( std::string const & in )
{
if ( in.empty() ) {
return {};
}
std::vector< wchar > buffer( in.size() );
std::vector< char32_t > buffer( in.size() );
long result = decode( in.data(), in.size(), &buffer.front() );
@ -157,7 +168,7 @@ wstring decode( string const & in )
throw exCantDecode( in );
}
return wstring( &buffer.front(), result );
return std::u32string( &buffer.front(), result );
}
bool isspace( int c )
@ -247,29 +258,29 @@ LineFeed initLineFeed( const Encoding e )
{
LineFeed lf{};
switch ( e ) {
case Utf8::Utf32LE:
case Utf32LE:
lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
lf.length = 4;
break;
case Utf8::Utf32BE:
case Utf32BE:
lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
lf.length = 4;
break;
case Utf8::Utf16LE:
case Utf16LE:
lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
lf.length = 2;
break;
case Utf8::Utf16BE:
case Utf16BE:
lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
lf.length = 2;
break;
case Utf8::Windows1252:
case Windows1252:
case Utf8::Windows1251:
case Windows1251:
case Utf8::Utf8:
case Utf8:
case Utf8::Windows1250:
case Windows1250:
default:
lf.length = 1;
lf.lineFeed = new char[ 1 ]{ 0x0A };
@ -277,4 +288,36 @@ LineFeed initLineFeed( const Encoding e )
return lf;
}
} // namespace Utf8
/// Returns a copy of v with every trailing U+0000 removed.
///
/// Converting text that contains non-BMP characters to UTF-32 may leave NUL
/// characters at the end of the string. NUL is significant in the index, so
/// it is stripped here. This is meant to be used only by index-related
/// operations such as store/query. NULs that are not at the end are kept.
std::u32string removeTrailingZero( std::u32string const & v )
{
  // Use the string's own size type: storing size() in an int narrows it.
  std::u32string::size_type n = v.size();
  while ( n > 0 && v[ n - 1 ] == 0 ) {
    --n;
  }
  return std::u32string( v.data(), n );
}
/// Converts in to UTF-32 (via QString::toUcs4) and drops every trailing
/// U+0000 from the result.
std::u32string removeTrailingZero( QString const & in )
{
  QList< unsigned int > codePoints = in.toUcs4();

  // Walk back from the end until the last non-NUL element is found.
  int length = codePoints.size();
  for ( ; length > 0; --length ) {
    if ( codePoints[ length - 1 ] != 0 ) {
      break;
    }
  }

  // Only touch the list when something actually has to be cut off.
  if ( length != codePoints.size() ) {
    codePoints.resize( length );
  }

  return std::u32string( (const char32_t *)codePoints.constData(), codePoints.size() );
}
std::u32string normalize( const std::u32string & str )
{
return QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C ).toStdU32String();
}
} // namespace Text

50
src/common/text.hh Normal file
View file

@ -0,0 +1,50 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <cstdio>
#include <QByteArray>
#include <string>
#include "ex.hh"
/// Facilities to process Text, focusing on Unicode:
/// UTF-8 <-> UTF-32 conversion, encoding names, and small string helpers.
namespace Text {
/// Thrown by toUtf32() when the given UTF-8 input cannot be decoded.
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
/// Possible encodings for .dsl files.
enum Encoding {
Utf16LE,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8, // This is an extension. Detected solely by the UTF8 BOM.
Utf32BE,
Utf32LE,
};
/// Encodes the given UTF-32 string into UTF-8. Empty input yields an empty string.
std::string toUtf8( std::u32string const & ) noexcept;
/// Decodes the given UTF-8 string into UTF-32. Empty input yields an empty
/// string. Throws exCantDecode if the input cannot be decoded.
std::u32string toUtf32( std::string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
/// Gets the first line in string s1. Returns -1 if not found.
/// NOTE(review): the role of s2/s2length is not visible from this header —
/// presumably the line-feed byte sequence to look for; confirm against the
/// implementation.
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
/// Maps between an Encoding value and its name.
/// NOTE(review): the exact name strings are defined in the implementation,
/// which is not visible here.
char const * getEncodingNameFor( Encoding e );
Encoding getEncodingForName( const QByteArray & name );
/// The byte sequence that represents a line feed (0x0A) in some encoding.
struct LineFeed
{
// Number of bytes in lineFeed.
int length;
// The line-feed bytes; allocated with new[] by initLineFeed().
// NOTE(review): nothing visible here releases this buffer — confirm that
// callers delete[] it.
char * lineFeed;
};
/// Builds the LineFeed for the given encoding: 4 bytes for UTF-32, 2 bytes
/// for UTF-16 (byte order per LE/BE), and a single 0x0A byte for everything else.
LineFeed initLineFeed( Encoding e );
/// Return the input as UTF-32 with any trailing U+0000 removed. NUL is
/// significant in the index, so these are meant for index-related
/// operations such as store/query.
std::u32string removeTrailingZero( std::u32string const & v );
std::u32string removeTrailingZero( QString const & in );
/// Returns the given string in Unicode Normalization Form C.
std::u32string normalize( std::u32string const & );
} // namespace Text

View file

@ -1,68 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <cstdio>
#include <QByteArray>
#include <string>
#include "ex.hh"
#include "wstring.hh"
/// A simple UTF-8 encoder/decoder. Some dictionary backends only require
/// utf8, so we have this separately, removing the iconv dependency for them.
/// Besides, utf8 is quite ubiquitous now, and its use is spreaded over many
/// places.
namespace Utf8 {
// Those are possible encodings for .dsl files
enum Encoding {
Utf16LE,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8, // This is an extension. Detected solely by the UTF8 BOM.
Utf32BE,
Utf32LE,
};
using std::string;
using gd::wstring;
using gd::wchar;
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
/// Encodes the given UCS-4 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( wchar const * in, size_t inSize, char * out );
/// Decodes the given UTF-8 into UCS-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in, size_t inSize, wchar * out );
/// Versions for non time-critical code.
string encode( wstring const & ) noexcept;
wstring decode( string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
//get the first line in string s1. -1 if not found
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
char const * getEncodingNameFor( Encoding e );
Encoding getEncodingForName( const QByteArray & name );
struct LineFeed
{
int length;
char * lineFeed;
};
LineFeed initLineFeed( Encoding e );
} // namespace Utf8

View file

@ -1,17 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <string>
///
/// Aliases for legacy reasons.
///
/// For new code, just use the standardized std::u32string for UTF-32 strings instead.
///
namespace gd {
using wchar = char32_t;
using wstring = std::u32string;
} // namespace gd

View file

@ -1,38 +0,0 @@
#include "wstring_qt.hh"
#include <QList>
namespace gd {
// When convert non-BMP characters to wstring,the ending char maybe \0 .This method remove the tailing \0 from the wstring
// as \0 is sensitive in the index. This method will be only used with index related operations like store/query.
wstring removeTrailingZero( wstring const & v )
{
int n = v.size();
while ( n > 0 && v[ n - 1 ] == 0 ) {
n--;
}
return wstring( v.data(), n );
}
wstring removeTrailingZero( QString const & in )
{
QList< unsigned int > v = in.toUcs4();
int n = v.size();
while ( n > 0 && v[ n - 1 ] == 0 ) {
n--;
}
if ( n != v.size() ) {
v.resize( n );
}
return wstring( (const wchar *)v.constData(), v.size() );
}
wstring normalize( const wstring & str )
{
return QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C ).toStdU32String();
}
} // namespace gd

View file

@ -1,16 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
/// This file adds conversions between gd::wstring and QString. See wstring.hh
/// for more details on gd::wstring.
#include "wstring.hh"
#include <QString>
namespace gd {
wstring removeTrailingZero( wstring const & v );
wstring removeTrailingZero( QString const & in );
wstring normalize( wstring const & );
} // namespace gd

View file

@ -149,6 +149,7 @@ Preferences::Preferences():
doubleClickTranslates( true ),
selectWordBySingleClick( false ),
autoScrollToTargetArticle( true ),
targetArticleAtFirst( false ),
escKeyHidesMainWindow( false ),
alwaysOnTop( false ),
searchInDock( false ),
@ -800,7 +801,7 @@ Class load()
// Upgrading
c.dictServers = makeDefaultDictServers();
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
QDomNode ves = root.namedItem( "voiceEngines" );
if ( !ves.isNull() ) {
@ -877,6 +878,11 @@ Class load()
( preferences.namedItem( "autoScrollToTargetArticle" ).toElement().text() == "1" );
}
if ( !preferences.namedItem( "targetArticleAtFirst" ).isNull() ) {
c.preferences.targetArticleAtFirst =
( preferences.namedItem( "targetArticleAtFirst" ).toElement().text() == "1" );
}
if ( !preferences.namedItem( "escKeyHidesMainWindow" ).isNull() ) {
c.preferences.escKeyHidesMainWindow =
( preferences.namedItem( "escKeyHidesMainWindow" ).toElement().text() == "1" );
@ -1684,7 +1690,7 @@ void save( Class const & c )
p.setAttributeNode( icon );
}
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
{
QDomNode ves = dd.createElement( "voiceEngines" );
root.appendChild( ves );
@ -1814,6 +1820,10 @@ void save( Class const & c )
opt.appendChild( dd.createTextNode( c.preferences.autoScrollToTargetArticle ? "1" : "0" ) );
preferences.appendChild( opt );
opt = dd.createElement( "targetArticleAtFirst" );
opt.appendChild( dd.createTextNode( c.preferences.targetArticleAtFirst ? "1" : "0" ) );
preferences.appendChild( opt );
opt = dd.createElement( "escKeyHidesMainWindow" );
opt.appendChild( dd.createTextNode( c.preferences.escKeyHidesMainWindow ? "1" : "0" ) );
preferences.appendChild( opt );

View file

@ -297,6 +297,7 @@ struct Preferences
bool doubleClickTranslates;
bool selectWordBySingleClick;
bool autoScrollToTargetArticle;
bool targetArticleAtFirst;
bool escKeyHidesMainWindow;
bool alwaysOnTop;
@ -725,7 +726,7 @@ struct Program
using Programs = QList< Program >;
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
struct VoiceEngine
{
bool enabled;
@ -818,7 +819,7 @@ struct Class
Lingua lingua;
Forvo forvo;
Programs programs;
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
VoiceEngines voiceEngines;
#endif

View file

@ -4,7 +4,7 @@
#include "aard.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "decompress.hh"
@ -29,7 +29,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -216,11 +215,6 @@ public:
~AardDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -241,8 +235,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -606,8 +602,8 @@ AardDictionary::getSearchResults( QString const & searchString, int searchMode,
class AardArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
AardDictionary & dict;
bool ignoreDiacritics;
@ -616,8 +612,8 @@ class AardArticleRequest: public Dictionary::DataRequest
public:
AardArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
AardArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
AardDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -661,13 +657,13 @@ void AardArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -698,12 +694,12 @@ void AardArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -719,7 +715,7 @@ void AardArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>";
@ -742,9 +738,9 @@ void AardArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > AardDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -920,7 +916,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// Insert new entry
wstring word = Utf8::decode( string( data.data(), wordSize ) );
std::u32string word = Text::toUtf32( string( data.data(), wordSize ) );
if ( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand ) {
indexedWords.addSingleWord( word, articleOffset );
}

View file

@ -11,7 +11,7 @@
#include "htmlescape.hh"
#include "langcoder.hh"
#include "language.hh"
#include "utf8.hh"
#include "text.hh"
#include "utils.hh"
#include <ctype.h>
#include <list>
@ -30,8 +30,6 @@ namespace Bgl {
using std::map;
using std::multimap;
using std::set;
using gd::wstring;
using gd::wchar;
using std::list;
using std::pair;
using std::string;
@ -111,7 +109,7 @@ void trimWs( string & word )
if ( word.size() ) {
unsigned begin = 0;
while ( begin < word.size() && Utf8::isspace( word[ begin ] ) ) {
while ( begin < word.size() && Text::isspace( word[ begin ] ) ) {
++begin;
}
@ -123,7 +121,7 @@ void trimWs( string & word )
// Doesn't consist of ws entirely, so must end with just isspace()
// condition.
while ( Utf8::isspace( word[ end - 1 ] ) ) {
while ( Text::isspace( word[ end - 1 ] ) ) {
--end;
}
@ -137,7 +135,7 @@ void trimWs( string & word )
void addEntryToIndex( string & word,
uint32_t articleOffset,
IndexedWords & indexedWords,
vector< wchar > & wcharBuffer )
vector< char32_t > & wcharBuffer )
{
// Strip any leading or trailing whitespaces
trimWs( word );
@ -159,7 +157,7 @@ void addEntryToIndex( string & word,
}
// Convert the word from utf8 to wide chars
indexedWords.addWord( Utf8::decode( word ), articleOffset );
indexedWords.addWord( Text::toUtf32( word ), articleOffset );
}
class BglDictionary: public BtreeIndexing::BtreeDictionary
@ -173,11 +171,6 @@ public:
BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile );
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -198,10 +191,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -392,7 +387,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() );
wstring wstr = Utf8::decode( articleStr );
std::u32string wstr = Text::toUtf32( articleStr );
if ( getLangTo() == LangCoder::code2toInt( "he" ) ) {
for ( char32_t & i : wstr ) {
@ -441,7 +436,7 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
class BglHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring str;
std::u32string str;
BglDictionary & dict;
QAtomicInt isCancelled;
@ -449,7 +444,7 @@ class BglHeadwordsRequest: public Dictionary::WordSearchRequest
public:
BglHeadwordsRequest( wstring const & word_, BglDictionary & dict_ ):
BglHeadwordsRequest( std::u32string const & word_, BglDictionary & dict_ ):
str( word_ ),
dict( dict_ )
{
@ -481,7 +476,7 @@ void BglHeadwordsRequest::run()
vector< WordArticleLink > chain = dict.findArticles( str );
wstring caseFolded = Folding::applySimpleCaseOnly( str );
std::u32string caseFolded = Folding::applySimpleCaseOnly( str );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -493,11 +488,11 @@ void BglHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, displayedHeadword, articleText );
wstring headwordDecoded;
std::u32string headwordDecoded;
try {
headwordDecoded = Utf8::decode( removePostfix( headword ) );
headwordDecoded = Text::toUtf32( removePostfix( headword ) );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
}
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() ) {
@ -512,7 +507,7 @@ void BglHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< BglHeadwordsRequest >( word, *this ) :
@ -552,8 +547,8 @@ string postfixToSuperscript( string const & in )
class BglArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
BglDictionary & dict;
QAtomicInt isCancelled;
@ -562,8 +557,8 @@ class BglArticleRequest: public Dictionary::DataRequest
public:
BglArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
BglArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
BglDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -595,11 +590,11 @@ public:
void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - convert non-unicode to unicode
{
wstring hebWStr;
std::u32string hebWStr;
try {
hebWStr = Utf8::decode( hebStr );
hebWStr = Text::toUtf32( hebStr );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
hebStr = "Utf-8 decoding error";
return;
}
@ -613,7 +608,7 @@ void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - conv
i += 1488 - 224; // Convert to Hebrew unicode
}
}
hebStr = Utf8::encode( hebWStr );
hebStr = Text::toUtf8( hebWStr );
}
void BglArticleRequest::fixHebArticle( string & hebArticle ) // Hebrew support - remove extra chars at the end
@ -649,7 +644,7 @@ void BglArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
@ -658,7 +653,7 @@ void BglArticleRequest::run()
// the bodies to account for this.
set< QByteArray > articleBodiesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -686,7 +681,7 @@ void BglArticleRequest::run()
// We do the case-folded and postfix-less comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
@ -709,7 +704,7 @@ void BglArticleRequest::run()
continue; // Already had this body
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( targetHeadword, articleText ) ) );
@ -730,7 +725,7 @@ void BglArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -807,9 +802,9 @@ void BglArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > BglDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > BglDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1090,7 +1085,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
IndexedWords indexedWords;
// We use this buffer to decode utf8 into it.
vector< wchar > wcharBuffer;
vector< char32_t > wcharBuffer;
ChunkedStorage::Writer chunks( idx );

View file

@ -3,11 +3,10 @@
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include "wstring_qt.hh"
#include "utils.hh"
#include <QRegularExpression>
@ -19,8 +18,6 @@
namespace BtreeIndexing {
using gd::wstring;
using gd::wchar;
using std::pair;
enum {
@ -59,14 +56,14 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, File::Index & file, QMu
}
vector< WordArticleLink >
BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
BtreeIndex::findArticles( std::u32string const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
{
//First trim ending zero
wstring word = gd::removeTrailingZero( search_word );
std::u32string word = Text::removeTrailingZero( search_word );
vector< WordArticleLink > result;
try {
wstring folded = Folding::apply( word );
std::u32string folded = Folding::apply( word );
if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word );
}
@ -100,7 +97,7 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_,
std::u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -137,11 +134,11 @@ void BtreeWordSearchRequest::findMatches()
bool useWildcards = false;
if ( allowMiddleMatches ) {
useWildcards = ( str.find( '*' ) != wstring::npos || str.find( '?' ) != wstring::npos
|| str.find( '[' ) != wstring::npos || str.find( ']' ) != wstring::npos );
useWildcards = ( str.find( '*' ) != std::u32string::npos || str.find( '?' ) != std::u32string::npos
|| str.find( '[' ) != std::u32string::npos || str.find( ']' ) != std::u32string::npos );
}
wstring folded = Folding::apply( str );
std::u32string folded = Folding::apply( str );
int minMatchLength = 0;
@ -154,7 +151,7 @@ void BtreeWordSearchRequest::findMatches()
regexp.setPatternOptions( QRegularExpression::CaseInsensitiveOption );
bool bNoLetters = folded.empty();
wstring foldedWithWildcards;
std::u32string foldedWithWildcards;
if ( bNoLetters ) {
foldedWithWildcards = Folding::applyWhitespaceOnly( str );
@ -268,9 +265,9 @@ void BtreeWordSearchRequest::findMatches()
vector< WordArticleLink > chain = dict.readChain( chainOffset );
wstring chainHead = Utf8::decode( chain[ 0 ].word );
std::u32string chainHead = Text::toUtf32( chain[ 0 ].word );
wstring resultFolded = Folding::apply( chainHead );
std::u32string resultFolded = Folding::apply( chainHead );
if ( resultFolded.empty() ) {
resultFolded = Folding::applyWhitespaceOnly( chainHead );
}
@ -286,9 +283,9 @@ void BtreeWordSearchRequest::findMatches()
break;
}
if ( useWildcards ) {
wstring word = Utf8::decode( x.prefix + x.word );
wstring result = Folding::applyDiacriticsOnly( word );
if ( result.size() >= (wstring::size_type)minMatchLength ) {
std::u32string word = Text::toUtf32( x.prefix + x.word );
std::u32string result = Folding::applyDiacriticsOnly( word );
if ( result.size() >= (std::u32string::size_type)minMatchLength ) {
QRegularExpressionMatch match = regexp.match( QString::fromStdU32String( result ) );
if ( match.hasMatch() && match.capturedStart() == 0 ) {
addMatch( word );
@ -298,10 +295,10 @@ void BtreeWordSearchRequest::findMatches()
else {
// Skip middle matches, if requested. If suffix variation is specified,
// make sure the string isn't larger than requested.
if ( ( allowMiddleMatches || Folding::apply( Utf8::decode( x.prefix ) ).empty() )
if ( ( allowMiddleMatches || Folding::apply( Text::toUtf32( x.prefix ) ).empty() )
&& ( maxSuffixVariation < 0
|| (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) ) {
addMatch( Utf8::decode( x.prefix + x.word ) );
addMatch( Text::toUtf32( x.prefix + x.word ) );
}
}
if ( matches.size() >= maxResults ) {
@ -393,13 +390,14 @@ BtreeWordSearchRequest::~BtreeWordSearchRequest()
f.waitForFinished();
}
/// Creates a BtreeWordSearchRequest that looks up `str` in this dictionary.
/// The request object is returned wrapped in a shared pointer.
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( std::u32string const & str,
unsigned long maxResults )
{
// Arguments after `str` follow the BtreeWordSearchRequest constructor order:
// minLength_ = 0, maxSuffixVariation_ = -1 (a negative value disables the
// suffix-length check in findMatches()), allowMiddleMatches_ = true.
// NOTE(review): the last argument is presumably the result limit -- the tail
// of the constructor signature is not visible here; confirm.
return std::make_shared< BtreeWordSearchRequest >( *this, str, 0, -1, true, maxResults );
}
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( wstring const & str,
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( std::u32string const & str,
unsigned minLength,
unsigned maxSuffixVariation,
unsigned long maxResults )
@ -437,8 +435,11 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
}
}
char const * BtreeIndex::findChainOffsetExactOrPrefix(
wstring const & target, bool & exactMatch, vector< char > & extLeaf, uint32_t & nextLeaf, char const *& leafEnd )
char const * BtreeIndex::findChainOffsetExactOrPrefix( std::u32string const & target,
bool & exactMatch,
vector< char > & extLeaf,
uint32_t & nextLeaf,
char const *& leafEnd )
{
if ( !idxFile ) {
throw exIndexWasNotOpened();
@ -449,7 +450,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
// Lookup the index by traversing the index btree
// vector< wchar > wcharBuffer;
wstring w_word;
std::u32string w_word;
exactMatch = false;
// Read a node
@ -530,7 +531,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( closestString );
w_word = Utf8::decode( string( closestString, wordSize ) );
w_word = Text::toUtf32( string( closestString, wordSize ) );
compareResult = target.compare( w_word );
@ -649,9 +650,9 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( ptr );
w_word = Utf8::decode( string( ptr, wordSize ) );
w_word = Text::toUtf32( string( ptr, wordSize ) );
wstring foldedWord = Folding::apply( w_word );
std::u32string foldedWord = Folding::apply( w_word );
if ( foldedWord.empty() ) {
foldedWord = Folding::applyWhitespaceOnly( w_word );
}
@ -750,9 +751,9 @@ vector< WordArticleLink > BtreeIndex::readChain( char const *& ptr, uint32_t max
return result;
}
void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
void BtreeIndex::antialias( std::u32string const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
{
wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) );
std::u32string caseFolded = Folding::applySimpleCaseOnly( Text::normalize( str ) );
if ( ignoreDiacritics ) {
caseFolded = Folding::applyDiacriticsOnly( caseFolded );
}
@ -764,8 +765,8 @@ void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & cha
for ( unsigned x = chain.size(); x--; ) {
// If after applying case folding to each word they wouldn't match, we
// drop the entry.
wstring entry =
Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) );
std::u32string entry =
Folding::applySimpleCaseOnly( Text::normalize( Text::toUtf32( chain[ x ].prefix + chain[ x ].word ) ) );
if ( ignoreDiacritics ) {
entry = Folding::applyDiacriticsOnly( entry );
}
@ -923,9 +924,9 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
return offset;
}
void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
void IndexedWords::addWord( std::u32string const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
{
wstring word = gd::removeTrailingZero( index_word );
std::u32string word = Text::removeTrailingZero( index_word );
string::size_type wordSize = word.size();
// Safeguard us against various bugs here. Don't attempt adding words
@ -945,7 +946,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
wordSize = word.size();
}
wchar const * wordBegin = word.c_str();
char32_t const * wordBegin = word.c_str();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -958,7 +959,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
--wordSize;
}
wchar const * nextChar = wordBegin;
char32_t const * nextChar = wordBegin;
vector< char > utfBuffer( wordSize * 4 );
@ -970,11 +971,11 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
if ( !*nextChar ) // End of string ends everything
{
if ( wordsAdded == 0 ) {
wstring folded = Folding::applyWhitespaceOnly( wstring( wordBegin, wordSize ) );
std::u32string folded = Folding::applyWhitespaceOnly( std::u32string( wordBegin, wordSize ) );
if ( !folded.empty() ) {
auto i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first;
auto i = insert( { Text::toUtf8( folded ), vector< WordArticleLink >() } ).first;
string utfWord = Utf8::encode( wstring( wordBegin, wordSize ) );
string utfWord = Text::toUtf8( std::u32string( wordBegin, wordSize ) );
string utfPrefix;
i->second.emplace_back( utfWord, articleOffset, utfPrefix );
}
@ -988,15 +989,15 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
}
// Insert this word
wstring folded = Folding::apply( nextChar );
auto name = Utf8::encode( folded );
std::u32string folded = Folding::apply( nextChar );
auto name = Text::toUtf8( folded );
auto i = insert( { std::move( name ), vector< WordArticleLink >() } ).first;
if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
{
string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) );
string utfWord = Text::toUtf8( std::u32string( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Text::toUtf8( std::u32string( wordBegin, nextChar - wordBegin ) );
i->second.emplace_back( std::move( utfWord ), articleOffset, std::move( utfPrefix ) );
// reduce the vector reallocation.
@ -1020,14 +1021,14 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
}
}
/// Adds one entry for `index_word` to the map, without the per-word splitting
/// that addWord() performs for phrases.
/// The map key is the folded form of the word encoded as UTF-8; the stored
/// link holds the unfolded word (UTF-8) together with `articleOffset`.
void IndexedWords::addSingleWord( wstring const & index_word, uint32_t articleOffset )
void IndexedWords::addSingleWord( std::u32string const & index_word, uint32_t articleOffset )
{
// NOTE(review): removeTrailingZero presumably strips trailing NUL characters;
// its definition is not visible here -- confirm.
wstring const & word = gd::removeTrailingZero( index_word );
wstring folded = Folding::apply( word );
std::u32string const & word = Text::removeTrailingZero( index_word );
std::u32string folded = Folding::apply( word );
// If full folding leaves nothing, fall back to whitespace-only folding.
if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word );
}
// operator[] creates the chain for this key if it does not exist yet, then
// the new link is appended to it.
operator[]( Utf8::encode( folded ) ).emplace_back( Utf8::encode( word ), articleOffset );
operator[]( Text::toUtf8( folded ) ).emplace_back( Text::toUtf8( word ), articleOffset );
}
IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )

View file

@ -18,7 +18,6 @@
namespace BtreeIndexing {
using std::string;
using gd::wstring;
using std::vector;
using std::map;
@ -80,7 +79,8 @@ public:
/// Finds articles that match the given string. A case-insensitive search
/// is performed.
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
vector< WordArticleLink >
findArticles( std::u32string const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
/// Find all unique article links in the index
void findAllArticleLinks( QList< WordArticleLink > & articleLinks );
@ -116,8 +116,11 @@ protected:
/// case, the returned pointer wouldn't belong to 'leaf' at all. To that end,
/// the leafEnd pointer always holds the pointer to the first byte outside
/// the node data.
char const * findChainOffsetExactOrPrefix(
wstring const & target, bool & exactMatch, vector< char > & leaf, uint32_t & nextLeaf, char const *& leafEnd );
char const * findChainOffsetExactOrPrefix( std::u32string const & target,
bool & exactMatch,
vector< char > & leaf,
uint32_t & nextLeaf,
char const *& leafEnd );
/// Reads a node or leaf at the given offset. Just uncompresses its data
/// to the given vector and does nothing more.
@ -129,7 +132,7 @@ protected:
/// Drops any aliases which arose due to folding. Only case-folded aliases
/// are left.
void antialias( wstring const &, vector< WordArticleLink > &, bool ignoreDiactitics );
void antialias( std::u32string const &, vector< WordArticleLink > &, bool ignoreDiactitics );
protected:
@ -161,10 +164,10 @@ public:
/// This function does the search using the btree index. Derivatives usually
/// do not need to implement this function.
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
virtual bool isLocalDictionary()
{
@ -210,7 +213,7 @@ class BtreeWordSearchRequest: public Dictionary::WordSearchRequest
{
protected:
BtreeDictionary & dict;
wstring str;
std::u32string str;
unsigned long maxResults;
unsigned minLength;
int maxSuffixVariation;
@ -221,7 +224,7 @@ protected:
public:
BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_,
std::u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -251,11 +254,11 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
/// Instead of adding to the map directly, use this function. It does folding
/// itself, and for phrases/sentences it adds additional entries beginning with
/// each new word.
void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
void addWord( std::u32string const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
/// Differs from addWord() in that it only adds a single entry. We use this
/// for zip's file names.
void addSingleWord( wstring const & word, uint32_t articleOffset );
void addSingleWord( std::u32string const & word, uint32_t articleOffset );
};
/// Builds the index, as a compressed btree. Returns IndexInfo.

View file

@ -4,7 +4,7 @@
#include "dictdfiles.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
@ -29,7 +29,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using std::vector;
using std::list;
@ -91,11 +90,6 @@ public:
~DictdDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -118,8 +112,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -239,9 +235,9 @@ uint32_t decodeBase64( string const & str )
return number;
}
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -256,13 +252,13 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, string > mainArticles, alternateArticles;
multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -382,12 +378,12 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, string > & mapToUse =
multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( x.word ), articleText ) );
@ -401,7 +397,7 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
string result;
multimap< wstring, string >::const_iterator i;
multimap< std::u32string, string >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second;
@ -427,7 +423,8 @@ QString const & DictdDictionary::getDescription()
return dictionaryDescription;
}
sptr< Dictionary::DataRequest > req = getArticle( U"00databaseinfo", vector< wstring >(), wstring(), false );
sptr< Dictionary::DataRequest > req =
getArticle( U"00databaseinfo", vector< std::u32string >(), std::u32string(), false );
if ( req->dataSize() > 0 ) {
dictionaryDescription = QString::fromUtf8( req->getFullData().data(), req->getFullData().size() );
@ -634,10 +631,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Handle the fourth entry, if it exists. From dictfmt man:
// When --index-keep-orig option is used fourth column is created
// (if necessary) in .index file.
indexedWords.addWord( Utf8::decode( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
indexedWords.addWord( Text::toUtf32( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
++idxHeader.wordCount;
}
indexedWords.addWord( Utf8::decode( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
indexedWords.addWord( Text::toUtf32( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
++idxHeader.wordCount;
++idxHeader.articleCount;
@ -662,7 +659,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
eol = articleBody; // No headword itself
}
if ( eol ) {
while ( *eol && Utf8::isspace( *eol ) ) {
while ( *eol && Text::isspace( *eol ) ) {
++eol; // skip spaces
}

View file

@ -177,7 +177,7 @@ void Class::deferredInit()
//base method.
}
sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
sptr< WordSearchRequest > Class::stemmedMatch( std::u32string const & /*str*/,
unsigned /*minLength*/,
unsigned /*maxSuffixVariation*/,
unsigned long /*maxResults*/ )
@ -185,12 +185,12 @@ sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
return std::make_shared< WordSearchRequestInstant >();
}
/// Default implementation: ignores the word and returns a newly created
/// WordSearchRequestInstant with nothing added to it.
/// NOTE(review): presumably this is an already-finished, empty request;
/// WordSearchRequestInstant is defined outside this view -- confirm.
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & )
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( std::u32string const & )
{
return std::make_shared< WordSearchRequestInstant >();
}
/// Default implementation: ignores the word and returns an empty list of
/// alternate writings.
vector< wstring > Class::getAlternateWritings( wstring const & ) noexcept
vector< std::u32string > Class::getAlternateWritings( std::u32string const & ) noexcept
{
return {};
}

View file

@ -19,7 +19,7 @@
#include "langcoder.hh"
#include "sptr.hh"
#include "utils.hh"
#include "wstring.hh"
#include "text.hh"
#include <QtGlobal>
/// Abstract dictionary-related stuff
@ -27,16 +27,8 @@ namespace Dictionary {
using std::vector;
using std::string;
using gd::wstring;
using std::map;
enum Property {
Author,
Copyright,
Description,
Email
};
DEF_EX( Ex, "Dictionary error", std::exception )
DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex )
DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex )
@ -131,19 +123,19 @@ private:
/// algorithms. Positive values are used by morphology matches.
struct WordMatch
{
wstring word;
std::u32string word;
int weight;
WordMatch():
weight( 0 )
{
}
WordMatch( wstring const & word_ ):
WordMatch( std::u32string const & word_ ):
word( word_ ),
weight( 0 )
{
}
WordMatch( wstring const & word_, int weight_ ):
WordMatch( std::u32string const & word_, int weight_ ):
word( word_ ),
weight( weight_ )
{
@ -380,10 +372,6 @@ public:
metadata_enable_fts = _enable_FTS;
}
/// Returns all the available properties, like the author's name, copyright,
/// description etc. All strings are in utf8.
virtual map< Property, string > getProperties() noexcept = 0;
/// Returns the features the dictionary possess. See the Feature enum for
/// their list.
virtual Features getFeatures() const noexcept
@ -442,7 +430,7 @@ public:
/// prefix results should be added. Not more than maxResults results should
/// be stored. The whole operation is supposed to be fast, though some
/// dictionaries, the network ones particularly, may of course be slow.
virtual sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) = 0;
virtual sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) = 0;
/// Looks up a given word in the dictionary, aiming to find different forms
/// of the given word by allowing suffix variations. This means allowing words
@ -453,20 +441,20 @@ public:
/// in the middle of a phrase got matched should be returned.
/// The default implementation does nothing, returning an empty result.
virtual sptr< WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
/// Finds known headwords for the given word, that is, the words for which
/// the given word is a synonym. If a dictionary can't perform this operation,
/// it should leave the default implementation which always returns an empty
/// result.
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & );
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
/// For a given word, provides alternate writings of it which are to be looked
/// up alongside with it. Transliteration dictionaries implement this. The
/// default implementation returns an empty list. Note that this function is
/// supposed to be very fast and simple, and the results are thus returned
/// synchronously.
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
/// Returns a definition for the given word. The definition should
/// be an html fragment (without html/head/body tags) in an utf8 encoding.
@ -475,10 +463,10 @@ public:
/// synonyms for the main word.
/// context is a dictionary-specific data, currently only used for the
/// 'Websites' feature.
virtual sptr< DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const & context = wstring(),
bool ignoreDiacritics = false ) = 0;
virtual sptr< DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const & context = std::u32string(),
bool ignoreDiacritics = false ) = 0;
/// Loads contents of a resource named 'name' into the 'data' vector. This is
/// usually a picture file referenced in the article or something like that.

View file

@ -2,7 +2,6 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "dictserver.hh"
#include "wstring_qt.hh"
#include <QTimer>
#include <QUrl>
#include <QTcpSocket>
@ -303,10 +302,6 @@ public:
disconnectFromServer( socket );
}
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
@ -318,9 +313,10 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override
{
@ -391,7 +387,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
{
Q_OBJECT
QAtomicInt isCancelled;
wstring word;
std::u32string word;
QString errorString;
DictServerDictionary & dict;
@ -406,7 +402,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
public:
DictServerWordSearchRequest( wstring word_, DictServerDictionary & dict_ ):
DictServerWordSearchRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ),
dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-w" ) )
@ -566,7 +562,7 @@ void DictServer::DictServerWordSearchRequest::addMatchedWord( const QString & st
class DictServerArticleRequest: public Dictionary::DataRequest
{
QAtomicInt isCancelled;
wstring word;
std::u32string word;
QString errorString;
DictServerDictionary & dict;
string articleData;
@ -582,7 +578,7 @@ class DictServerArticleRequest: public Dictionary::DataRequest
public:
DictServerImpl * dictImpl;
DictServerArticleRequest( wstring word_, DictServerDictionary & dict_ ):
DictServerArticleRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ),
dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-t" ) )
@ -874,7 +870,7 @@ void DictServerArticleRequest::cancel()
finish();
}
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{
(void)maxResults;
if ( word.size() > 80 ) {
@ -887,8 +883,10 @@ sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & wor
}
}
sptr< DataRequest >
DictServerDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< DataRequest > DictServerDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
if ( word.size() > 80 ) {

View file

@ -5,7 +5,7 @@
#include "dsl_details.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
@ -13,7 +13,6 @@
#include "filetype.hh"
#include "audiolink.hh"
#include "langcoder.hh"
#include "wstring_qt.hh"
#include "indexedzip.hh"
#include "tiff.hh"
#include "ftshelpers.hh"
@ -44,11 +43,9 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using gd::wchar;
using std::vector;
using std::list;
using Utf8::Encoding;
using Text::Encoding;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -100,8 +97,8 @@ struct InsidedCard
{
uint32_t offset;
uint32_t size;
QList< wstring > headwords;
InsidedCard( uint32_t _offset, uint32_t _size, QList< wstring > const & words ):
QList< std::u32string > headwords;
InsidedCard( uint32_t _offset, uint32_t _size, QList< std::u32string > const & words ):
offset( _offset ),
size( _size ),
headwords( words )
@ -144,7 +141,7 @@ class DslDictionary: public BtreeIndexing::BtreeDictionary
int optionalPartNom;
quint8 articleNom;
wstring currentHeadword;
std::u32string currentHeadword;
string resourceDir1, resourceDir2;
public:
@ -156,11 +153,6 @@ public:
~DslDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -192,8 +184,10 @@ public:
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -237,15 +231,15 @@ private:
/// Loads the article. Does not process the DSL language.
void loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded,
std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics,
wstring & tildeValue,
wstring & displayedHeadword,
std::u32string & tildeValue,
std::u32string & displayedHeadword,
unsigned & headwordIndex,
wstring & articleText );
std::u32string & articleText );
/// Converts DSL language to an Html.
string dslToHtml( wstring const &, wstring const & headword = wstring() );
string dslToHtml( std::u32string const &, std::u32string const & headword = std::u32string() );
// Parts of dslToHtml()
string nodeToHtml( ArticleDom::Node const & );
@ -457,7 +451,7 @@ void DslDictionary::loadIcon() noexcept
/// so nbsp is not a whitespace character for Dsl compiler.
/// For now we have only space and tab, since those are most likely the only
/// ones recognized as spaces by that compiler.
bool isDslWs( wchar ch )
bool isDslWs( char32_t ch )
{
switch ( ch ) {
case ' ':
@ -469,14 +463,14 @@ bool isDslWs( wchar ch )
}
void DslDictionary::loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded,
std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics,
wstring & tildeValue,
wstring & displayedHeadword,
std::u32string & tildeValue,
std::u32string & displayedHeadword,
unsigned & headwordIndex,
wstring & articleText )
std::u32string & articleText )
{
wstring articleData;
std::u32string articleData;
{
vector< char > chunk;
@ -512,7 +506,7 @@ void DslDictionary::loadArticle( uint32_t address,
else {
try {
articleData =
Iconv::toWstring( Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
Iconv::toWstring( Text::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
free( articleBody );
// Strip DSL comments
@ -533,27 +527,27 @@ void DslDictionary::loadArticle( uint32_t address,
// Check if we retrieve insided card
bool insidedCard = isDslWs( articleData.at( 0 ) );
wstring tildeValueWithUnsorted; // This one has unsorted parts left
std::u32string tildeValueWithUnsorted; // This one has unsorted parts left
for ( headwordIndex = 0;; ) {
size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) {
if ( pos == std::u32string::npos ) {
pos = articleData.size();
}
if ( !foundDisplayedHeadword ) {
// Process the headword
wstring rawHeadword = wstring( articleData, begin, pos - begin );
std::u32string rawHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !rawHeadword.empty() && isDslWs( rawHeadword[ 0 ] ) ) {
// Headword of the insided card
wstring::size_type hpos = rawHeadword.find( L'@' );
std::u32string::size_type hpos = rawHeadword.find( L'@' );
if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
std::u32string head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break;
@ -574,7 +568,7 @@ void DslDictionary::loadArticle( uint32_t address,
// We need our tilde expansion value
tildeValue = rawHeadword;
list< wstring > lst;
list< std::u32string > lst;
expandOptionalParts( tildeValue, &lst );
@ -586,7 +580,7 @@ void DslDictionary::loadArticle( uint32_t address,
processUnsortedParts( tildeValue, false );
}
wstring str = rawHeadword;
std::u32string str = rawHeadword;
if ( hadFirstHeadword ) {
expandTildes( str, tildeValueWithUnsorted );
@ -596,7 +590,7 @@ void DslDictionary::loadArticle( uint32_t address,
str = Folding::applySimpleCaseOnly( str );
list< wstring > lst;
list< std::u32string > lst;
expandOptionalParts( str, &lst );
// Does one of the results match the requested word? If so, we'd choose
@ -662,15 +656,15 @@ void DslDictionary::loadArticle( uint32_t address,
// Check for begin article text
if ( insidedCard ) {
// Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) {
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == std::u32string::npos ) {
hpos = articleData.size();
}
wstring str = wstring( articleData, pos, hpos - pos );
std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break;
}
}
@ -692,18 +686,18 @@ void DslDictionary::loadArticle( uint32_t address,
}
if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos );
articleText = std::u32string( articleData, pos );
}
else {
articleText.clear();
}
}
string DslDictionary::dslToHtml( wstring const & str, wstring const & headword )
string DslDictionary::dslToHtml( std::u32string const & str, std::u32string const & headword )
{
// Normalize the string
wstring normalizedStr = gd::normalize( str );
currentHeadword = headword;
std::u32string normalizedStr = Text::normalize( str );
currentHeadword = headword;
ArticleDom dom( normalizedStr, getName(), headword );
@ -738,7 +732,7 @@ string DslDictionary::getNodeLink( ArticleDom::Node const & node )
}
}
if ( link.empty() ) {
link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) );
link = Html::escape( Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false ) );
}
return link;
@ -749,7 +743,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
string result;
if ( !node.isTag ) {
result = Html::escape( Utf8::encode( node.text ) );
result = Html::escape( Text::toUtf8( node.text ) );
// Handle all end-of-line
@ -789,7 +783,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"c_default_color\">" + processNodeChildren( node ) + "</span>";
}
else {
result += "<font color=\"" + Html::escape( Utf8::encode( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
result += "<font color=\"" + Html::escape( Text::toUtf8( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
+ "</font>";
}
}
@ -802,7 +796,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
}
else if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && iswdigit( node.tagName[ 1 ] ) ) {
result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
result += "<div class=\"dsl_" + Text::toUtf8( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
}
else if ( node.tagName == U"trn" ) {
result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>";
@ -814,7 +808,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>";
}
else if ( node.tagName == U"s" || node.tagName == U"video" ) {
string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false );
string filename = Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false );
string n = resourceDir1 + filename;
if ( Filetype::isNameOfSound( filename ) ) {
@ -893,7 +887,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
else if ( node.tagName == U"p" ) {
result += "<span class=\"dsl_p\"";
string val = Utf8::encode( node.renderAsText() );
string val = Text::toUtf8( node.renderAsText() );
// If we have such a key, display a title
@ -913,7 +907,8 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
// user could pick up the best suitable option.
string data = processNodeChildren( node );
result += R"(<span class="dsl_stress"><span class="dsl_stress_without_accent">)" + data + "</span>"
+ "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) ) + "</span></span>";
+ "<span class=\"dsl_stress_with_accent\">" + data + Text::toUtf8( std::u32string( 1, 0x301 ) )
+ "</span></span>";
}
else if ( node.tagName == U"lang" ) {
result += "<span class=\"dsl_lang\"";
@ -949,7 +944,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" );
url.setHost( "localhost" );
auto nodeStr = Utf8::decode( getNodeLink( node ) );
auto nodeStr = Text::toUtf32( getNodeLink( node ) );
normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -973,7 +968,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" );
url.setHost( "localhost" );
wstring nodeStr = node.renderAsText();
std::u32string nodeStr = node.renderAsText();
normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -1125,7 +1120,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
vector< char > chunk;
char * articleProps;
wstring articleData;
std::u32string articleData;
{
QMutexLocker _( &idxMutex );
@ -1166,7 +1161,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Skip headword
size_t pos = 0;
wstring articleHeadword, tildeValue;
std::u32string articleHeadword, tildeValue;
// Check if we retrieve insided card
bool insidedCard = isDslWs( articleData.at( 0 ) );
@ -1175,20 +1170,20 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) {
if ( pos == std::u32string::npos ) {
pos = articleData.size();
}
if ( articleHeadword.empty() ) {
// Process the headword
articleHeadword = wstring( articleData, begin, pos - begin );
articleHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !articleHeadword.empty() && isDslWs( articleHeadword[ 0 ] ) ) {
// Headword of the insided card
wstring::size_type hpos = articleHeadword.find( L'@' );
std::u32string::size_type hpos = articleHeadword.find( L'@' );
if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
std::u32string head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break;
@ -1205,7 +1200,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
}
if ( !articleHeadword.empty() ) {
list< wstring > lst;
list< std::u32string > lst;
tildeValue = articleHeadword;
@ -1242,15 +1237,15 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Check for begin article text
if ( insidedCard ) {
// Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) {
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == std::u32string::npos ) {
hpos = articleData.size();
}
wstring str = wstring( articleData, pos, hpos - pos );
std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break;
}
}
@ -1266,17 +1261,17 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromStdU32String( articleHeadword );
}
wstring articleText;
std::u32string articleText;
if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos );
articleText = std::u32string( articleData, pos );
}
else {
articleText.clear();
}
if ( !tildeValue.empty() ) {
list< wstring > lst;
list< std::u32string > lst;
processUnsortedParts( tildeValue, false );
expandOptionalParts( tildeValue, &lst );
@ -1382,8 +1377,8 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class DslArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
DslDictionary & dict;
bool ignoreDiacritics;
@ -1392,8 +1387,8 @@ class DslArticleRequest: public Dictionary::DataRequest
public:
DslArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
DslArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
DslDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1449,7 +1444,7 @@ void DslArticleRequest::run()
// index here.
set< pair< uint32_t, unsigned > > articlesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
// Check if we're cancelled occasionally
@ -1460,9 +1455,9 @@ void DslArticleRequest::run()
// Grab that article
wstring tildeValue;
wstring displayedHeadword;
wstring articleBody;
std::u32string tildeValue;
std::u32string displayedHeadword;
std::u32string articleBody;
unsigned headwordIndex;
string articleText, articleAfter;
@ -1546,9 +1541,9 @@ void DslArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > DslDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > DslDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1637,7 +1632,7 @@ void DslResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1766,7 +1761,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
qDebug( "Dsl: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
@ -1782,12 +1777,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() );
string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() );
string soundDictName = Text::toUtf8( scanner.getSoundDictionaryName() );
if ( !soundDictName.empty() ) {
idxHeader.hasSoundDictionaryName = 1;
idx.write( (uint32_t)soundDictName.size() );
@ -1808,7 +1803,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
map< string, string > abrv;
wstring curString;
std::u32string curString;
size_t curOffset;
for ( ;; ) {
@ -1820,7 +1815,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
continue;
}
list< wstring > keys;
list< std::u32string > keys;
bool eof = false;
@ -1856,13 +1851,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// If the string has any dsl markup, we strip it
string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() );
string value = Text::toUtf8( ArticleDom( curString ).root.renderAsText() );
for ( auto & key : keys ) {
unescapeDsl( key );
normalizeHeadword( key );
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
}
@ -1890,7 +1885,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
bool hasString = false;
wstring curString;
std::u32string curString;
size_t curOffset;
uint32_t articleCount = 0, wordCount = 0;
@ -1924,7 +1919,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Ok, got the headword
list< wstring > allEntryWords;
list< std::u32string > allEntryWords;
processUnsortedParts( curString, true );
expandOptionalParts( curString, &allEntryWords );
@ -1977,10 +1972,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
wordCount += allEntryWords.size();
int insideInsided = 0;
wstring headword;
std::u32string headword;
QList< InsidedCard > insidedCards;
uint32_t offset = curOffset;
QList< wstring > insidedHeadwords;
QList< std::u32string > insidedHeadwords;
unsigned linesInsideCard = 0;
int dogLine = 0;
bool wasEmptyLine = false;
@ -2023,8 +2018,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Find embedded cards
wstring::size_type n = curString.find( L'@' );
if ( n == wstring::npos || curString[ n - 1 ] == L'\\' ) {
std::u32string::size_type n = curString.find( L'@' );
if ( n == std::u32string::npos || curString[ n - 1 ] == L'\\' ) {
if ( insideInsided ) {
linesInsideCard++;
}

View file

@ -6,7 +6,7 @@
#include "folding.hh"
#include "langcoder.hh"
#include "ufile.hh"
#include "utf8.hh"
#include "text.hh"
#include <exception>
#include <stdio.h>
@ -17,9 +17,8 @@
namespace Dsl {
namespace Details {
using gd::wstring;
using std::list;
using Utf8::Encoding;
using Text::Encoding;
static QMap< int, string > lang_codes = {
{ 1, "en" }, { 1033, "en" }, { 2, "ru" }, { 1049, "ru" }, { 1068, "az" }, { 1025, "ar" }, { 1067, "am" },
@ -40,7 +39,7 @@ string findCodeForDslId( int id )
return lang_codes[ id ];
}
bool isAtSignFirst( wstring const & str )
bool isAtSignFirst( std::u32string const & str )
{
// Test if '@' is first in string except spaces and dsl tags
QRegularExpression reg( R"([ \t]*(?:\[[^\]]+\][ \t]*)*@)", QRegularExpression::PatternOption::CaseInsensitiveOption );
@ -49,13 +48,13 @@ bool isAtSignFirst( wstring const & str )
/////////////// ArticleDom
wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
std::u32string ArticleDom::Node::renderAsText( bool stripTrsTag ) const
{
if ( !isTag ) {
return text;
}
wstring result;
std::u32string result;
for ( const auto & i : *this ) {
if ( !stripTrsTag || i.tagName != U"!trs" ) {
@ -69,17 +68,17 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
namespace {
/// @return true if @p tagName equals "mN" where N is a digit
bool is_mN( wstring const & tagName )
bool is_mN( std::u32string const & tagName )
{
  // Accept exactly two code points: a literal 'm' followed by one ASCII digit.
  if ( tagName.size() != 2 || tagName[ 0 ] != U'm' ) {
    return false;
  }

  // Compare against the ASCII digit range directly rather than calling
  // iswdigit(): that function takes a wint_t, which is narrower than char32_t
  // on platforms with a 16-bit wchar_t, so a code point above U+FFFF would be
  // truncated before being classified. The range test is also independent of
  // the C locale.
  char32_t const digit = tagName[ 1 ];
  return digit >= U'0' && digit <= U'9';
}
bool isAnyM( wstring const & tagName )
bool isAnyM( std::u32string const & tagName )
{
  // The plain "m" name matches outright; anything else must pass the "mN" check.
  if ( tagName == U"m" ) {
    return true;
  }
  return is_mN( tagName );
}
bool checkM( wstring const & dest, wstring const & src )
bool checkM( std::u32string const & dest, std::u32string const & src )
{
  // True only when src is the plain "m" name and dest has the "mN" form.
  if ( src != U"m" ) {
    return false;
  }
  return is_mN( dest );
}
@ -97,8 +96,8 @@ struct MustTagBeClosed
} // unnamed namespace
ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring const & headword_ ):
root( Node::Tag(), wstring(), wstring() ),
ArticleDom::ArticleDom( std::u32string const & str, string const & dictName, std::u32string const & headword_ ):
root( Node::Tag(), std::u32string(), std::u32string() ),
stringPos( str.c_str() ),
lineStartPos( str.c_str() ),
transcriptionCount( 0 ),
@ -126,7 +125,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
}
else {
// Insided card
wstring linkTo;
std::u32string linkTo;
nextChar();
for ( ;; nextChar() ) {
if ( ch == L'\n' ) {
@ -142,13 +141,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
linkTo = Folding::trimWhitespace( linkTo );
if ( !linkTo.empty() ) {
list< wstring > allLinkEntries;
list< std::u32string > allLinkEntries;
processUnsortedParts( linkTo, true );
expandOptionalParts( linkTo, &allLinkEntries );
for ( auto entry = allLinkEntries.begin(); entry != allLinkEntries.end(); ) {
if ( !textNode ) {
Node text = Node( Node::Text(), wstring() );
Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) {
root.push_back( text );
@ -168,10 +167,10 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
stack.pop_back();
textNode = 0;
wstring linkText = Folding::trimWhitespace( *entry );
std::u32string linkText = Folding::trimWhitespace( *entry );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"@", wstring() );
Node link( Node::Tag(), U"@", std::u32string() );
for ( auto & n : nodeDom.root ) {
link.push_back( n );
}
@ -181,13 +180,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( stack.empty() ) {
root.push_back( link );
if ( entry != allLinkEntries.end() ) { // Add line break before next entry
root.push_back( Node( Node::Tag(), U"br", wstring() ) );
root.push_back( Node( Node::Tag(), U"br", std::u32string() ) );
}
}
else {
stack.back()->push_back( link );
if ( entry != allLinkEntries.end() ) {
stack.back()->push_back( Node( Node::Tag(), U"br", wstring() ) );
stack.back()->push_back( Node( Node::Tag(), U"br", std::u32string() ) );
}
}
}
@ -208,8 +207,8 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( ch == L'[' && !escaped ) {
// Beginning of a tag.
bool isClosing;
wstring name;
wstring attrs;
std::u32string name;
std::u32string attrs;
try {
do {
@ -330,7 +329,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
nextChar();
} while ( Folding::isWhitespace( ch ) );
wstring linkTo, linkText;
std::u32string linkTo, linkText;
for ( ;; nextChar() ) {
// Is it the end?
@ -373,7 +372,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"ref", wstring() );
Node link( Node::Tag(), U"ref", std::u32string() );
for ( auto & n : nodeDom.root ) {
link.push_back( n );
}
@ -427,7 +426,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
// If there's currently no text node, open one
if ( !textNode ) {
Node text = Node( Node::Text(), wstring() );
Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) {
root.push_back( text );
@ -691,7 +690,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
}
}
void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Node * > & stack )
void ArticleDom::openTag( std::u32string const & name, std::u32string const & attrs, list< Node * > & stack )
{
list< Node > nodesToReopen;
@ -746,7 +745,7 @@ void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Nod
}
}
void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool warn )
void ArticleDom::closeTag( std::u32string const & name, list< Node * > & stack, bool warn )
{
// Find the tag which is to be closed
@ -839,13 +838,13 @@ bool ArticleDom::atSignFirstInLine()
return true;
}
return isAtSignFirst( wstring( lineStartPos ) );
return isAtSignFirst( std::u32string( lineStartPos ) );
}
/////////////// DslScanner
DslScanner::DslScanner( string const & fileName ):
encoding( Utf8::Utf8 ),
encoding( Text::Utf8 ),
readBufferPtr( readBuffer ),
readBufferLeft( 0 ),
linesRead( 0 )
@ -876,19 +875,19 @@ DslScanner::DslScanner( string const & fileName ):
guessedEncoding.has_value() ) {
switch ( guessedEncoding.value() ) {
case QStringConverter::Utf8:
encoding = Utf8::Utf8;
encoding = Text::Utf8;
break;
case QStringConverter::Utf16LE:
encoding = Utf8::Utf16LE;
encoding = Text::Utf16LE;
break;
case QStringConverter::Utf16BE:
encoding = Utf8::Utf16BE;
encoding = Text::Utf16BE;
break;
case QStringConverter::Utf32LE:
encoding = Utf8::Utf16LE;
encoding = Text::Utf16LE;
break;
case QStringConverter::Utf32BE:
encoding = Utf8::Utf32BE;
encoding = Text::Utf32BE;
break;
default:
break;
@ -905,10 +904,10 @@ DslScanner::DslScanner( string const & fileName ):
}
//iconv.reinit( encoding );
lineFeed = Utf8::initLineFeed( encoding );
lineFeed = Text::initLineFeed( encoding );
// We now can use our own readNextLine() function
wstring str;
std::u32string str;
size_t offset;
for ( ;; ) {
@ -946,7 +945,7 @@ DslScanner::DslScanner( string const & fileName ):
size_t beg = str.find_first_of( L'"' );
if ( beg == wstring::npos ) {
if ( beg == std::u32string::npos ) {
throw exMalformedDslFile( fileName );
}
@ -956,7 +955,7 @@ DslScanner::DslScanner( string const & fileName ):
throw exMalformedDslFile( fileName );
}
wstring arg( str, beg + 1, end - beg - 1 );
std::u32string arg( str, beg + 1, end - beg - 1 );
if ( isName ) {
dictionaryName = arg;
@ -977,13 +976,13 @@ DslScanner::DslScanner( string const & fileName ):
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
}
else if ( !arg.compare( U"Latin" ) ) {
encoding = Utf8::Windows1252;
encoding = Text::Windows1252;
}
else if ( !arg.compare( U"Cyrillic" ) ) {
encoding = Utf8::Windows1251;
encoding = Text::Windows1251;
}
else if ( !arg.compare( U"EasternEuropean" ) ) {
encoding = Utf8::Windows1250;
encoding = Text::Windows1250;
}
else {
gzclose( f );
@ -1009,7 +1008,7 @@ DslScanner::~DslScanner() noexcept
gzclose( f );
}
bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_word )
bool DslScanner::readNextLine( std::u32string & out, size_t & offset, bool only_head_word )
{
offset = gztell( f ) - readBufferLeft /*+pos*/;
@ -1036,7 +1035,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
return false;
}
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) {
return false;
}
@ -1057,9 +1056,9 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
}
}
bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bool only_headword )
bool DslScanner::readNextLineWithoutComments( std::u32string & out, size_t & offset, bool only_headword )
{
wstring str;
std::u32string str;
bool commentToNextLine = false;
size_t currentOffset;
@ -1087,14 +1086,14 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bo
/////////////// DslScanner
void processUnsortedParts( wstring & str, bool strip )
void processUnsortedParts( std::u32string & str, bool strip )
{
int refCount = 0;
size_t startPos = 0;
for ( size_t x = 0; x < str.size(); ) {
wchar ch = str[ x ];
char32_t ch = str[ x ];
if ( ch == L'\\' ) {
// Escape code
@ -1150,18 +1149,18 @@ void processUnsortedParts( wstring & str, bool strip )
}
}
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, bool inside_recurse )
void expandOptionalParts( std::u32string & str, list< std::u32string > * result, size_t x, bool inside_recurse )
{
// if str is too long ,it can never be headwords.
if ( str.size() > 100 ) {
return;
}
list< wstring > expanded;
list< wstring > * headwords;
list< std::u32string > expanded;
list< std::u32string > * headwords;
headwords = inside_recurse ? result : &expanded;
for ( ; x < str.size(); ) {
wchar ch = str[ x ];
char32_t ch = str[ x ];
if ( ch == L'\\' ) {
// Escape code
@ -1174,7 +1173,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
int refCount = 1;
for ( size_t y = x + 1; y < str.size(); ++y ) {
wchar ch = str[ y ];
char32_t ch = str[ y ];
if ( ch == L'\\' ) {
// Escape code
@ -1190,7 +1189,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( y != x + 1 ) // Only do for non-empty cases
{
wstring removed( str, 0, x );
std::u32string removed( str, 0, x );
removed.append( str, y + 1, str.size() - y - 1 );
expandOptionalParts( removed, headwords, x, true );
@ -1204,7 +1203,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( refCount && x != str.size() - 1 ) {
// Closing paren not found? Chop it.
wstring removed( str, 0, x );
std::u32string removed( str, 0, x );
// Limit the amount of results to avoid excessive resource consumption
if ( headwords->size() < 32 ) {
@ -1242,10 +1241,10 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
}
}
static const wstring openBraces( U"{{" );
static const wstring closeBraces( U"}}" );
static const std::u32string openBraces( U"{{" );
static const std::u32string closeBraces( U"}}" );
void stripComments( wstring & str, bool & nextLine )
void stripComments( std::u32string & str, bool & nextLine )
{
string::size_type n = 0, n2 = 0;
@ -1269,9 +1268,9 @@ void stripComments( wstring & str, bool & nextLine )
}
}
void expandTildes( wstring & str, wstring const & tildeReplacement )
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement )
{
wstring tildeValue = Folding::trimWhitespace( tildeReplacement );
std::u32string tildeValue = Folding::trimWhitespace( tildeReplacement );
for ( size_t x = 0; x < str.size(); ) {
if ( str[ x ] == L'\\' ) {
x += 2;
@ -1294,7 +1293,7 @@ void expandTildes( wstring & str, wstring const & tildeReplacement )
}
}
void unescapeDsl( wstring & str )
void unescapeDsl( std::u32string & str )
{
for ( size_t x = 0; x < str.size(); ++x ) {
if ( str[ x ] == L'\\' ) {
@ -1303,7 +1302,7 @@ void unescapeDsl( wstring & str )
}
}
void normalizeHeadword( wstring & str )
void normalizeHeadword( std::u32string & str )
{
for ( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char
{
@ -1331,7 +1330,7 @@ void normalizeHeadword( wstring & str )
}
namespace {
void cutEnding( wstring & where, wstring const & ending )
void cutEnding( std::u32string & where, std::u32string const & ending )
{
if ( where.size() > ending.size() && where.compare( where.size() - ending.size(), ending.size(), ending ) == 0 ) {
where.erase( where.size() - ending.size() );
@ -1339,17 +1338,17 @@ void cutEnding( wstring & where, wstring const & ending )
}
} // namespace
quint32 dslLanguageToId( wstring const & name )
quint32 dslLanguageToId( std::u32string const & name )
{
static wstring newSp( U"newspelling" );
static wstring st( U"standard" );
static wstring ms( U"modernsort" );
static wstring ts( U"traditionalsort" );
static wstring prc( U"prc" );
static std::u32string newSp( U"newspelling" );
static std::u32string st( U"standard" );
static std::u32string ms( U"modernsort" );
static std::u32string ts( U"traditionalsort" );
static std::u32string prc( U"prc" );
// Any of those endings are to be removed
wstring nameStripped = Folding::apply( name );
std::u32string nameStripped = Folding::apply( name );
cutEnding( nameStripped, newSp );
cutEnding( nameStripped, st );

View file

@ -11,23 +11,21 @@
#include "iconv.hh"
#include <QtCore5Compat/QTextCodec>
#include <QByteArray>
#include "utf8.hh"
#include "text.hh"
// Implementation details for Dsl, not part of its interface
namespace Dsl {
namespace Details {
using std::string;
using gd::wstring;
using gd::wchar;
using std::list;
using std::vector;
using Utf8::Encoding;
using Utf8::LineFeed;
using Text::Encoding;
using Text::LineFeed;
string findCodeForDslId( int id );
bool isAtSignFirst( wstring const & str );
bool isAtSignFirst( std::u32string const & str );
/// Parses the DSL language, representing it in its structural DOM form.
struct ArticleDom
@ -37,23 +35,23 @@ struct ArticleDom
bool isTag; // true if it is a tag with subnodes, false if it's a leaf text
// data.
// Those are only used if isTag is true
wstring tagName;
wstring tagAttrs;
wstring text; // This is only used if isTag is false
std::u32string tagName;
std::u32string tagAttrs;
std::u32string text; // This is only used if isTag is false
// Empty tag type: passing Text() as the first argument selects the Node
// constructor that builds a leaf text node (isTag == false).
// NOTE(review): inside Node this name hides the Text namespace (the one that
// provides Text::Encoding); presumably any Node member needing that namespace
// has to spell it ::Text -- confirm no such use exists.
class Text
{};
// Empty tag type: passing Tag() as the first argument selects the Node
// constructor that builds a tag node (isTag == true).
class Tag
{};
Node( Tag, wstring const & name, wstring const & attrs ):
// Builds a tag node: marks it as a tag and copies in the tag name and its
// attribute string. `text` is not initialised here, so it stays empty.
Node( Tag, std::u32string const & name, std::u32string const & attrs ):
isTag( true ),
tagName( name ),
tagAttrs( attrs )
{
}
Node( Text, wstring const & text_ ):
Node( Text, std::u32string const & text_ ):
isTag( false ),
text( text_ )
{
@ -61,30 +59,32 @@ struct ArticleDom
/// Concatenates all children text nodes recursively to form all text
/// the node contains stripped of any markup.
wstring renderAsText( bool stripTrsTag = false ) const;
std::u32string renderAsText( bool stripTrsTag = false ) const;
};
/// Does the parse at construction. Refer to the 'root' member variable
/// afterwards.
explicit ArticleDom( wstring const &, string const & dictName = string(), wstring const & headword_ = wstring() );
explicit ArticleDom( std::u32string const &,
string const & dictName = string(),
std::u32string const & headword_ = std::u32string() );
/// Root of DOM's tree
Node root;
private:
void openTag( wstring const & name, wstring const & attr, list< Node * > & stack );
void openTag( std::u32string const & name, std::u32string const & attr, list< Node * > & stack );
void closeTag( wstring const & name, list< Node * > & stack, bool warn = true );
void closeTag( std::u32string const & name, list< Node * > & stack, bool warn = true );
bool atSignFirstInLine();
wchar const *stringPos, *lineStartPos;
char32_t const *stringPos, *lineStartPos;
class eot: std::exception
{};
wchar ch;
char32_t ch;
bool escaped;
unsigned transcriptionCount; // >0 = inside a [t] tag
unsigned mediaCount; // >0 = inside a [s] tag
@ -93,7 +93,7 @@ private:
/// Information for diagnostic purposes
string dictionaryName;
wstring headword;
std::u32string headword;
};
/// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects
@ -103,9 +103,9 @@ class DslScanner
gzFile f;
Encoding encoding;
QTextCodec * codec;
wstring dictionaryName;
wstring langFrom, langTo;
wstring soundDictionary;
std::u32string dictionaryName;
std::u32string langFrom, langTo;
std::u32string soundDictionary;
char readBuffer[ 65536 ];
char * readBufferPtr;
LineFeed lineFeed;
@ -132,25 +132,25 @@ public:
}
/// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const
std::u32string const & getDictionaryName() const
{
// Returns a reference to the member itself (no copy is made), so the result
// must not be used after this scanner is destroyed.
return dictionaryName;
}
/// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const
std::u32string const & getLangFrom() const
{
// Returns a reference to the member itself (no copy is made), so the result
// must not be used after this scanner is destroyed.
return langFrom;
}
/// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const
std::u32string const & getLangTo() const
{
// Returns a reference to the member itself (no copy is made), so the result
// must not be used after this scanner is destroyed.
return langTo;
}
/// Returns the preferred external dictionary with sounds, as was read from file's headers.
wstring const & getSoundDictionaryName() const
std::u32string const & getSoundDictionaryName() const
{
// Returns a reference to the member itself (no copy is made), so the result
// must not be used after this scanner is destroyed.
return soundDictionary;
}
@ -161,10 +161,10 @@ public:
/// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which start
/// with #).
bool readNextLine( wstring &, size_t & offset, bool only_head_word = false );
bool readNextLine( std::u32string &, size_t & offset, bool only_head_word = false );
/// Similar readNextLine but strip all DSL comments {{...}}
bool readNextLineWithoutComments( wstring &, size_t & offset, bool only_headword = false );
bool readNextLineWithoutComments( std::u32string &, size_t & offset, bool only_headword = false );
/// Returns the number of lines read so far from the file.
unsigned getLinesRead() const
@ -180,32 +180,35 @@ public:
/// This function either removes parts of string enclosed in braces, or leaves
/// them intact. The braces themselves are removed always, though.
void processUnsortedParts( wstring & str, bool strip );
void processUnsortedParts( std::u32string & str, bool strip );
/// Expands optional parts of a headword (ones marked with parentheses),
/// producing all possible combinations where they are present or absent.
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x = 0, bool inside_recurse = false );
void expandOptionalParts( std::u32string & str,
list< std::u32string > * result,
size_t x = 0,
bool inside_recurse = false );
/// Expands all unescaped tildes, inserting tildeReplacement text instead of
/// them.
void expandTildes( wstring & str, wstring const & tildeReplacement );
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement );
/// Unescapes any escaped chars. Be sure to handle all their special meanings
/// before unescaping them.
void unescapeDsl( wstring & str );
void unescapeDsl( std::u32string & str );
/// Normalizes the headword. Currently turns any sequences of consecutive spaces
/// into a single space.
void normalizeHeadword( wstring & );
void normalizeHeadword( std::u32string & );
/// Strip DSL {{...}} comments
void stripComments( wstring &, bool & );
void stripComments( std::u32string &, bool & );
inline size_t DslScanner::distanceToBytes( size_t x ) const
{
switch ( encoding ) {
case Utf8::Utf16LE:
case Utf8::Utf16BE:
case Text::Utf16LE:
case Text::Utf16BE:
return x * 2;
default:
return x;
@ -214,7 +217,7 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
/// Converts the given language name taken from Dsl header (i.e. getLangFrom(),
/// getLangTo()) to its proper language id.
quint32 dslLanguageToId( wstring const & name );
quint32 dslLanguageToId( std::u32string const & name );
} // namespace Details
} // namespace Dsl

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include <QDir>
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "epwing_book.hh"
#include "epwing.hh"
@ -29,7 +29,7 @@ using std::multimap;
using std::vector;
using std::set;
using std::pair;
using gd::wstring;
using std::u32string;
namespace {
@ -87,12 +87,6 @@ public:
~EpwingDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -115,10 +109,10 @@ public:
QString const & getDescription() override;
void getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off );
void getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off );
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -140,16 +134,16 @@ public:
&& ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
}
static int japaneseWriting( gd::wchar ch );
static int japaneseWriting( char32_t ch );
static bool isSign( gd::wchar ch );
static bool isSign( char32_t ch );
static bool isJapanesePunctiation( gd::wchar ch );
static bool isJapanesePunctiation( char32_t ch );
sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ) override;
sptr< Dictionary::WordSearchRequest > prefixMatch( u32string const &, unsigned long ) override;
sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
stemmedMatch( u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
protected:
@ -162,7 +156,7 @@ private:
quint32 address, string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & word ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( u32string const & word ) override;
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
void
@ -455,7 +449,7 @@ void EpwingDictionary::getArticleText( uint32_t articleAddress, QString & headwo
class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring str;
u32string str;
EpwingDictionary & dict;
QAtomicInt isCancelled;
@ -463,7 +457,7 @@ class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
public:
EpwingHeadwordsRequest( wstring const & word_, EpwingDictionary & dict_ ):
EpwingHeadwordsRequest( u32string const & word_, EpwingDictionary & dict_ ):
str( word_ ),
dict( dict_ )
{
@ -539,7 +533,7 @@ void EpwingHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< EpwingHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word );
@ -548,8 +542,8 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym(
class EpwingArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
u32string word;
vector< u32string > alts;
EpwingDictionary & dict;
bool ignoreDiacritics;
@ -558,8 +552,8 @@ class EpwingArticleRequest: public Dictionary::DataRequest
public:
EpwingArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
EpwingArticleRequest( u32string const & word_,
vector< u32string > const & alts_,
EpwingDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -574,10 +568,10 @@ public:
void run();
void getBuiltInArticle( wstring const & word_,
void getBuiltInArticle( u32string const & word_,
QList< int > & pages,
QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles );
multimap< u32string, pair< string, string > > & mainArticles );
void cancel() override
{
@ -607,13 +601,13 @@ void EpwingArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonyms make the same article
// appear several times. We combat this
// by only allowing it to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
@ -647,11 +641,11 @@ void EpwingArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, pair< string, string > > & mapToUse =
multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -676,7 +670,7 @@ void EpwingArticleRequest::run()
string result = "<div class=\"epwing_article\">";
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>";
@ -725,10 +719,10 @@ void EpwingArticleRequest::run()
finish();
}
void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
void EpwingArticleRequest::getBuiltInArticle( u32string const & word_,
QList< int > & pages,
QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles )
multimap< u32string, pair< string, string > > & mainArticles )
{
try {
string headword, articleText;
@ -762,7 +756,7 @@ void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
}
}
void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off )
void EpwingDictionary::getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off )
{
try {
QMutexLocker _( &eBook.getLibMutex() );
@ -773,9 +767,9 @@ void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg,
}
}
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( u32string const & word,
vector< u32string > const & alts,
u32string const &,
bool ignoreDiacritics )
{
@ -888,7 +882,7 @@ sptr< Dictionary::DataRequest > EpwingDictionary::getSearchResults( QString cons
ignoreDiacritics );
}
int EpwingDictionary::japaneseWriting( gd::wchar ch )
int EpwingDictionary::japaneseWriting( char32_t ch )
{
if ( ( ch >= 0x30A0 && ch <= 0x30FF ) || ( ch >= 0x31F0 && ch <= 0x31FF ) || ( ch >= 0x3200 && ch <= 0x32FF )
|| ( ch >= 0xFF00 && ch <= 0xFFEF ) || ( ch == 0x1B000 ) )
@ -901,7 +895,7 @@ int EpwingDictionary::japaneseWriting( gd::wchar ch )
return 0;
}
bool EpwingDictionary::isSign( gd::wchar ch )
bool EpwingDictionary::isSign( char32_t ch )
{
switch ( ch ) {
case 0x002B: // PLUS SIGN
@ -921,7 +915,7 @@ bool EpwingDictionary::isSign( gd::wchar ch )
}
}
bool EpwingDictionary::isJapanesePunctiation( gd::wchar ch )
/// Tells whether the code point lies in U+3000..U+303F (inclusive), the
/// Unicode "CJK Symbols and Punctuation" block.
bool EpwingDictionary::isJapanesePunctiation( char32_t ch )
{
  // Anything below the first code point of the block is rejected right away.
  if ( ch < 0x3000 ) {
    return false;
  }

  // Otherwise the answer depends only on the upper bound of the block.
  return ch <= 0x303F;
}
@ -935,7 +929,7 @@ class EpwingWordSearchRequest: public BtreeIndexing::BtreeWordSearchRequest
public:
EpwingWordSearchRequest( EpwingDictionary & dict_,
wstring const & str_,
u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -982,13 +976,13 @@ void EpwingWordSearchRequest::findMatches()
finish();
}
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
/// Starts a prefix search for `str` by creating an EpwingWordSearchRequest
/// bound to this dictionary and returning it to the caller.
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( u32string const & str, unsigned long maxResults )
{
  // The fixed arguments are named after the corresponding constructor
  // parameters of EpwingWordSearchRequest.
  unsigned const minLength      = 0;
  int const maxSuffixVariation  = -1; // NOTE(review): presumably means "no suffix-variation limit" - confirm in BtreeWordSearchRequest
  bool const allowMiddleMatches = true;

  return std::make_shared< EpwingWordSearchRequest >( *this,
                                                      str,
                                                      minLength,
                                                      maxSuffixVariation,
                                                      allowMiddleMatches,
                                                      maxResults );
}
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( wstring const & str,
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( u32string const & str,
unsigned minLength,
unsigned maxSuffixVariation,
unsigned long maxResults )
@ -1027,20 +1021,20 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
chunks.addToBlock( &head.page, sizeof( head.page ) );
chunks.addToBlock( &head.offset, sizeof( head.offset ) );
wstring hw = head.headword.toStdU32String();
u32string hw = head.headword.toStdU32String();
indexedWords.addWord( hw, offset );
wordCount++;
articleCount++;
vector< wstring > words;
vector< u32string > words;
// Parse combined kanji/katakana/hiragana headwords
int w_prev = 0;
wstring word;
for ( wstring::size_type n = 0; n < hw.size(); n++ ) {
gd::wchar ch = hw[ n ];
u32string word;
for ( u32string::size_type n = 0; n < hw.size(); n++ ) {
char32_t ch = hw[ n ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isSign( ch )
|| EpwingDictionary::isJapanesePunctiation( ch ) )
@ -1050,7 +1044,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
if ( w > 0 ) {
// Store only separated words
gd::wchar ch_prev = 0;
char32_t ch_prev = 0;
if ( n )
ch_prev = hw[ n - 1 ];
bool needStore = ( n == 0 || Folding::isPunct( ch_prev ) || Folding::isWhitespace( ch_prev )
@ -1058,7 +1052,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
word.push_back( ch );
w_prev = w;
wstring::size_type i;
u32string::size_type i;
for ( i = n + 1; i < hw.size(); i++ ) {
ch = hw[ i ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isJapanesePunctiation( ch ) )

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "epwing_book.hh"
@ -10,8 +10,7 @@
#include <QTextDocumentFragment>
#include <QHash>
#include "audiolink.hh"
#include "wstring.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "folding.hh"
#include "epwing_charmap.hh"
#include "htmlescape.hh"
@ -1135,7 +1134,7 @@ void EpwingBook::fixHeadword( QString & headword )
// return;
//}
gd::wstring folded = Folding::applyPunctOnly( fixed.toStdU32String() );
std::u32string folded = Folding::applyPunctOnly( fixed.toStdU32String() );
//fixed = QString::fromStdU32String( folded );
//if( isHeadwordCorrect( fixed ) )
@ -1993,4 +1992,4 @@ QMutex EpwingBook::libMutex;
} // namespace Epwing
#endif
#endif

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "epwing_charmap.hh"

View file

@ -2,14 +2,13 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "forvo.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QtXml>
#include <list>
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "text.hh"
namespace Forvo {
@ -38,11 +37,6 @@ public:
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -53,7 +47,7 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -62,7 +56,8 @@ public:
return sr;
}
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected:
@ -94,8 +89,8 @@ class ForvoArticleRequest: public Dictionary::DataRequest
public:
ForvoArticleRequest( wstring const & word,
vector< wstring > const & alts,
ForvoArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & apiKey_,
QString const & languageCode_,
string const & dictionaryId_,
@ -105,14 +100,16 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots:
virtual void requestFinished( QNetworkReply * );
};
sptr< DataRequest >
ForvoDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
sptr< DataRequest > ForvoDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool )
{
if ( word.size() > 80 || apiKey.isEmpty() ) {
@ -142,8 +139,8 @@ void ForvoArticleRequest::cancel()
finish();
}
ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
vector< wstring > const & alts,
ForvoArticleRequest::ForvoArticleRequest( std::u32string const & str,
vector< std::u32string > const & alts,
QString const & apiKey_,
QString const & languageCode_,
string const & dictionaryId_,
@ -161,7 +158,7 @@ ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
}
}
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{
qDebug( "Forvo: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
@ -182,7 +179,7 @@ void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const &
sptr< QNetworkReply > netReply = std::shared_ptr< QNetworkReply >( mgr.get( QNetworkRequest( reqUrl ) ) );
netReplies.push_back( NetReply( netReply, Utf8::encode( str ) ) );
netReplies.push_back( NetReply( netReply, Text::toUtf8( str ) ) );
}
void ForvoArticleRequest::requestFinished( QNetworkReply * r )

View file

@ -8,8 +8,7 @@
#include "ufile.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "dictzip.hh"
@ -39,14 +38,12 @@ using std::set;
using std::multimap;
using std::pair;
using gd::wstring;
using gd::wchar;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
using Utf8::Encoding;
using Utf8::LineFeed;
using Text::Encoding;
using Text::LineFeed;
/////////////// GlsScanner
@ -55,9 +52,9 @@ class GlsScanner
gzFile f;
Encoding encoding;
QTextCodec * codec;
wstring dictionaryName;
wstring dictionaryDecription, dictionaryAuthor;
wstring langFrom, langTo;
std::u32string dictionaryName;
std::u32string dictionaryDecription, dictionaryAuthor;
std::u32string langFrom, langTo;
char readBuffer[ 10000 ];
char * readBufferPtr;
size_t readBufferLeft;
@ -82,31 +79,31 @@ public:
}
/// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const
std::u32string const & getDictionaryName() const
{
// Returns a reference to the scanner's own member rather than a copy, so the
// result must not be used after this GlsScanner is destroyed.
return dictionaryName;
}
/// Returns the dictionary's author, as was read from file's headers.
wstring const & getDictionaryAuthor() const
std::u32string const & getDictionaryAuthor() const
{
// Returns a reference to the scanner's own member rather than a copy, so the
// result must not be used after this GlsScanner is destroyed.
return dictionaryAuthor;
}
/// Returns the dictionary's description, as was read from file's headers.
wstring const & getDictionaryDescription() const
std::u32string const & getDictionaryDescription() const
{
// The backing member is spelled `dictionaryDecription` (sic) in this class.
// A reference to it is returned, so the result must not be used after this
// GlsScanner is destroyed.
return dictionaryDecription;
}
/// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const
std::u32string const & getLangFrom() const
{
// Returns a reference to the scanner's own member rather than a copy, so the
// result must not be used after this GlsScanner is destroyed.
return langFrom;
}
/// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const
std::u32string const & getLangTo() const
{
// Returns a reference to the scanner's own member rather than a copy, so the
// result must not be used after this GlsScanner is destroyed.
return langTo;
}
@ -117,7 +114,7 @@ public:
/// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which end
/// by the "### Glossary section:" line).
bool readNextLine( wstring &, size_t & offset );
bool readNextLine( std::u32string &, size_t & offset );
/// Returns the number of lines read so far from the file.
unsigned getLinesRead() const
{
@ -126,7 +123,7 @@ public:
};
GlsScanner::GlsScanner( string const & fileName ):
encoding( Utf8::Utf8 ),
encoding( Text::Utf8 ),
readBufferPtr( readBuffer ),
readBufferLeft( 0 ),
linesRead( 0 )
@ -152,10 +149,10 @@ GlsScanner::GlsScanner( string const & fileName ):
// If the file begins with the dedicated Unicode marker, we just consume
// it. If, on the other hand, it's not, we return the bytes back
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
encoding = Utf8::Utf16LE;
encoding = Text::Utf16LE;
}
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
encoding = Utf8::Utf16BE;
encoding = Text::Utf16BE;
}
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
// Looks like Utf8, read one more byte
@ -164,29 +161,29 @@ GlsScanner::GlsScanner( string const & fileName ):
gzclose( f );
throw exMalformedGlsFile( fileName );
}
encoding = Utf8::Utf8;
encoding = Text::Utf8;
}
else {
if ( gzrewind( f ) ) {
gzclose( f );
throw exCantOpen( fileName );
}
encoding = Utf8::Utf8;
encoding = Text::Utf8;
}
codec = QTextCodec::codecForName( Utf8::getEncodingNameFor( encoding ) );
codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
// We now can use our own readNextLine() function
lineFeed = Utf8::initLineFeed( encoding );
lineFeed = Text::initLineFeed( encoding );
wstring str;
wstring * currentField = 0;
wstring mark = U"###";
wstring titleMark = U"### Glossary title:";
wstring authorMark = U"### Author:";
wstring descriptionMark = U"### Description:";
wstring langFromMark = U"### Source language:";
wstring langToMark = U"### Target language:";
wstring endOfHeaderMark = U"### Glossary section:";
std::u32string str;
std::u32string * currentField = 0;
std::u32string mark = U"###";
std::u32string titleMark = U"### Glossary title:";
std::u32string authorMark = U"### Author:";
std::u32string descriptionMark = U"### Description:";
std::u32string langFromMark = U"### Source language:";
std::u32string langToMark = U"### Target language:";
std::u32string endOfHeaderMark = U"### Glossary section:";
size_t offset;
for ( ;; ) {
@ -199,22 +196,22 @@ GlsScanner::GlsScanner( string const & fileName ):
currentField = 0;
if ( str.compare( 0, titleMark.size(), titleMark ) == 0 ) {
dictionaryName = wstring( str, titleMark.size(), str.size() - titleMark.size() );
dictionaryName = std::u32string( str, titleMark.size(), str.size() - titleMark.size() );
currentField = &dictionaryName;
}
else if ( str.compare( 0, authorMark.size(), authorMark ) == 0 ) {
dictionaryAuthor = wstring( str, authorMark.size(), str.size() - authorMark.size() );
dictionaryAuthor = std::u32string( str, authorMark.size(), str.size() - authorMark.size() );
currentField = &dictionaryAuthor;
}
else if ( str.compare( 0, descriptionMark.size(), descriptionMark ) == 0 ) {
dictionaryDecription = wstring( str, descriptionMark.size(), str.size() - descriptionMark.size() );
dictionaryDecription = std::u32string( str, descriptionMark.size(), str.size() - descriptionMark.size() );
currentField = &dictionaryDecription;
}
else if ( str.compare( 0, langFromMark.size(), langFromMark ) == 0 ) {
langFrom = wstring( str, langFromMark.size(), str.size() - langFromMark.size() );
langFrom = std::u32string( str, langFromMark.size(), str.size() - langFromMark.size() );
}
else if ( str.compare( 0, langToMark.size(), langToMark ) == 0 ) {
langTo = wstring( str, langToMark.size(), str.size() - langToMark.size() );
langTo = std::u32string( str, langToMark.size(), str.size() - langToMark.size() );
}
else if ( str.compare( 0, endOfHeaderMark.size(), endOfHeaderMark ) == 0 ) {
break;
@ -229,7 +226,7 @@ GlsScanner::GlsScanner( string const & fileName ):
}
}
bool GlsScanner::readNextLine( wstring & out, size_t & offset )
bool GlsScanner::readNextLine( std::u32string & out, size_t & offset )
{
offset = (size_t)( gztell( f ) - readBufferLeft );
@ -256,7 +253,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset )
return false;
}
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) {
return false;
}
@ -349,11 +346,6 @@ public:
~GlsDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -374,10 +366,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -508,11 +502,11 @@ QString const & GlsDictionary::getDescription()
try {
GlsScanner scanner( getDictionaryFilenames()[ 0 ] );
string str = Utf8::encode( scanner.getDictionaryAuthor() );
string str = Text::toUtf8( scanner.getDictionaryAuthor() );
if ( !str.empty() ) {
dictionaryDescription = QObject::tr( "Author: %1%2" ).arg( QString::fromUtf8( str.c_str() ) ).arg( "\n\n" );
}
str = Utf8::encode( scanner.getDictionaryDescription() );
str = Text::toUtf8( scanner.getDictionaryDescription() );
if ( !str.empty() ) {
QString desc = QString::fromUtf8( str.c_str() );
desc.replace( "\t", "<br/>" );
@ -597,7 +591,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
}
else {
string articleData =
Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
Iconv::toUtf8( Text::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) {
@ -626,7 +620,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
end_pos = 0;
for ( ;; ) {
end_pos = headword.find( '|', start_pos );
if ( end_pos == wstring::npos ) {
if ( end_pos == std::u32string::npos ) {
string hw = headword.substr( start_pos );
if ( !hw.empty() ) {
headwords.push_back( hw );
@ -809,7 +803,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring word;
std::u32string word;
GlsDictionary & dict;
QAtomicInt isCancelled;
@ -817,7 +811,7 @@ class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
public:
GlsHeadwordsRequest( wstring const & word_, GlsDictionary & dict_ ):
GlsHeadwordsRequest( std::u32string const & word_, GlsDictionary & dict_ ):
word( word_ ),
dict( dict_ )
{
@ -850,7 +844,7 @@ void GlsHeadwordsRequest::run()
try {
vector< WordArticleLink > chain = dict.findArticles( word );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -863,7 +857,7 @@ void GlsHeadwordsRequest::run()
dict.loadArticleText( x.articleOffset, headwords, articleText );
wstring headwordDecoded = Utf8::decode( headwords.front() );
std::u32string headwordDecoded = Text::toUtf32( headwords.front() );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the
@ -881,7 +875,7 @@ void GlsHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< GlsHeadwordsRequest >( word, *this ) :
@ -894,8 +888,8 @@ sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( ws
class GlsArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
GlsDictionary & dict;
bool ignoreDiacritics;
@ -904,8 +898,8 @@ class GlsArticleRequest: public Dictionary::DataRequest
public:
GlsArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
GlsArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
GlsDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -949,13 +943,13 @@ void GlsArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make the same article
// appear several times. We combat this
// by only allowing it to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -981,16 +975,16 @@ void GlsArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( Text::toUtf32( headword ) );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert(
pair( Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), pair( headword, articleText ) ) );
pair( Folding::applySimpleCaseOnly( Text::toUtf32( headword ) ), pair( headword, articleText ) ) );
articlesIncluded.insert( x.articleOffset );
}
@ -1003,7 +997,7 @@ void GlsArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second.second;
@ -1024,9 +1018,9 @@ void GlsArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1102,7 +1096,7 @@ void GlsResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1244,7 +1238,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// which the incident happened. We need alive scanner for that.
// Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
qDebug( "Gls: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
@ -1260,7 +1254,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() );
@ -1271,7 +1265,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
ChunkedStorage::Writer chunks( idx );
wstring curString;
std::u32string curString;
size_t curOffset;
uint32_t articleCount = 0, wordCount = 0;
@ -1291,12 +1285,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Parse headwords
list< wstring > allEntryWords;
wstring::size_type start_pos = 0, end_pos = 0;
list< std::u32string > allEntryWords;
std::u32string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) {
end_pos = curString.find( '|', start_pos );
if ( end_pos == wstring::npos ) {
wstring headword = curString.substr( start_pos );
if ( end_pos == std::u32string::npos ) {
std::u32string headword = curString.substr( start_pos );
if ( !headword.empty() ) {
allEntryWords.push_back( headword );
}

View file

@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "hunspell.hh"
#include "utf8.hh"
#include "text.hh"
#include "htmlescape.hh"
#include "iconv.hh"
#include "folding.hh"
@ -15,18 +15,12 @@
#include <set>
#include "utils.hh"
#include <QtConcurrentRun>
#ifndef INCLUDE_LIBRARY_PATH
#include <hunspell.hxx>
#else
#include <hunspell/hunspell.hxx>
#endif
#include <hunspell/hunspell.hxx>
namespace HunspellMorpho {
using namespace Dictionary;
using gd::wchar;
namespace {
@ -55,12 +49,6 @@ public:
dictionaryName = name_;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -71,18 +59,19 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
bool isLocalDictionary() override
{
return true;
}
vector< wstring > getAlternateWritings( const wstring & word ) noexcept override;
vector< std::u32string > getAlternateWritings( const std::u32string & word ) noexcept override;
protected:
@ -105,25 +94,25 @@ private:
/// Encodes the given string to be passed to the hunspell object. May throw
/// Iconv::Ex
string encodeToHunspell( Hunspell &, wstring const & );
string encodeToHunspell( Hunspell &, std::u32string const & );
/// Decodes the given string returned by the hunspell object. May throw
/// Iconv::Ex
wstring decodeFromHunspell( Hunspell &, char const * );
std::u32string decodeFromHunspell( Hunspell &, char const * );
/// Generates suggestions via hunspell
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell );
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell );
/// Generates suggestions for compound expression
void getSuggestionsForExpression( wstring const & expression,
vector< wstring > & suggestions,
void getSuggestionsForExpression( std::u32string const & expression,
vector< std::u32string > & suggestions,
QMutex & hunspellMutex,
Hunspell & hunspell );
/// Returns true if the string contains whitespace, false otherwise
bool containsWhitespace( wstring const & str )
bool containsWhitespace( std::u32string const & str )
{
wchar const * next = str.c_str();
char32_t const * next = str.c_str();
for ( ; *next; ++next ) {
if ( Folding::isWhitespace( *next ) ) {
@ -153,9 +142,9 @@ void HunspellDictionary::loadIcon() noexcept
dictionaryIconLoaded = true;
}
vector< wstring > HunspellDictionary::getAlternateWritings( wstring const & word ) noexcept
vector< std::u32string > HunspellDictionary::getAlternateWritings( std::u32string const & word ) noexcept
{
vector< wstring > results;
vector< std::u32string > results;
if ( containsWhitespace( word ) ) {
getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell );
@ -171,14 +160,14 @@ class HunspellArticleRequest: public Dictionary::DataRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
public:
HunspellArticleRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellArticleRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -212,7 +201,7 @@ void HunspellArticleRequest::run()
vector< string > suggestions;
try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases
@ -237,10 +226,10 @@ void HunspellArticleRequest::run()
string result = "<div class=\"gdspellsuggestion\">"
+ Html::escape( QCoreApplication::translate( "Hunspell", "Spelling suggestions: " ).toUtf8().data() );
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
for ( vector< string >::size_type x = 0; x < suggestions.size(); ++x ) {
wstring suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
std::u32string suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
if ( Folding::applySimpleCaseOnly( suggestion ) == lowercasedWord ) {
// If among suggestions we see the same word just with the different
@ -251,7 +240,7 @@ void HunspellArticleRequest::run()
return;
}
string suggestionUtf8 = Utf8::encode( suggestion );
string suggestionUtf8 = Text::toUtf8( suggestion );
result += "<a href=\"bword:";
result += Html::escape( suggestionUtf8 ) + "\">";
@ -279,8 +268,10 @@ void HunspellArticleRequest::run()
finish();
}
sptr< DataRequest >
HunspellDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< DataRequest > HunspellDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
return std::make_shared< HunspellArticleRequest >( word, getHunspellMutex(), hunspell );
@ -293,7 +284,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
@ -301,7 +292,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
public:
HunspellHeadwordsRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellHeadwordsRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -333,7 +324,7 @@ void HunspellHeadwordsRequest::run()
return;
}
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.size() > 80 ) {
// We won't do anything for overly long sentences since that would probably
@ -343,7 +334,7 @@ void HunspellHeadwordsRequest::run()
}
if ( containsWhitespace( trimmedWord ) ) {
vector< wstring > results;
vector< std::u32string > results;
getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell );
@ -353,7 +344,7 @@ void HunspellHeadwordsRequest::run()
}
}
else {
QList< wstring > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
QList< std::u32string > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
if ( !suggestions.empty() ) {
QMutexLocker _( &dataMutex );
@ -367,9 +358,9 @@ void HunspellHeadwordsRequest::run()
finish();
}
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell )
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell )
{
QList< wstring > result;
QList< std::u32string > result;
vector< string > suggestions;
@ -382,7 +373,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
if ( !suggestions.empty() ) {
// There were some suggestions made for us. Make an appropriate output.
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
static QRegularExpression cutStem( R"(^\s*st:(((\s+(?!\w{2}:)(?!-)(?!\+))|\S+)+))" );
@ -399,7 +390,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
auto match = cutStem.match( suggestion.trimmed() );
if ( match.hasMatch() ) {
wstring alt = match.captured( 1 ).toStdU32String();
std::u32string alt = match.captured( 1 ).toStdU32String();
if ( Folding::applySimpleCaseOnly( alt ) != lowercasedWord ) // No point in providing same word
{
@ -417,7 +408,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
}
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return std::make_shared< HunspellHeadwordsRequest >( word, getHunspellMutex(), hunspell );
@ -431,14 +422,14 @@ class HunspellPrefixMatchRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
public:
HunspellPrefixMatchRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellPrefixMatchRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -471,7 +462,7 @@ void HunspellPrefixMatchRequest::run()
}
try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.empty() || containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases
@ -498,14 +489,14 @@ void HunspellPrefixMatchRequest::run()
finish();
}
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{
return std::make_shared< HunspellPrefixMatchRequest >( word, getHunspellMutex(), hunspell );
}
void getSuggestionsForExpression( wstring const & expression,
vector< wstring > & suggestions,
void getSuggestionsForExpression( std::u32string const & expression,
vector< std::u32string > & suggestions,
QMutex & hunspellMutex,
Hunspell & hunspell )
{
@ -513,15 +504,15 @@ void getSuggestionsForExpression( wstring const & expression,
// This is useful for compound expressions where some words is
// in different form, e.g. "dozing off" -> "doze off".
wstring trimmedWord = Folding::trimWhitespaceOrPunct( expression );
wstring word, punct;
QList< wstring > words;
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( expression );
std::u32string word, punct;
QList< std::u32string > words;
suggestions.clear();
// Parse string to separate words
for ( wchar const * c = trimmedWord.c_str();; ++c ) {
for ( char32_t const * c = trimmedWord.c_str();; ++c ) {
if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( *c ) ) {
if ( word.size() ) {
words.push_back( word );
@ -552,7 +543,7 @@ void getSuggestionsForExpression( wstring const & expression,
// Combine result strings from suggestions
QList< wstring > results;
QList< std::u32string > results;
for ( const auto & i : words ) {
word = i;
@ -562,13 +553,13 @@ void getSuggestionsForExpression( wstring const & expression,
}
}
else {
QList< wstring > sugg = suggest( word, hunspellMutex, hunspell );
QList< std::u32string > sugg = suggest( word, hunspellMutex, hunspell );
int suggNum = sugg.size() + 1;
if ( suggNum > 3 ) {
suggNum = 3;
}
int resNum = results.size();
wstring resultStr;
std::u32string resultStr;
if ( resNum == 0 ) {
for ( int k = 0; k < suggNum; k++ ) {
@ -598,12 +589,12 @@ void getSuggestionsForExpression( wstring const & expression,
}
}
string encodeToHunspell( Hunspell & hunspell, wstring const & str )
string encodeToHunspell( Hunspell & hunspell, std::u32string const & str )
{
Iconv conv( Iconv::GdWchar );
void const * in = str.data();
size_t inLeft = str.size() * sizeof( wchar );
size_t inLeft = str.size() * sizeof( char32_t );
vector< char > result( str.size() * 4 + 1 ); // +1 isn't actually needed,
// but then iconv complains on empty
@ -616,17 +607,17 @@ string encodeToHunspell( Hunspell & hunspell, wstring const & str )
return convStr.toStdString();
}
wstring decodeFromHunspell( Hunspell & hunspell, char const * str )
std::u32string decodeFromHunspell( Hunspell & hunspell, char const * str )
{
Iconv conv( hunspell.get_dic_encoding() );
void const * in = str;
size_t inLeft = strlen( str );
vector< wchar > result( inLeft + 1 ); // +1 isn't needed, but see above
vector< char32_t > result( inLeft + 1 ); // +1 isn't needed, but see above
void * out = &result.front();
size_t outLeft = result.size() * sizeof( wchar );
size_t outLeft = result.size() * sizeof( char32_t );
QString convStr = conv.convert( in, inLeft );
return convStr.toStdU32String();

View file

@ -1,5 +1,5 @@
#include "lingualibre.hh"
#include "utf8.hh"
#include "text.hh"
#include "audiolink.hh"
#include <QJsonArray>
@ -40,8 +40,8 @@ class LinguaArticleRequest: public Dictionary::DataRequest
public:
LinguaArticleRequest( wstring const & word,
vector< wstring > const & alts,
LinguaArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & languageCode_,
QString const & langWikipediaID_,
string const & dictionaryId_,
@ -51,7 +51,7 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots:
virtual void requestFinished( QNetworkReply * );
@ -165,12 +165,6 @@ WHERE {
}
}
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -181,7 +175,7 @@ WHERE {
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -190,7 +184,10 @@ WHERE {
return sr;
}
sptr< DataRequest > getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) override
sptr< DataRequest > getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ) override
{
if ( word.size() < 50 ) {
return std::make_shared< LinguaArticleRequest >( word, alts, languageCode, langWikipediaID, getId(), netMgr );
@ -237,8 +234,8 @@ void LinguaArticleRequest::cancel()
finish();
}
LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
const vector< wstring > & alts,
LinguaArticleRequest::LinguaArticleRequest( const std::u32string & str,
const vector< std::u32string > & alts,
const QString & languageCode_,
const QString & langWikipediaID,
const string & dictionaryId_,
@ -251,7 +248,7 @@ LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
addQuery( mgr, str );
}
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring & word )
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const std::u32string & word )
{
// Doc of the <https://www.mediawiki.org/wiki/API:Query>
@ -279,7 +276,7 @@ void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring
auto netReply = std::shared_ptr< QNetworkReply >( mgr.get( netRequest ) );
netReplies.emplace_back( netReply, Utf8::encode( word ) );
netReplies.emplace_back( netReply, Text::toUtf8( word ) );
}

View file

@ -34,7 +34,7 @@
#include "dict/transliteration/romaji.hh"
#include "dict/transliteration/russian.hh"
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "dict/epwing.hh"
#endif
@ -83,7 +83,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
<< "*.zim"
<< "*.zimaa"
#endif
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
<< "*catalogs"
#endif
;
@ -181,7 +181,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
#ifdef MAKE_ZIM_SUPPORT
addDicts( Zim::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this, maxHeadwordToExpand ) );
#endif
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
addDicts( Epwing::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this ) );
#endif
}
@ -274,7 +274,7 @@ void loadDictionaries( QWidget * parent,
addDicts( Forvo::makeDictionaries( loadDicts, cfg.forvo, dictNetMgr ) );
addDicts( Lingua::makeDictionaries( loadDicts, cfg.lingua, dictNetMgr ) );
addDicts( Programs::makeDictionaries( cfg.programs ) );
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
addDicts( VoiceEngines::makeDictionaries( cfg.voiceEngines ) );
#endif
addDicts( DictServer::makeDictionaries( cfg.dictServers ) );

View file

@ -5,7 +5,7 @@
#include "dictfile.hh"
#include "iconv.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "audiolink.hh"
@ -24,7 +24,6 @@
namespace Lsa {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -159,11 +158,6 @@ public:
string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.soundsCount;
@ -174,8 +168,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -204,9 +200,9 @@ LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vecto
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
}
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -220,13 +216,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, string > mainArticles, alternateArticles;
multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -241,12 +237,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, string > & mapToUse = ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.word ) );
@ -259,7 +256,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
string result;
multimap< wstring, string >::const_iterator i;
multimap< std::u32string, string >::const_iterator i;
result += "<table class=\"lsa_play\">";
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -394,7 +391,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
string strippedName = Utils::endsWithIgnoreCase( name, ".wav" ) ? string( name, 0, name.size() - 4 ) : name;
vector< WordArticleLink > chain = findArticles( Utf8::decode( strippedName ) );
vector< WordArticleLink > chain = findArticles( Text::toUtf32( strippedName ) );
if ( chain.empty() ) {
return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource
@ -577,7 +574,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Insert new entry into an index
indexedWords.addWord( Utf8::decode( e.name ), offset );
indexedWords.addWord( Text::toUtf32( e.name ), offset );
}
idxHeader.vorbisOffset = f.tell();

View file

@ -4,10 +4,9 @@
#include "mdx.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "dictfile.hh"
#include "wstring.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "audiolink.hh"
@ -37,8 +36,6 @@ namespace Mdx {
using std::map;
using std::multimap;
using std::set;
using gd::wstring;
using gd::wchar;
using std::list;
using std::pair;
using std::string;
@ -129,7 +126,7 @@ public:
/// Checks whether the given file exists in the mdd file or not.
/// Note that this function is thread-safe, since it does not access mdd file.
bool hasFile( gd::wstring const & name )
bool hasFile( std::u32string const & name )
{
if ( !isFileOpen ) {
return false;
@ -140,7 +137,7 @@ public:
/// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & result )
bool loadFile( std::u32string const & name, std::vector< char > & result )
{
if ( !isFileOpen ) {
return false;
@ -212,12 +209,6 @@ public:
void deferredInit() override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -238,8 +229,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
QString const & getDescription() override;
@ -287,7 +280,7 @@ private:
friend class MdxArticleRequest;
friend class MddResourceRequest;
void loadResourceFile( const wstring & resourceName, vector< char > & data );
void loadResourceFile( const std::u32string & resourceName, vector< char > & data );
};
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
@ -494,8 +487,8 @@ sptr< Dictionary::DataRequest > MdxDictionary::getSearchResults( QString const &
class MdxArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
MdxDictionary & dict;
bool ignoreDiacritics;
@ -504,8 +497,8 @@ class MdxArticleRequest: public Dictionary::DataRequest
public:
MdxArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
MdxArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
MdxDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -608,8 +601,8 @@ void MdxArticleRequest::run()
// Handle internal redirects
if ( strncmp( articleBody.c_str(), "@@@LINK=", 8 ) == 0 ) {
wstring target = Utf8::decode( articleBody.c_str() + 8 );
target = Folding::trimWhitespace( target );
std::u32string target = Text::toUtf32( articleBody.c_str() + 8 );
target = Folding::trimWhitespace( target );
// Make an additional query for this redirection
vector< WordArticleLink > altChain = dict.findArticles( target );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
@ -632,9 +625,9 @@ void MdxArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
const vector< wstring > & alts,
const wstring &,
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const std::u32string & word,
const vector< std::u32string > & alts,
const std::u32string &,
bool ignoreDiacritics )
{
return std::make_shared< MdxArticleRequest >( word, alts, *this, ignoreDiacritics );
@ -644,7 +637,7 @@ sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
class MddResourceRequest: public Dictionary::DataRequest
{
MdxDictionary & dict;
wstring resourceName;
std::u32string resourceName;
QAtomicInt isCancelled;
QFuture< void > f;
@ -653,7 +646,7 @@ public:
MddResourceRequest( MdxDictionary & dict_, string const & resourceName_ ):
Dictionary::DataRequest( &dict_ ),
dict( dict_ ),
resourceName( Utf8::decode( resourceName_ ) )
resourceName( Text::toUtf32( resourceName_ ) )
{
f = QtConcurrent::run( [ this ]() {
this->run();
@ -728,7 +721,7 @@ void MddResourceRequest::run()
}
// In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded;
set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) {
// Some runnables linger enough that they are cancelled before they start
@ -736,7 +729,7 @@ void MddResourceRequest::run()
finish();
return;
}
string u8ResourceName = Utf8::encode( resourceName );
string u8ResourceName = Text::toUtf8( resourceName );
if ( !resourceIncluded.insert( resourceName ).second ) {
finish();
return;
@ -1157,11 +1150,11 @@ QString MdxDictionary::getCachedFileName( QString filename )
qWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
return QString();
}
gd::wstring resourceName = filename.toStdU32String();
std::u32string resourceName = filename.toStdU32String();
vector< char > data;
// In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded;
set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) {
if ( !resourceIncluded.insert( resourceName ).second ) {
@ -1200,10 +1193,10 @@ QString MdxDictionary::getCachedFileName( QString filename )
return fullName;
}
void MdxDictionary::loadResourceFile( const wstring & resourceName, vector< char > & data )
void MdxDictionary::loadResourceFile( const std::u32string & resourceName, vector< char > & data )
{
wstring newResourceName = resourceName;
string u8ResourceName = Utf8::encode( resourceName );
std::u32string newResourceName = resourceName;
string u8ResourceName = Text::toUtf8( resourceName );
// Convert to the Windows separator
std::replace( newResourceName.begin(), newResourceName.end(), '/', '\\' );

View file

@ -2,7 +2,6 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "mediawiki.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QUrl>
@ -56,11 +55,6 @@ public:
return name;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -71,9 +65,10 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override
{
@ -138,7 +133,10 @@ class MediaWikiWordSearchRequest: public MediaWikiWordSearchRequestSlots
public:
MediaWikiWordSearchRequest( wstring const &, QString const & url, QString const & lang, QNetworkAccessManager & mgr );
MediaWikiWordSearchRequest( std::u32string const &,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr );
~MediaWikiWordSearchRequest();
@ -149,7 +147,7 @@ private:
void downloadFinished() override;
};
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str,
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( std::u32string const & str,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr ):
@ -395,8 +393,8 @@ class MediaWikiArticleRequest: public MediaWikiDataRequestSlots
public:
MediaWikiArticleRequest( wstring const & word,
vector< wstring > const & alts,
MediaWikiArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr,
@ -406,7 +404,7 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
void requestFinished( QNetworkReply * ) override;
@ -440,8 +438,8 @@ void MediaWikiArticleRequest::cancel()
finish();
}
MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
vector< wstring > const & alts,
MediaWikiArticleRequest::MediaWikiArticleRequest( std::u32string const & str,
vector< std::u32string > const & alts,
QString const & url_,
QString const & lang_,
QNetworkAccessManager & mgr,
@ -463,7 +461,7 @@ MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
}
}
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{
qDebug( "MediaWiki: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
@ -710,7 +708,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
}
}
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{
(void)maxResults;
@ -724,8 +722,10 @@ sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word
}
}
sptr< DataRequest >
MediaWikiDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
sptr< DataRequest > MediaWikiDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool )
{
if ( word.size() > 80 ) {

View file

@ -4,8 +4,7 @@
#include "programs.hh"
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "iconv.hh"
#include "utils.hh"
#include "globalbroadcaster.hh"
@ -36,11 +35,6 @@ public:
return prg.name.toUtf8().data();
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -51,16 +45,17 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected:
void loadIcon() noexcept override;
};
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{
if ( prg.type == Config::Program::PrefixMatch ) {
@ -75,8 +70,10 @@ sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word,
}
}
sptr< Dictionary::DataRequest >
ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< Dictionary::DataRequest > ProgramsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
switch ( prg.type ) {
@ -84,7 +81,7 @@ ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &,
// Audio results are instantaneous
string result;
string wordUtf8( Utf8::encode( word ) );
string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"programs_play\"><tr>";

View file

@ -6,14 +6,13 @@
#include <QProcess>
#include "dictionary.hh"
#include "config.hh"
#include "wstring.hh"
#include "text.hh"
/// Support for arbitrary programs.
namespace Programs {
using std::vector;
using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Programs const & );

View file

@ -9,7 +9,7 @@
#include "htmlescape.hh"
#include "langcoder.hh"
#include "sdict.hh"
#include "utf8.hh"
#include "text.hh"
#include <map>
#include <QAtomicInt>
#include <QDir>
@ -26,7 +26,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -113,11 +112,6 @@ public:
~SdictDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -138,8 +132,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -421,8 +417,8 @@ SdictDictionary::getSearchResults( QString const & searchString, int searchMode,
class SdictArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
SdictDictionary & dict;
bool ignoreDiacritics;
@ -432,8 +428,8 @@ class SdictArticleRequest: public Dictionary::DataRequest
public:
SdictArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
SdictArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
SdictDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -477,13 +473,13 @@ void SdictArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -512,12 +508,12 @@ void SdictArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -537,7 +533,7 @@ void SdictArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>";
@ -566,9 +562,9 @@ void SdictArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -746,7 +742,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Insert new entry
indexedWords.addWord( Utf8::decode( string( data.data(), size ) ), articleOffset );
indexedWords.addWord( Text::toUtf32( string( data.data(), size ) ), articleOffset );
pos += el.nextWord;
}

View file

@ -6,7 +6,7 @@
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "decompress.hh"
#include "langcoder.hh"
#include "ftshelpers.hh"
@ -40,7 +40,6 @@ using std::vector;
using std::multimap;
using std::pair;
using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -610,12 +609,6 @@ public:
~SlobDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -636,8 +629,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -859,7 +854,7 @@ void SlobDictionary::loadResource( std::string & resourceName, string & data )
vector< WordArticleLink > link;
RefEntry entry;
link = resourceIndex.findArticles( Utf8::decode( resourceName ) );
link = resourceIndex.findArticles( Text::toUtf32( resourceName ) );
if ( link.empty() ) {
return;
@ -995,8 +990,8 @@ SlobDictionary::getSearchResults( QString const & searchString, int searchMode,
class SlobArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
SlobDictionary & dict;
bool ignoreDiacritics;
@ -1005,8 +1000,8 @@ class SlobArticleRequest: public Dictionary::DataRequest
public:
SlobArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
SlobArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
SlobDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1051,13 +1046,13 @@ void SlobArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint64 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -1090,12 +1085,12 @@ void SlobArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1111,7 +1106,7 @@ void SlobArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += R"(<div class="slobdict"><h3 class="slobdict_headword">)";
@ -1134,9 +1129,9 @@ void SlobArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{

View file

@ -3,13 +3,12 @@
#include "sounddir.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "chunkedstorage.hh"
#include "filetype.hh"
#include "htmlescape.hh"
#include "audiolink.hh"
#include "wstring_qt.hh"
#include "utils.hh"
@ -21,7 +20,6 @@
namespace SoundDir {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -75,12 +73,6 @@ public:
vector< string > const & dictionaryFiles,
QString const & iconFilename_ );
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.soundsCount;
@ -91,8 +83,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -120,9 +114,9 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
@ -136,13 +130,13 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
}
// maps to the chain number
multimap< wstring, unsigned > mainArticles, alternateArticles;
multimap< std::u32string, unsigned > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -157,12 +151,12 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, unsigned > & mapToUse =
multimap< std::u32string, unsigned > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( chain[ x ].word ), x ) );
@ -176,7 +170,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
string result;
multimap< wstring, uint32_t >::const_iterator i;
multimap< std::u32string, uint32_t >::const_iterator i;
string displayedName;
vector< char > chunk;
@ -405,11 +399,11 @@ void addDir( QDir const & baseDir,
const uint32_t articleOffset = chunks.startNewBlock();
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
wstring name = i->fileName().toStdU32String();
std::u32string name = i->fileName().toStdU32String();
const wstring::size_type pos = name.rfind( L'.' );
const std::u32string::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos ) {
if ( pos != std::u32string::npos ) {
name.erase( pos );
}

View file

@ -16,7 +16,7 @@ Sources::Sources( QWidget * parent, Config::Class const & cfg ):
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
chineseConversion( new ChineseConversion( this, cfg.transliteration.chinese ) ),
#endif
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
textToSpeechSource( nullptr ),
#endif
itemDelegate( new QItemDelegate( this ) ),
@ -129,7 +129,7 @@ Sources::Sources( QWidget * parent, Config::Class const & cfg ):
ui.forvoLanguageCodes->setText( forvo.languageCodes );
// Text to speech
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
if ( !cfg.notts ) {
textToSpeechSource = new TextToSpeechSource( this, cfg.voiceEngines );
ui.tabWidget->addTab( textToSpeechSource, QIcon( ":/icons/text2speech.svg" ), tr( "Text to Speech" ) );
@ -325,7 +325,7 @@ void Sources::on_removeProgram_clicked()
}
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
Config::VoiceEngines Sources::getVoiceEngines() const
{
if ( !textToSpeechSource )

View file

@ -295,7 +295,7 @@ public:
{
return programsModel.getCurrentPrograms();
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
Config::VoiceEngines getVoiceEngines() const;
#endif
Config::Hunspell getHunspell() const;
@ -317,7 +317,7 @@ private:
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
ChineseConversion * chineseConversion;
#endif
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
TextToSpeechSource * textToSpeechSource;
#endif
QItemDelegate * itemDelegate;

View file

@ -4,7 +4,7 @@
#include "stardict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "xdxf2html.hh"
@ -42,7 +42,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -122,7 +121,7 @@ class StardictDictionary: public BtreeIndexing::BtreeDictionary
File::Index idx;
IdxHeader idxHeader;
string sameTypeSequence;
ChunkedStorage::Reader chunks;
std::unique_ptr< ChunkedStorage::Reader > chunks;
QMutex dzMutex;
dictData * dz;
QMutex resourceZipMutex;
@ -134,11 +133,6 @@ public:
~StardictDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.wordCount;
@ -159,10 +153,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -215,12 +211,14 @@ StardictDictionary::StardictDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
chunks( idx, idxHeader.chunksOffset )
idx( indexFile, QIODevice::ReadOnly )
{
dictionaryName = loadString( idxHeader.bookNameSize );
// reading headers, note that reading order matters
idxHeader = idx.read< IdxHeader >();
dictionaryName = loadString( idxHeader.bookNameSize );
sameTypeSequence = loadString( idxHeader.sameTypeSequenceSize );
chunks = std::make_unique< ChunkedStorage::Reader >( idx, idxHeader.chunksOffset );
// Open the .dict file
DZ_ERRORS error;
@ -301,7 +299,7 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
QMutexLocker _( &idxMutex );
char * articleData = chunks.getBlock( articleAddress, chunk );
char * articleData = chunks->getBlock( articleAddress, chunk );
memcpy( &offset, articleData, sizeof( uint32_t ) );
articleData += sizeof( uint32_t );
@ -1167,7 +1165,7 @@ sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString co
class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring word;
std::u32string word;
StardictDictionary & dict;
QAtomicInt isCancelled;
@ -1175,7 +1173,7 @@ class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
public:
StardictHeadwordsRequest( wstring const & word_, StardictDictionary & dict_ ):
StardictHeadwordsRequest( std::u32string const & word_, StardictDictionary & dict_ ):
word( word_ ),
dict( dict_ )
{
@ -1210,7 +1208,7 @@ void StardictHeadwordsRequest::run()
//limited the synomys to at most 10 entries
vector< WordArticleLink > chain = dict.findArticles( word, false, 10 );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -1222,7 +1220,7 @@ void StardictHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, articleText );
wstring headwordDecoded = Utf8::decode( headword );
std::u32string headwordDecoded = Text::toUtf32( headword );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the
@ -1240,7 +1238,7 @@ void StardictHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word );
@ -1253,8 +1251,8 @@ sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynony
class StardictArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
StardictDictionary & dict;
bool ignoreDiacritics;
@ -1264,8 +1262,8 @@ class StardictArticleRequest: public Dictionary::DataRequest
public:
StardictArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
StardictArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
StardictDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1315,13 +1313,13 @@ void StardictArticleRequest::run()
}
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -1348,12 +1346,12 @@ void StardictArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1369,7 +1367,7 @@ void StardictArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
@ -1412,9 +1410,9 @@ void StardictArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1572,7 +1570,7 @@ void StardictResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1804,10 +1802,10 @@ static void handleIdxSynFile( string const & fileName,
// Insert new entry into an index
if ( parseHeadwords ) {
indexedWords.addWord( Utf8::decode( word ), offset );
indexedWords.addWord( Text::toUtf32( word ), offset );
}
else {
indexedWords.addSingleWord( Utf8::decode( word ), offset );
indexedWords.addSingleWord( Text::toUtf32( word ), offset );
}
}

View file

@ -7,7 +7,7 @@
#include <opencc/opencc.h>
#include "folding.hh"
#include "transliteration.hh"
#include "utf8.hh"
#include "text.hh"
namespace ChineseTranslit {
@ -27,7 +27,7 @@ public:
QString const & openccConfig );
~CharacterConversionDictionary();
std::vector< gd::wstring > getAlternateWritings( gd::wstring const & ) noexcept override;
std::vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept override;
};
CharacterConversionDictionary::CharacterConversionDictionary( std::string const & id,
@ -68,15 +68,15 @@ CharacterConversionDictionary::~CharacterConversionDictionary()
// #endif
}
std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings( gd::wstring const & str ) noexcept
std::vector< std::u32string > CharacterConversionDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{
std::vector< gd::wstring > results;
std::vector< std::u32string > results;
if ( converter != NULL ) {
gd::wstring folded = Folding::applySimpleCaseOnly( str );
std::string input = Utf8::encode( folded );
std::u32string folded = Folding::applySimpleCaseOnly( str );
std::string input = Text::toUtf8( folded );
std::string output;
gd::wstring result;
std::u32string result;
try {
// #ifdef Q_OS_MAC
@ -93,7 +93,7 @@ std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings(
// #else
// output = converter->Convert( input );
// #endif
result = Utf8::decode( output );
result = Text::toUtf32( output );
}
catch ( std::exception & ex ) {
qWarning( "OpenCC: conversion failed %s", ex.what() );

View file

@ -2,12 +2,11 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "transliteration.hh"
#include "utf8.hh"
#include "text.hh"
#include "folding.hh"
namespace Transliteration {
using gd::wchar;
BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id,
string const & name_,
@ -26,11 +25,6 @@ string BaseTransliterationDictionary::getName() noexcept
return name;
}
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() noexcept
{
return map< Dictionary::Property, string >();
}
unsigned long BaseTransliterationDictionary::getArticleCount() noexcept
{
return 0;
@ -41,24 +35,28 @@ unsigned long BaseTransliterationDictionary::getWordCount() noexcept
return 0;
}
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( wstring const &, unsigned long )
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( std::u32string const &,
unsigned long )
{
return std::make_shared< Dictionary::WordSearchRequestInstant >();
}
sptr< Dictionary::DataRequest >
BaseTransliterationDictionary::getArticle( wstring const &, vector< wstring > const &, wstring const &, bool )
sptr< Dictionary::DataRequest > BaseTransliterationDictionary::getArticle( std::u32string const &,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
return std::make_shared< Dictionary::DataRequestInstant >( false );
}
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
sptr< Dictionary::WordSearchRequest >
BaseTransliterationDictionary::findHeadwordsForSynonym( std::u32string const & str )
{
sptr< Dictionary::WordSearchRequestInstant > result = std::make_shared< Dictionary::WordSearchRequestInstant >();
vector< wstring > alts = getAlternateWritings( str );
vector< std::u32string > alts = getAlternateWritings( str );
qDebug( "alts = %u", (unsigned)alts.size() );
@ -72,13 +70,13 @@ sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwor
void Table::ins( char const * from, char const * to )
{
wstring fr = Utf8::decode( std::string( from ) );
std::u32string fr = Text::toUtf32( std::string( from ) );
if ( fr.size() > maxEntrySize ) {
maxEntrySize = fr.size();
}
insert( std::pair< wstring, wstring >( fr, Utf8::decode( std::string( to ) ) ) );
insert( std::pair< std::u32string, std::u32string >( fr, Text::toUtf32( std::string( to ) ) ) );
}
@ -89,12 +87,12 @@ TransliterationDictionary::TransliterationDictionary(
{
}
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str ) noexcept
vector< std::u32string > TransliterationDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{
vector< wstring > results;
vector< std::u32string > results;
wstring result, folded;
wstring const * target;
std::u32string result, folded;
std::u32string const * target;
if ( caseSensitive ) {
// Don't do any transform -- the transliteration is case-sensitive
@ -105,8 +103,8 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
target = &folded;
}
wchar const * ptr = target->c_str();
size_t left = target->size();
char32_t const * ptr = target->c_str();
size_t left = target->size();
Table::const_iterator i;
@ -115,7 +113,7 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
for ( x = table.getMaxEntrySize(); x >= 1; --x ) {
if ( left >= x ) {
i = table.find( wstring( ptr, x ) );
i = table.find( std::u32string( ptr, x ) );
if ( i != table.end() ) {
result.append( i->second );

View file

@ -9,7 +9,6 @@
namespace Transliteration {
using std::map;
using gd::wstring;
using std::string;
using std::vector;
@ -28,24 +27,22 @@ public:
virtual string getName() noexcept;
virtual map< Dictionary::Property, string > getProperties() noexcept;
virtual unsigned long getArticleCount() noexcept;
virtual unsigned long getWordCount() noexcept;
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept = 0;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept = 0;
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & );
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const &, wstring const &, bool );
getArticle( std::u32string const &, vector< std::u32string > const &, std::u32string const &, bool );
};
class Table: public map< wstring, wstring >
class Table: public map< std::u32string, std::u32string >
{
unsigned maxEntrySize;
@ -79,7 +76,7 @@ public:
TransliterationDictionary(
string const & id, string const & name, QIcon icon, Table const & table, bool caseSensitive = true );
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
};
} // namespace Transliteration

View file

@ -4,9 +4,8 @@
#include "indexedzip.hh"
#include "zipfile.hh"
#include <zlib.h>
#include "utf8.hh"
#include "text.hh"
#include "iconv.hh"
#include "wstring_qt.hh"
#include <QtCore5Compat/QTextCodec>
#include <QMutexLocker>
@ -23,7 +22,7 @@ bool IndexedZip::openZipFile( QString const & name )
return zipIsOpen;
}
bool IndexedZip::hasFile( gd::wstring const & name )
bool IndexedZip::hasFile( std::u32string const & name )
{
if ( !zipIsOpen ) {
return false;
@ -34,7 +33,7 @@ bool IndexedZip::hasFile( gd::wstring const & name )
return !links.empty();
}
bool IndexedZip::loadFile( gd::wstring const & name, vector< char > & data )
bool IndexedZip::loadFile( std::u32string const & name, vector< char > & data )
{
if ( !zipIsOpen ) {
return false;
@ -180,7 +179,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
if ( !hasNonAscii ) {
// Add entry as is
zipFileNames.addSingleWord( Utf8::decode( entry.fileName.data() ), entry.localHeaderOffset );
zipFileNames.addSingleWord( Text::toUtf32( entry.fileName.data() ), entry.localHeaderOffset );
if ( filesCount ) {
*filesCount += 1;
}
@ -192,7 +191,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// Utf8
try {
wstring decoded = Utf8::decode( entry.fileName.constData() );
std::u32string decoded = Text::toUtf32( entry.fileName.constData() );
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
if ( filesCount != 0 && !alreadyCounted ) {
@ -200,12 +199,12 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
alreadyCounted = true;
}
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
// Failed to decode
}
if ( !entry.fileNameInUTF8 ) {
wstring nameInSystemLocale;
std::u32string nameInSystemLocale;
// System locale
if ( localeCodec ) {
@ -224,7 +223,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP866
try {
wstring decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
std::u32string decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
@ -241,7 +240,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP1251
try {
wstring decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
std::u32string decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );

View file

@ -37,11 +37,11 @@ public:
/// Checks whether the given file exists in the zip file or not.
/// Note that this function is thread-safe, since it does not access zip file.
bool hasFile( gd::wstring const & name );
bool hasFile( std::u32string const & name );
/// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & );
bool loadFile( std::u32string const & name, std::vector< char > & );
bool loadFile( uint32_t offset, std::vector< char > & );
/// Index compressed files in zip file

View file

@ -1,12 +1,11 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "voiceengines.hh"
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include <string>
#include <map>
@ -21,6 +20,7 @@ namespace VoiceEngines {
using namespace Dictionary;
using std::string;
using std::u32string;
using std::map;
inline string toMd5( QByteArray const & b )
@ -47,10 +47,6 @@ public:
return voiceEngine.name.toUtf8().data();
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -62,16 +58,18 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ) override;
protected:
void loadIcon() noexcept override;
};
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ )
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( u32string const & /*word*/,
unsigned long /*maxResults*/ )
{
WordSearchRequestInstant * sr = new WordSearchRequestInstant();
@ -80,11 +78,11 @@ sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /
}
sptr< Dictionary::DataRequest >
VoiceEnginesDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
VoiceEnginesDictionary::getArticle( u32string const & word, vector< u32string > const &, u32string const &, bool )
{
string result;
string wordUtf8( Utf8::encode( word ) );
string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"voiceengines_play\"><tr>";
@ -139,4 +137,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const
} // namespace VoiceEngines
#endif
#endif

View file

@ -1,20 +1,17 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "dictionary.hh"
#include "config.hh"
#include "wstring.hh"
#include "text.hh"
#include <QCryptographicHash>
namespace VoiceEngines {
using std::vector;
using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const & voiceEngines );

View file

@ -2,8 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "website.hh"
#include "wstring_qt.hh"
#include "utf8.hh"
#include "text.hh"
#include <QUrl>
#include <QTextCodec>
#include <QDir>
@ -52,12 +51,6 @@ public:
dictionaryDescription = urlTemplate_;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -68,10 +61,12 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long ) override;
sptr< DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const & context, bool ) override;
sptr< DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const & context,
bool ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -96,7 +91,7 @@ protected slots:
virtual void requestFinished( QNetworkReply * ) {}
};
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( wstring const & /*word*/, unsigned long )
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( std::u32string const & /*word*/, unsigned long )
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -314,9 +309,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
finish();
}
sptr< DataRequest > WebSiteDictionary::getArticle( wstring const & str,
vector< wstring > const & /*alts*/,
wstring const & context,
sptr< DataRequest > WebSiteDictionary::getArticle( std::u32string const & str,
vector< std::u32string > const & /*alts*/,
std::u32string const & context,
bool /*ignoreDiacritics*/ )
{
QString urlString = Utils::WebSite::urlReplaceWord( QString( urlTemplate ), QString::fromStdU32String( str ) );

View file

@ -4,7 +4,7 @@
#include "xdxf.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
@ -39,7 +39,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using std::vector;
using std::list;
@ -140,12 +139,6 @@ public:
~XdxfDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -166,8 +159,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -417,8 +412,8 @@ XdxfDictionary::getSearchResults( QString const & searchString, int searchMode,
class XdxfArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
XdxfDictionary & dict;
bool ignoreDiacritics;
@ -427,8 +422,8 @@ class XdxfArticleRequest: public Dictionary::DataRequest
public:
XdxfArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
XdxfArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
XdxfDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -473,13 +468,13 @@ void XdxfArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -508,12 +503,12 @@ void XdxfArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -533,7 +528,7 @@ void XdxfArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
@ -560,9 +555,9 @@ void XdxfArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -979,7 +974,7 @@ void XdxfResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1200,7 +1195,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
else if ( stream.name() == u"abbreviations" ) {
QString s;
string value;
list< wstring > keys;
list< std::u32string > keys;
while ( !( stream.isEndElement() && stream.name() == u"abbreviations" ) && !stream.atEnd() ) {
if ( !stream.readNextStartElement() ) {
break;
@ -1216,7 +1211,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}
@ -1236,7 +1231,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}

View file

@ -3,8 +3,7 @@
#include "xdxf2html.hh"
#include <QtXml>
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "folding.hh"
#include "audiolink.hh"
@ -442,7 +441,7 @@ string convert( string const & in,
if ( i != pAbrv->end() ) {
string title;
if ( Utf8::decode( i->second ).size() < 70 ) {
if ( Text::toUtf32( i->second ).size() < 70 ) {
// Replace all spaces with non-breakable ones, since that's how Lingvo shows tooltips
title.reserve( i->second.size() );
@ -466,7 +465,7 @@ string convert( string const & in,
else {
title = i->second;
}
el.setAttribute( "title", QString::fromStdU32String( Utf8::decode( title ) ) );
el.setAttribute( "title", QString::fromStdU32String( Text::toUtf32( title ) ) );
}
}
}
@ -628,7 +627,7 @@ string convert( string const & in,
// if( type == XDXF && dictPtr != NULL && !el.hasAttribute( "start" ) )
if ( dictPtr != NULL && !el.hasAttribute( "start" ) ) {
string filename = Utf8::encode( el.text().toStdU32String() );
string filename = Text::toUtf8( el.text().toStdU32String() );
if ( Filetype::isNameOfPicture( filename ) ) {
QUrl url;

View file

@ -6,7 +6,7 @@
#include "zim.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "langcoder.hh"
#include "filetype.hh"
#include "dictfile.hh"
@ -38,12 +38,12 @@
namespace Zim {
using std::string;
using std::u32string;
using std::map;
using std::vector;
using std::multimap;
using std::pair;
using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -161,11 +161,6 @@ public:
~ZimDictionary() = default;
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -187,7 +182,7 @@ public:
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -524,8 +519,8 @@ ZimDictionary::getSearchResults( QString const & searchString, int searchMode, b
class ZimArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
u32string word;
vector< u32string > alts;
ZimDictionary & dict;
bool ignoreDiacritics;
@ -534,7 +529,10 @@ class ZimArticleRequest: public Dictionary::DataRequest
public:
ZimArticleRequest( wstring word_, vector< wstring > const & alts_, ZimDictionary & dict_, bool ignoreDiacritics_ ):
ZimArticleRequest( u32string word_,
vector< u32string > const & alts_,
ZimDictionary & dict_,
bool ignoreDiacritics_ ):
word( std::move( word_ ) ),
alts( alts_ ),
dict( dict_ ),
@ -576,13 +574,13 @@ void ZimArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -619,12 +617,12 @@ void ZimArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -643,7 +641,7 @@ void ZimArticleRequest::run()
// See Issue #271: A mechanism to clean-up invalid HTML cards.
string cleaner = Utils::Html::getHtmlCleaner();
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -671,9 +669,9 @@ void ZimArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( u32string const & word,
vector< u32string > const & alts,
u32string const &,
bool ignoreDiacritics )
{
@ -771,7 +769,7 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
return std::make_shared< ZimResourceRequest >( *this, noLeadingDot.toStdString() );
}
wstring normalizeWord( const std::string & url );
u32string normalizeWord( const std::string & url );
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
string const & indicesDir,
Dictionary::Initializing & initializing,
@ -854,7 +852,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( maxHeadwordsToExpand > 0 && ( articleCount >= maxHeadwordsToExpand ) ) {
if ( !title.empty() ) {
wstring word = Utf8::decode( title );
u32string word = Text::toUtf32( title );
indexedWords.addSingleWord( word, index );
}
else if ( !url.empty() ) {
@ -863,7 +861,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
else {
if ( !title.empty() ) {
auto word = Utf8::decode( title );
auto word = Text::toUtf32( title );
indexedWords.addWord( word, index );
wordCount++;
}
@ -908,7 +906,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
return dictionaries;
}
wstring normalizeWord( const std::string & url )
u32string normalizeWord( const std::string & url )
{
auto formattedUrl = QString::fromStdString( url ).remove( RX::Zim::leadingDotSlash );
return formattedUrl.toStdU32String();

View file

@ -4,7 +4,7 @@
#include "zipsounds.hh"
#include "dictfile.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "audiolink.hh"
@ -24,7 +24,6 @@
namespace ZipSounds {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -64,19 +63,19 @@ bool indexIsOldOrBad( string const & indexFile )
|| header.formatVersion != CurrentFormatVersion;
}
wstring stripExtension( string const & str )
std::u32string stripExtension( string const & str )
{
wstring name;
std::u32string name;
try {
name = Utf8::decode( str );
name = Text::toUtf32( str );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
return name;
}
if ( Filetype::isNameOfSound( str ) ) {
wstring::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos ) {
std::u32string::size_type pos = name.rfind( L'.' );
if ( pos != std::u32string::npos ) {
name.erase( pos );
}
@ -107,10 +106,6 @@ public:
string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -122,8 +117,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -161,9 +158,9 @@ string ZipSoundsDictionary::getName() noexcept
return result;
}
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -177,13 +174,13 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, uint32_t > mainArticles, alternateArticles;
multimap< std::u32string, uint32_t > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -198,12 +195,12 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, uint32_t > & mapToUse =
multimap< std::u32string, uint32_t > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.articleOffset ) );
@ -217,7 +214,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
string result;
multimap< wstring, uint32_t >::const_iterator i;
multimap< std::u32string, uint32_t >::const_iterator i;
result += "<table class=\"lsa_play\">";
@ -248,7 +245,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz;
string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>";
@ -290,7 +287,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz;
string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>";
@ -320,7 +317,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const &
{
// Remove extension for sound files (like in sound dirs)
wstring strippedName = stripExtension( name );
std::u32string strippedName = stripExtension( name );
vector< WordArticleLink > chain = findArticles( strippedName );
@ -434,7 +431,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Remove extension for sound files (like in sound dirs)
wstring word = stripExtension( link.word );
std::u32string word = stripExtension( link.word );
if ( !word.empty() ) {
names.addWord( word, offset );
}

View file

@ -5,7 +5,6 @@
#include <cstdlib>
#include "fulltextsearch.hh"
#include "ftshelpers.hh"
#include "wstring_qt.hh"
#include "dictfile.hh"
#include "folding.hh"
#include "utils.hh"

View file

@ -7,7 +7,6 @@
#include "btreeidx.hh"
#include "fulltextsearch.hh"
#include "folding.hh"
#include "wstring_qt.hh"
namespace FtsHelpers {
@ -44,7 +43,7 @@ public:
{
if ( ignoreDiacritics_ )
searchString =
QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::removeTrailingZero( searchString_ ) ) );
QString::fromStdU32String( Folding::applyDiacriticsOnly( Text::removeTrailingZero( searchString_ ) ) );
foundHeadwords = new QList< FTS::FtsHeadword >;
results = 0;

View file

@ -1,5 +1,4 @@
#include "headwordsmodel.hh"
#include "wstring_qt.hh"
HeadwordListModel::HeadwordListModel( QObject * parent ):
QAbstractListModel( parent ),
@ -67,7 +66,7 @@ void HeadwordListModel::setFilter( const QRegularExpression & reg )
}
}
filterWords.clear();
auto sr = _dict->prefixMatch( gd::removeTrailingZero( reg.pattern() ), maxFilterResults );
auto sr = _dict->prefixMatch( Text::removeTrailingZero( reg.pattern() ), maxFilterResults );
connect( sr.get(), &Dictionary::Request::finished, this, &HeadwordListModel::requestFinished, Qt::QueuedConnection );
queuedRequests.push_back( sr );
}

View file

@ -3,7 +3,7 @@
#include "langcoder.hh"
#include "language.hh"
#include "utf8.hh"
#include "text.hh"
#include <QFileInfo>
#include <QLocale>
@ -226,9 +226,9 @@ QString LangCoder::intToCode2( quint32 val )
return QString::fromLatin1( ba );
}
quint32 LangCoder::findIdForLanguage( gd::wstring const & lang )
quint32 LangCoder::findIdForLanguage( std::u32string const & lang )
{
const auto langFolded = QByteArrayView( Utf8::encode( lang ) );
const auto langFolded = QByteArrayView( Text::toUtf8( lang ) );
for ( auto const & lc : LANG_CODE_MAP ) {
if ( langFolded.compare( lc.lang, Qt::CaseInsensitive ) == 0 ) {

View file

@ -2,7 +2,7 @@
#include <QString>
#include <QIcon>
#include "wstring.hh"
#include "text.hh"
struct GDLangCode
{
@ -34,7 +34,7 @@ public:
/// Finds the id for the given language name, written in English. The search
/// is case- and punctuation insensitive.
static quint32 findIdForLanguage( gd::wstring const & );
static quint32 findIdForLanguage( std::u32string const & );
static quint32 findIdForLanguageCode3( std::string const & );

View file

@ -465,7 +465,7 @@ BabylonLang getBabylonLangByIndex( int index )
return BabylonDb[ index ];
}
quint32 findBlgLangIDByEnglishName( gd::wstring const & lang )
quint32 findBlgLangIDByEnglishName( std::u32string const & lang )
{
QString enName = QString::fromStdU32String( lang );
for ( const auto & idx : BabylonDb ) {

View file

@ -4,7 +4,6 @@
#pragma once
#include <QString>
#include "wstring_qt.hh"
/// Language-specific stuff - codes, names, ids etc.
namespace Language {
@ -47,5 +46,5 @@ struct BabylonLang
const char * localizedName;
};
BabylonLang getBabylonLangByIndex( int index );
quint32 findBlgLangIDByEnglishName( gd::wstring const & lang );
quint32 findBlgLangIDByEnglishName( std::u32string const & lang );
} // namespace Language

View file

@ -510,7 +510,7 @@ int main( int argc, char ** argv )
if ( gdcl.notts ) {
cfg.notts = true;
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
cfg.voiceEngines.clear();
#endif
}

View file

@ -1,4 +1,4 @@
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "speechclient.hh"

View file

@ -1,5 +1,5 @@
#pragma once
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include <QObject>
#include "config.hh"

View file

@ -15,7 +15,6 @@
<file>qt-lingvo.css</file>
<file>qt-modern.css</file>
<file>qt-style-win.css</file>
<file>qt-style.css</file>
<file>article-style-darkmode.css</file>
</qresource>
</RCC>

View file

@ -1,6 +1,6 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "texttospeechsource.hh"
#include <QVariant>

View file

@ -3,7 +3,7 @@
#pragma once
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "ui_texttospeechsource.h"
#include "config.hh"

View file

@ -10,7 +10,6 @@
#include "utils.hh"
#include "webmultimediadownload.hh"
#include "wildcard.hh"
#include "wstring_qt.hh"
#include <QBuffer>
#include <QClipboard>
#include <QCryptographicHash>
@ -1048,7 +1047,7 @@ void ArticleView::openLink( QUrl const & url, QUrl const & ref, QString const &
QMessageBox::critical( this, "GoldenDict", tr( "The referenced audio program doesn't exist." ) );
}
else if ( url.scheme() == "gdtts" ) {
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
// Text to speech
QString md5Id = Utils::Url::queryItemValue( url, "engine" );
QString text( url.path().mid( 1 ) );

View file

@ -173,7 +173,7 @@ bool EditDictionaries::isSourcesChanged() const
|| sources.getLingua() != cfg.lingua || sources.getForvo() != cfg.forvo || sources.getMediaWikis() != cfg.mediawikis
|| sources.getWebSites() != cfg.webSites || sources.getDictServers() != cfg.dictServers
|| sources.getPrograms() != cfg.programs
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
|| sources.getVoiceEngines() != cfg.voiceEngines
#endif
;
@ -197,7 +197,7 @@ void EditDictionaries::acceptChangedSources( bool rebuildGroups )
cfg.webSites = sources.getWebSites();
cfg.dictServers = sources.getDictServers();
cfg.programs = sources.getPrograms();
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
cfg.voiceEngines = sources.getVoiceEngines();
#endif
setUpdatesEnabled( false );

View file

@ -3,7 +3,7 @@
#include <Qt>
#include <QScopeGuard>
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "dict/epwing_book.hh"
#endif
@ -209,7 +209,7 @@ MainWindow::MainWindow( Config::Class & cfg_ ):
+ " GoldenDict/WebEngine" );
}
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
Epwing::initialize();
#endif
@ -1173,7 +1173,7 @@ MainWindow::~MainWindow()
scanPopup = nullptr;
}
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
Epwing::finalize();
#endif
}
@ -1374,10 +1374,8 @@ void MainWindow::updateAppearances( QString const & addonStyle,
}
#endif
QFile builtInCssFile( ":qt-style.css" );
builtInCssFile.open( QFile::ReadOnly );
QByteArray css = builtInCssFile.readAll();
QByteArray css{};
#if defined( Q_OS_WIN )
QFile winCssFile( ":qt-style-win.css" );
winCssFile.open( QFile::ReadOnly );

View file

@ -185,6 +185,7 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
ui.doubleClickTranslates->setChecked( p.doubleClickTranslates );
ui.selectBySingleClick->setChecked( p.selectWordBySingleClick );
ui.autoScrollToTargetArticle->setChecked( p.autoScrollToTargetArticle );
ui.targetArticleAtFirst->setChecked( p.targetArticleAtFirst );
ui.escKeyHidesMainWindow->setChecked( p.escKeyHidesMainWindow );
ui.darkMode->addItem( tr( "On" ), QVariant::fromValue( Config::Dark::On ) );
@ -390,7 +391,7 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
#ifndef MAKE_ZIM_SUPPORT
ui.allowZim->hide();
#endif
#ifdef NO_EPWING_SUPPORT
#ifndef EPWING_SUPPORT
ui.allowEpwing->hide();
#endif
ui.maxDictionarySize->setValue( p.fts.maxDictionarySize );
@ -441,6 +442,7 @@ Config::Preferences Preferences::getPreferences()
p.doubleClickTranslates = ui.doubleClickTranslates->isChecked();
p.selectWordBySingleClick = ui.selectBySingleClick->isChecked();
p.autoScrollToTargetArticle = ui.autoScrollToTargetArticle->isChecked();
p.targetArticleAtFirst = ui.targetArticleAtFirst->isChecked();
p.escKeyHidesMainWindow = ui.escKeyHidesMainWindow->isChecked();
p.darkMode = ui.darkMode->currentData().value< Config::Dark >();

View file

@ -169,6 +169,16 @@ however, the article from the topmost dictionary is shown.</string>
</property>
</widget>
</item>
<item row="4" column="1">
<widget class="QCheckBox" name="targetArticleAtFirst">
<property name="text">
<string>Place the target article at the first place.</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QGroupBox" name="enableTrayIcon">
<property name="toolTip">

View file

@ -8,8 +8,8 @@ const QLatin1String flags = QLatin1String(
#ifdef MAKE_ZIM_SUPPORT
" MAKE_ZIM_SUPPORT"
#endif
#ifdef NO_EPWING_SUPPORT
" NO_EPWING_SUPPORT"
#ifdef EPWING_SUPPORT
" EPWING_SUPPORT"
#endif
#ifdef USE_ICONV
" USE_ICONV"
@ -17,8 +17,8 @@ const QLatin1String flags = QLatin1String(
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
" MAKE_CHINESE_CONVERSION_SUPPORT"
#endif
#ifdef NO_TTS_SUPPORT
" NO_TTS_SUPPORT"
#ifdef TTS_SUPPORT
" TTS_SUPPORT"
#endif
#ifndef MAKE_FFMPEG_PLAYER
" no_ffmpeg_player"

View file

@ -3,14 +3,11 @@
#include "wordfinder.hh"
#include "folding.hh"
#include "wstring_qt.hh"
#include <map>
using std::vector;
using std::list;
using gd::wstring;
using gd::wchar;
using std::map;
using std::pair;
@ -134,7 +131,7 @@ void WordFinder::startSearch()
allWordWritings[ 0 ] = inputWord.toStdU32String();
for ( const auto & inputDict : *inputDicts ) {
vector< wstring > writings = inputDict->getAlternateWritings( allWordWritings[ 0 ] );
vector< std::u32string > writings = inputDict->getAlternateWritings( allWordWritings[ 0 ] );
allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() );
}
@ -255,7 +252,9 @@ unsigned saturated( unsigned x )
/// both sides by either whitespace, punctuation or begin/end of string.
/// If true is returned, pos holds the offset in the haystack. If the offset
/// is larger than 255, it is set to 255.
bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle, wstring::size_type & pos )
bool hasSurroundedWithWs( std::u32string const & haystack,
std::u32string const & needle,
std::u32string::size_type & pos )
{
if ( haystack.size() < needle.size() ) {
return false; // Needle won't even fit into a haystack
@ -264,7 +263,7 @@ bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle, wstr
for ( pos = 0;; ++pos ) {
pos = haystack.find( needle, pos );
if ( pos == wstring::npos ) {
if ( pos == std::u32string::npos ) {
return false; // Not found
}
@ -290,13 +289,13 @@ void WordFinder::updateResults()
updateResultsTimer.stop(); // Can happen when we were done before it'd expire
}
wstring original = Folding::applySimpleCaseOnly( allWordWritings[ 0 ] );
std::u32string original = Folding::applySimpleCaseOnly( allWordWritings[ 0 ] );
for ( auto i = finishedRequests.begin(); i != finishedRequests.end(); ) {
for ( size_t count = ( *i )->matchesCount(), x = 0; x < count; ++x ) {
wstring match = ( **i )[ x ].word;
int weight = ( **i )[ x ].weight;
wstring lowerCased = Folding::applySimpleCaseOnly( match );
std::u32string match = ( **i )[ x ].word;
int weight = ( **i )[ x ].weight;
std::u32string lowerCased = Folding::applySimpleCaseOnly( match );
if ( searchType == ExpressionMatch ) {
unsigned ws;
@ -320,7 +319,7 @@ void WordFinder::updateResults()
weight = ws;
}
auto insertResult =
resultsIndex.insert( pair< wstring, ResultsArray::iterator >( lowerCased, resultsArray.end() ) );
resultsIndex.insert( pair< std::u32string, ResultsArray::iterator >( lowerCased, resultsArray.end() ) );
if ( !insertResult.second ) {
// Wasn't inserted since there was already an item -- check the case
@ -369,16 +368,16 @@ void WordFinder::updateResults()
};
for ( const auto & allWordWriting : allWordWritings ) {
wstring target = Folding::applySimpleCaseOnly( allWordWriting );
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
std::u32string target = Folding::applySimpleCaseOnly( allWordWriting );
std::u32string targetNoFullCase = Folding::applyFullCaseOnly( target );
std::u32string targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
std::u32string targetNoPunct = Folding::applyPunctOnly( targetNoDia );
std::u32string targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
wstring::size_type matchPos = 0;
std::u32string::size_type matchPos = 0;
for ( const auto & i : resultsIndex ) {
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
std::u32string resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
int rank;
@ -441,14 +440,14 @@ void WordFinder::updateResults()
// only the first one, storing it in rank. Then we sort the results using
// SortByRankAndLength.
for ( const auto & allWordWriting : allWordWritings ) {
wstring target = Folding::apply( allWordWriting );
std::u32string target = Folding::apply( allWordWriting );
for ( const auto & i : resultsIndex ) {
wstring resultFolded = Folding::apply( i.first );
std::u32string resultFolded = Folding::apply( i.first );
int charsInCommon = 0;
for ( wchar const *t = target.c_str(), *r = resultFolded.c_str(); *t && *t == *r;
for ( char32_t const *t = target.c_str(), *r = resultFolded.c_str(); *t && *t == *r;
++t, ++r, ++charsInCommon ) {
;
}

View file

@ -48,11 +48,11 @@ private:
std::vector< sptr< Dictionary::Class > > const * inputDicts;
std::vector< gd::wstring > allWordWritings; // All writings of the inputWord
std::vector< std::u32string > allWordWritings; // All writings of the inputWord
struct OneResult
{
gd::wstring word;
std::u32string word;
int rank;
bool wasSuggested;
};
@ -60,7 +60,7 @@ private:
// Maps lowercased string to the original one. This catches all duplicates
// without case sensitivity. Made as an array and a map indexing that array.
using ResultsArray = std::list< OneResult >;
using ResultsIndex = std::map< gd::wstring, ResultsArray::iterator >;
using ResultsIndex = std::map< std::u32string, ResultsArray::iterator >;
ResultsArray resultsArray;
ResultsIndex resultsIndex;