Compare commits

...

12 commits

Author SHA1 Message Date
xiaoyifang 0dc58685ac
Merge 15b918eb6a into 0c42c300e1 2024-11-23 09:14:56 -05:00
shenleban tongying 0c42c300e1
Merge pull request #1987 from shenlebantongying/refactor/gd-text
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
refactor: use standard string types and merge wstring(-qt)/utf8/ namespaces to Text
2024-11-23 08:26:21 -05:00
shenleban tongying 1471bc3926 ignore last commit 2024-11-23 08:19:03 -05:00
shenleban tongying f1e158578f refactor: use standard string types and merge string namespaces to Text 2024-11-23 08:15:43 -05:00
shenleban tongying abeacef13d
clean: delete unused and empty builtin qt-style.css for macOS/Linux
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-23 08:41:00 +00:00
atauzki 1fb1c5c9de
feat: auto dark reader mode for Windows
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-22 22:47:19 -05:00
shenleban tongying 5406b3022a
dev: generally improve cmake build script
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
2024-11-22 16:50:55 -05:00
shenleban tongying f446ad358f clean: delete Dictionary::getProperties which is unused since 2009 2024-11-22 14:51:36 -05:00
shenleban tongying 3c5b76f77a
fix startdict index file reading caused by wrong order of reading
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
partially revert  https://github.com/xiaoyifang/goldendict-ng/pull/1972
2024-11-22 04:53:10 +00:00
autofix-ci[bot] 15b918eb6a
[autofix.ci] apply automated fixes 2024-11-08 01:47:29 +00:00
xiaoyifang 27cbb7351b opt: add option about 2024-11-06 13:35:22 +08:00
xiaoyifang c787a08d2f opt: add option about 2024-11-06 12:07:23 +08:00
85 changed files with 1306 additions and 1495 deletions

View file

@ -21,3 +21,6 @@ c8af0450f1f7f8188004db96e3f53e7e33e2ccad
# remove gddebug.hh and associated functions # remove gddebug.hh and associated functions
76aaed116bdc3aeb53fd61553aedb877baf9b510 76aaed116bdc3aeb53fd61553aedb877baf9b510
# wstring & wchar -> std::u32string & char32_t
f1e158578f62c96059bef1a616b75495adb6e2c6

View file

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.25) # ubuntu 23.04 Fedora 36 cmake_minimum_required(VERSION 3.25) # Debian 11 Ubuntu 24.04 Fedora 36
option(WITH_FFMPEG_PLAYER "Enable support for FFMPEG player" ON) option(WITH_FFMPEG_PLAYER "Enable support for FFMPEG player" ON)
option(WITH_EPWING_SUPPORT "Enable epwing support" ON) option(WITH_EPWING_SUPPORT "Enable epwing support" ON)
@ -9,20 +9,12 @@ option(WITH_TTS "enable QTexttoSpeech support" OFF)
option(USE_SYSTEM_FMT "use system fmt instead of bundled one" OFF) option(USE_SYSTEM_FMT "use system fmt instead of bundled one" OFF)
option(USE_SYSTEM_TOML "use system toml++ instead of bundled one" OFF) option(USE_SYSTEM_TOML "use system toml++ instead of bundled one" OFF)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF) ## This should be avoided because of small regressions, as some scripts and icons themes assume the binary name and resources folder to be `goldendict`
option(USE_ALTERNATIVE_NAME "For Linux, change the binary name and resource folder to goldendict-ng to parallel install with the original GD" OFF)
## Change binary & resources folder to parallel install with original GD.
## This flag should be avoided because it leads to small regressions:
## 1. There are personal scripts assuming the binary name to be "goldendict" -> require everyone to change the name in their script
## 2. There are icon themes that assuming the icon name to be "goldendict" -> invalidate the GD icon when using a icon theme
## 3. There are dictionary packages that install files to "/usr/share/goldendict/content" -> nullify the auto dict discovery
option(USE_ALTERNATIVE_NAME "Force the name goldendict-ng " OFF)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") # to put staff in the ./cmake folder
# vcpkg handling code, must be placed before project() # vcpkg handling code, must be placed before project()
if (WIN32) if (WIN32)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
if (DEFINED CMAKE_TOOLCHAIN_FILE) if (DEFINED CMAKE_TOOLCHAIN_FILE)
message(STATUS "Using toolchain file: ${CMAKE_TOOLCHAIN_FILE}") message(STATUS "Using toolchain file: ${CMAKE_TOOLCHAIN_FILE}")
else () else ()
@ -37,11 +29,9 @@ if (WIN32)
set(VCPKG_MANIFEST_MODE OFF CACHE BOOL "disable existing manifest mode caused by the existrance of vcpkg.json" FORCE) set(VCPKG_MANIFEST_MODE OFF CACHE BOOL "disable existing manifest mode caused by the existrance of vcpkg.json" FORCE)
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/_deps/vcpkg-export-src/scripts/buildsystems/vcpkg.cmake") set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/_deps/vcpkg-export-src/scripts/buildsystems/vcpkg.cmake")
endif () endif ()
endif () if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
endif ()
if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
endif () endif ()
include(FeatureSummary) include(FeatureSummary)
@ -49,7 +39,7 @@ include(FeatureSummary)
project(goldendict-ng project(goldendict-ng
VERSION 24.11.0 VERSION 24.11.0
LANGUAGES CXX C) LANGUAGES CXX C)
if (APPLE) if (APPLE)
enable_language(OBJCXX) enable_language(OBJCXX)
set(CMAKE_OBJCXX_STANDARD 17) set(CMAKE_OBJCXX_STANDARD 17)
@ -60,13 +50,12 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(GOLDENDICT "goldendict") # binary/executable name set(GOLDENDICT "goldendict") # binary/executable name
if (USE_ALTERNATIVE_NAME ) if (USE_ALTERNATIVE_NAME)
set(GOLDENDICT "goldendict-ng") set(GOLDENDICT "goldendict-ng")
endif () endif ()
if (APPLE) if (APPLE)
set(GOLDENDICT "GoldenDict-ng") set(GOLDENDICT "GoldenDict-ng")
endif() endif ()
#### Qt #### Qt
@ -78,11 +67,10 @@ endif ()
find_package(Qt6 REQUIRED COMPONENTS ${GD_QT_COMPONENTS}) find_package(Qt6 REQUIRED COMPONENTS ${GD_QT_COMPONENTS})
qt_standard_project_setup() # availiable after find_package(Qt6 .... Core qt_standard_project_setup()
set(CMAKE_AUTORCC ON) # not included in the qt_standard_project_setup set(CMAKE_AUTORCC ON) # not included in the qt_standard_project_setup
#### Things required during configuration #### Things required during configuration
block() # generate version.txt block() # generate version.txt
string(TIMESTAMP build_time UTC) string(TIMESTAMP build_time UTC)
find_package(Git) find_package(Git)
@ -163,11 +151,8 @@ target_link_libraries(${GOLDENDICT} PRIVATE
Qt6::WebEngineWidgets Qt6::WebEngineWidgets
Qt6::Widgets Qt6::Widgets
Qt6::Svg Qt6::Svg
) $<$<BOOL:${WITH_TTS}>:Qt6::TextToSpeech>
)
if (WITH_TTS)
target_link_libraries(${GOLDENDICT} PRIVATE Qt6::TextToSpeech)
endif ()
target_include_directories(${GOLDENDICT} PRIVATE target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/thirdparty/qtsingleapplication/src ${PROJECT_SOURCE_DIR}/thirdparty/qtsingleapplication/src
@ -176,11 +161,7 @@ target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/src/dict ${PROJECT_SOURCE_DIR}/src/dict
${PROJECT_SOURCE_DIR}/src/dict/utils ${PROJECT_SOURCE_DIR}/src/dict/utils
${PROJECT_SOURCE_DIR}/src/ui ${PROJECT_SOURCE_DIR}/src/ui
) )
if (WIN32)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/src/windows)
endif ()
if (NOT USE_SYSTEM_TOML) if (NOT USE_SYSTEM_TOML)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty/tomlplusplus) target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty/tomlplusplus)
@ -199,45 +180,22 @@ target_compile_definitions(${GOLDENDICT} PRIVATE
) )
target_compile_definitions(${GOLDENDICT} PUBLIC target_compile_definitions(${GOLDENDICT} PUBLIC
CMAKE_USED_HACK # temporal hack to avoid breaking qmake build
MAKE_QTMULTIMEDIA_PLAYER MAKE_QTMULTIMEDIA_PLAYER
MAKE_CHINESE_CONVERSION_SUPPORT MAKE_CHINESE_CONVERSION_SUPPORT
) $<$<BOOL:${WIN32}>:__WIN32>
$<$<BOOL:${WITH_FFMPEG_PLAYER}>:MAKE_FFMPEG_PLAYER>
if (WIN32) $<$<BOOL:${WITH_TTS}>:TTS_SUPPORT>
target_compile_definitions(${GOLDENDICT} PUBLIC $<$<BOOL:${WITH_EPWING_SUPPORT}>:EPWING_SUPPORT>
__WIN32 $<$<BOOL:${WITH_ZIM}>:MAKE_ZIM_SUPPORT>
INCLUDE_LIBRARY_PATH $<$<BOOL:${WITH_VCPKG_BREAKPAD}>:USE_BREAKPAD>
) )
endif ()
if (WITH_FFMPEG_PLAYER)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_FFMPEG_PLAYER)
endif ()
if(NOT WITH_TTS)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_TTS_SUPPORT)
endif()
if (NOT WITH_EPWING_SUPPORT)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_EPWING_SUPPORT)
endif ()
if (WITH_ZIM)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_ZIM_SUPPORT)
endif ()
if (WITH_VCPKG_BREAKPAD)
target_compile_definitions(${GOLDENDICT} PUBLIC USE_BREAKPAD)
endif ()
#### libraries linking && includes for Win or Unix #### libraries linking && includes for Win or Unix
if (WIN32) if (WIN32)
include(Deps_Vcpkg) include(cmake/Deps_Vcpkg.cmake)
else () else ()
include(Deps_Unix) include(cmake/Deps_Unix.cmake)
endif () endif ()
#### add translations #### add translations
@ -261,156 +219,11 @@ add_dependencies(${GOLDENDICT} "release_translations")
#### installation or assemble redistribution #### installation or assemble redistribution
if (APPLE) if (APPLE)
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist") include(cmake/Package_macOS.cmake)
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY) elseif (LINUX OR BSD)
include(cmake/Package_Linux.cmake)
set_target_properties(${GOLDENDICT} PROPERTIES elseif (WIN32)
MACOSX_BUNDLE TRUE include(cmake/Package_Windows.cmake)
MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)
set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")
# if anything wrong, delete this and affect lines, and see what's Qt will generate by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "
set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
qt_deploy_runtime_dependencies(
EXECUTABLE \"${Redistributable_APP}\"
ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
GENERATE_QT_CONF
NO_APP_STORE_COMPLIANCE)
qt_deploy_translations()
qt_deploy_qt_conf(\"${QtConfPath}\"
PLUGINS_DIR PlugIns
TRANSLATIONS_DIR Resources/translations)
"
)
install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")
if (IS_READABLE "/opt/homebrew/share/opencc/")
set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_READABLE "/usr/local/share/opencc/")
set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")
install(SCRIPT ${deploy_script})
install(CODE "execute_process(COMMAND codesign --force --deep -s - ${Redistributable_APP})")
find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
install(CODE "
execute_process(COMMAND ${CREATE-DMG} \
--skip-jenkins \
--format \"ULMO\"
--volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
--volicon ${CMAKE_SOURCE_DIR}/icons/macicon.icns \
--icon \"${App_Name}\" 100 100
--app-drop-link 300 100 \
\"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
\"${Assembling_Dir}\")"
)
else ()
message(WARNING "create-dmg not found. No .dmg will be created")
endif ()
endif ()
if (LINUX OR BSD)
install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)
if (NOT USE_ALTERNATIVE_NAME)
# see: config.cc -> getProgramDataDir
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
RENAME goldendict-ng.png)
install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)
block() # patch the desktop file to adapt the binary & icon file's name change
file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
file(WRITE "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" "${DESKTOP_FILE_CONTENT}")
endblock()
endif ()
endif ()
if (WIN32)
set_target_properties(${GOLDENDICT}
PROPERTIES
WIN32_EXECUTABLE TRUE
RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "qt_deploy_runtime_dependencies(
EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/goldendict.exe\"
BIN_DIR .
LIB_DIR .
)"
)
install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)
# TODO: do we really need to carry a copy of openSSL?
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)
# trick CPack to make the output folder as NSIS installer
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
DESTINATION .
FILES_MATCHING
PATTERN "*"
PATTERN "*.pdb" EXCLUDE
PATTERN "*.ilk" EXCLUDE)
set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")
# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")
# NSIS specificS
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_URL_INFO_ABOUT [=[https://xiaoyifang.github.io/goldendict-ng/]=])
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")
include(CPack)
endif () endif ()
feature_summary(WHAT ALL DESCRIPTION "Build configuration:") feature_summary(WHAT ALL DESCRIPTION "Build configuration:")

View file

@ -1,5 +1,4 @@
#### Various workarounds #### Various workarounds
if (APPLE) if (APPLE)
# old & new homebrew's include paths # old & new homebrew's include paths
target_include_directories(${GOLDENDICT} PRIVATE /usr/local/include /opt/homebrew/include) target_include_directories(${GOLDENDICT} PRIVATE /usr/local/include /opt/homebrew/include)
@ -29,25 +28,20 @@ endif ()
##### Finding packages from package manager ##### Finding packages from package manager
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED) find_package(BZip2 REQUIRED)
# Import all PkgConfig dependencies as one
# Consider all PkgConfig dependencies as one pkg_check_modules(DEPS REQUIRED IMPORTED_TARGET
pkg_check_modules(PKGCONFIG_DEPS IMPORTED_TARGET
hunspell hunspell
liblzma
lzo2 lzo2
opencc opencc
vorbis # .ogg vorbis # .ogg
vorbisfile vorbisfile
liblzma
xapian-core xapian-core
zlib
) )
target_link_libraries(${GOLDENDICT} PRIVATE target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::DEPS BZip2::BZip2)
PkgConfig::PKGCONFIG_DEPS
BZip2::BZip2
ZLIB::ZLIB
)
# On FreeBSD, there are two iconv, libc iconv & GNU libiconv. # On FreeBSD, there are two iconv, libc iconv & GNU libiconv.
# The system one is good enough, the following is a workaround to use libc iconv on freeBSD. # The system one is good enough, the following is a workaround to use libc iconv on freeBSD.
@ -88,7 +82,7 @@ if (WITH_ZIM)
COMMAND_ERROR_IS_FATAL ANY) COMMAND_ERROR_IS_FATAL ANY)
message(STATUS "Found correct homebrew icu path -> ${ICU_REQUIRED_BY_ZIM_PREFIX}") message(STATUS "Found correct homebrew icu path -> ${ICU_REQUIRED_BY_ZIM_PREFIX}")
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig") set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig")
message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig") message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}")
# icu4c as transitive dependency of libzim may not be automatically copied into app bundle # icu4c as transitive dependency of libzim may not be automatically copied into app bundle
# so we manually discover the icu4c from homebrew, then find the relevent dylibs # so we manually discover the icu4c from homebrew, then find the relevent dylibs

22
cmake/Package_Linux.cmake Normal file
View file

@ -0,0 +1,22 @@
# Installation rules for Linux/BSD.
#
# Reads from the including scope: the ${GOLDENDICT} target name, the ${qm_files}
# list of compiled translations, and the USE_ALTERNATIVE_NAME option.

install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)

if (NOT USE_ALTERNATIVE_NAME)
    # see: config.cc -> getProgramDataDir
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
    install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
            RENAME goldendict-ng.png)
    install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)

    # The patched desktop file is generated into the build tree. Writing it back
    # into the source tree would modify a version-controlled file, and a later
    # configure with USE_ALTERNATIVE_NAME=OFF would then install the patched copy.
    set(patched_desktop_file "${CMAKE_BINARY_DIR}/io.github.xiaoyifang.goldendict_ng.desktop")

    block() # patch the desktop file to adapt the binary & icon file's name change
        file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
        string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
        string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
        file(WRITE "${patched_desktop_file}" "${DESKTOP_FILE_CONTENT}")
    endblock()

    # Same file name as the original, so the installed path is unchanged.
    install(FILES "${patched_desktop_file}" DESTINATION share/applications)
endif ()

View file

@ -0,0 +1,55 @@
# Packaging rules for Windows: deploys Qt runtime dependencies next to the
# executable, installs extra runtime files from vcpkg, and configures CPack
# to produce 7z and NSIS packages.
#
# Reads from the including scope: the ${GOLDENDICT} target name, ${GD_WIN_OUTPUT_DIR},
# ${VCPKG_INSTALLED_DIR} and ${VCPKG_TARGET_TRIPLET}.

set_target_properties(${GOLDENDICT}
        PROPERTIES
        WIN32_EXECUTABLE TRUE
        RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
        LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)

# TODO: this breaks "Multi-Config" build systems like VisualStudio.
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)

# The executable name follows ${GOLDENDICT} (it is not always "goldendict"),
# matching the name used for the NSIS shortcut below.
qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "qt_deploy_runtime_dependencies(
        EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/${GOLDENDICT}.exe\"
        BIN_DIR .
        LIB_DIR .
)"
)

install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)

# Note: these are runtime dependencies that aren't copied automatically
# See Qt's network -> SSL documentation https://doc.qt.io/qt-6/ssl.html#considerations-while-packaging-your-application
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)

# trick CPack to make the output folder as NSIS installer
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
        DESTINATION .
        FILES_MATCHING
        PATTERN "*"
        PATTERN "*.pdb" EXCLUDE
        PATTERN "*.ilk" EXCLUDE)

set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")

# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")

# NSIS specifics
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")

# Copied from https://crascit.com/2015/08/07/cmake_cpack_nsis_shortcuts_with_parameters/
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")

# CPack must be included after the CPACK_* variables above are set.
include(CPack)

69
cmake/Package_macOS.cmake Normal file
View file

@ -0,0 +1,69 @@
# Packaging rules for macOS: builds the target as an app bundle, assembles a
# redistributable copy under ${CMAKE_BINARY_DIR}/redist at install time,
# ad-hoc signs it, and wraps it into a .dmg when `create-dmg` is available.
#
# Reads from the including scope: the ${GOLDENDICT} target name, ${qm_files},
# and ${BREW_ICU_ADDITIONAL_DYLIBS} (presumably set by the dependency script
# when ZIM support is enabled; expands to nothing otherwise -- TODO confirm).

set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)

set_target_properties(${GOLDENDICT} PROPERTIES
        MACOSX_BUNDLE TRUE
        MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)

set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")

# if anything goes wrong, delete this and the affected lines, and see what Qt will generate by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")

qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "
        set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
        set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
        qt_deploy_runtime_dependencies(
                EXECUTABLE \"${Redistributable_APP}\"
                ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
                GENERATE_QT_CONF
                NO_APP_STORE_COMPLIANCE)
        qt_deploy_translations()
        qt_deploy_qt_conf(\"${QtConfPath}\"
                PLUGINS_DIR PlugIns
                TRANSLATIONS_DIR Resources/translations)
        "
)

install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")

# Locate opencc's data folder under the new (Apple Silicon) or old (Intel) homebrew prefix.
if (IS_READABLE "/opt/homebrew/share/opencc/")
    set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_READABLE "/usr/local/share/opencc/")
    set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
    message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()

# Resolve symlinks so the real directory is what gets copied into the bundle.
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)

message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")

install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")

install(SCRIPT ${deploy_script})

# Paths embedded in the generated install code are quoted so that build or
# source directories containing spaces are passed as a single argument.
install(CODE "execute_process(COMMAND codesign --force --deep -s - \"${Redistributable_APP}\")")

find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
    install(CODE "
        execute_process(COMMAND \"${CREATE-DMG}\"
                --skip-jenkins
                --format \"ULMO\"
                --volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR}
                --volicon \"${CMAKE_SOURCE_DIR}/icons/macicon.icns\"
                --icon \"${App_Name}\" 100 100
                --app-drop-link 300 100
                \"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\"
                \"${Assembling_Dir}\")"
    )
else ()
    message(WARNING "create-dmg not found. No .dmg will be created")
endif ()

View file

@ -9,7 +9,6 @@
#include "htmlescape.hh" #include "htmlescape.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "utils.hh" #include "utils.hh"
#include "wstring_qt.hh"
#include <QDir> #include <QDir>
#include <QFile> #include <QFile>
#include <QTextDocumentFragment> #include <QTextDocumentFragment>
@ -21,7 +20,6 @@
using std::vector; using std::vector;
using std::string; using std::string;
using gd::wstring;
using std::set; using std::set;
using std::list; using std::list;
@ -161,7 +159,10 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
#if QT_VERSION >= QT_VERSION_CHECK( 6, 5, 0 ) #if QT_VERSION >= QT_VERSION_CHECK( 6, 5, 0 )
if ( GlobalBroadcaster::instance()->getPreference()->darkReaderMode == Config::Dark::Auto if ( GlobalBroadcaster::instance()->getPreference()->darkReaderMode == Config::Dark::Auto
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark ) { #if !defined( Q_OS_WINDOWS ) // not properly works on Windows.
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark
#endif
&& GlobalBroadcaster::instance()->getPreference()->darkMode == Config::Dark::On ) {
darkReaderModeEnabled = true; darkReaderModeEnabled = true;
} }
#endif #endif
@ -481,7 +482,7 @@ ArticleRequest::ArticleRequest( QString const & word,
// Accumulate main forms // Accumulate main forms
for ( const auto & activeDict : activeDicts ) { for ( const auto & activeDict : activeDicts ) {
auto const s = activeDict->findHeadwordsForSynonym( gd::removeTrailingZero( word ) ); auto const s = activeDict->findHeadwordsForSynonym( Text::removeTrailingZero( word ) );
connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection ); connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );
@ -518,9 +519,9 @@ void ArticleRequest::altSearchFinished()
altsDone = true; // So any pending signals in queued mode won't mess us up altsDone = true; // So any pending signals in queued mode won't mess us up
vector< wstring > altsVector( alts.begin(), alts.end() ); vector< std::u32string > altsVector( alts.begin(), alts.end() );
wstring wordStd = word.toStdU32String(); std::u32string wordStd = word.toStdU32String();
if ( activeDicts.size() <= 1 ) { if ( activeDicts.size() <= 1 ) {
articleSizeLimit = -1; // Don't collapse article if only one dictionary presented articleSizeLimit = -1; // Don't collapse article if only one dictionary presented
@ -531,7 +532,7 @@ void ArticleRequest::altSearchFinished()
sptr< Dictionary::DataRequest > r = activeDict->getArticle( sptr< Dictionary::DataRequest > r = activeDict->getArticle(
wordStd, wordStd,
altsVector, altsVector,
gd::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ), Text::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
ignoreDiacritics ); ignoreDiacritics );
connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection ); connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );
@ -1005,7 +1006,7 @@ void ArticleRequest::individualWordFinished()
WordFinder::SearchResults const & results = stemmedWordFinder->getResults(); WordFinder::SearchResults const & results = stemmedWordFinder->getResults();
if ( results.size() ) { if ( results.size() ) {
wstring source = Folding::applySimpleCaseOnly( currentSplittedWordCompound ); std::u32string source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
bool hadSomething = false; bool hadSomething = false;
@ -1019,7 +1020,7 @@ void ArticleRequest::individualWordFinished()
// Prefix match found. Check if the aliases are acceptable. // Prefix match found. Check if the aliases are acceptable.
wstring result( Folding::applySimpleCaseOnly( results[ x ].first ) ); std::u32string result( Folding::applySimpleCaseOnly( results[ x ].first ) );
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) { if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) {
// The resulting string begins with the source one // The resulting string begins with the source one

View file

@ -88,7 +88,7 @@ class ArticleRequest: public Dictionary::DataRequest
QMap< QString, QString > contexts; QMap< QString, QString > contexts;
std::vector< sptr< Dictionary::Class > > activeDicts; std::vector< sptr< Dictionary::Class > > activeDicts;
std::set< gd::wstring, std::less<> > alts; // Accumulated main forms std::set< std::u32string, std::less<> > alts; // Accumulated main forms
std::list< sptr< Dictionary::WordSearchRequest > > altSearches; std::list< sptr< Dictionary::WordSearchRequest > > altSearches;
std::list< sptr< Dictionary::DataRequest > > bodyRequests; std::list< sptr< Dictionary::DataRequest > > bodyRequests;
bool altsDone{ false }; bool altsDone{ false };

View file

@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "filetype.hh" #include "filetype.hh"
#include "utf8.hh" #include "text.hh"
#include <ctype.h> #include <ctype.h>
namespace Filetype { namespace Filetype {
@ -26,13 +26,13 @@ string simplifyString( string const & str, bool lowercase )
size_t beginPos = 0; size_t beginPos = 0;
while ( beginPos < str.size() && Utf8::isspace( str[ beginPos ] ) ) { while ( beginPos < str.size() && Text::isspace( str[ beginPos ] ) ) {
++beginPos; ++beginPos;
} }
size_t endPos = str.size(); size_t endPos = str.size();
while ( endPos && Utf8::isspace( str[ endPos - 1 ] ) ) { while ( endPos && Text::isspace( str[ endPos - 1 ] ) ) {
--endPos; --endPos;
} }

View file

@ -3,7 +3,7 @@
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "globalregex.hh" #include "globalregex.hh"
#include "inc_case_folding.hh" #include "inc_case_folding.hh"
@ -13,12 +13,12 @@ namespace Folding {
/// caught by the diacritics folding table, but they are only handled there /// caught by the diacritics folding table, but they are only handled there
/// when they come with their main characters, not by themselves. The rest /// when they come with their main characters, not by themselves. The rest
/// are caught here. /// are caught here.
bool isCombiningMark( wchar ch ) bool isCombiningMark( char32_t ch )
{ {
return QChar::isMark( ch ); return QChar::isMark( ch );
} }
wstring apply( wstring const & in, bool preserveWildcards ) std::u32string apply( std::u32string const & in, bool preserveWildcards )
{ {
// remove diacritics (normalization), white space, punt, // remove diacritics (normalization), white space, punt,
auto temp = QString::fromStdU32String( in ) auto temp = QString::fromStdU32String( in )
@ -32,7 +32,7 @@ wstring apply( wstring const & in, bool preserveWildcards )
// case folding // case folding
std::u32string caseFolded; std::u32string caseFolded;
caseFolded.reserve( temp.size() ); caseFolded.reserve( temp.size() );
wchar buf[ foldCaseMaxOut ]; char32_t buf[ foldCaseMaxOut ];
for ( const char32_t ch : temp ) { for ( const char32_t ch : temp ) {
auto n = foldCase( ch, buf ); auto n = foldCase( ch, buf );
caseFolded.append( buf, n ); caseFolded.append( buf, n );
@ -40,11 +40,11 @@ wstring apply( wstring const & in, bool preserveWildcards )
return caseFolded; return caseFolded;
} }
wstring applySimpleCaseOnly( wstring const & in ) std::u32string applySimpleCaseOnly( std::u32string const & in )
{ {
wchar const * nextChar = in.data(); char32_t const * nextChar = in.data();
wstring out; std::u32string out;
out.reserve( in.size() ); out.reserve( in.size() );
@ -55,27 +55,27 @@ wstring applySimpleCaseOnly( wstring const & in )
return out; return out;
} }
wstring applySimpleCaseOnly( QString const & in ) std::u32string applySimpleCaseOnly( QString const & in )
{ {
//qt only support simple case folding. //qt only support simple case folding.
return in.toCaseFolded().toStdU32String(); return in.toCaseFolded().toStdU32String();
} }
wstring applySimpleCaseOnly( std::string const & in ) std::u32string applySimpleCaseOnly( std::string const & in )
{ {
return applySimpleCaseOnly( Utf8::decode( in ) ); return applySimpleCaseOnly( Text::toUtf32( in ) );
// return QString::fromStdString( in ).toCaseFolded().toStdU32String(); // return QString::fromStdString( in ).toCaseFolded().toStdU32String();
} }
wstring applyFullCaseOnly( wstring const & in ) std::u32string applyFullCaseOnly( std::u32string const & in )
{ {
wstring caseFolded; std::u32string caseFolded;
caseFolded.reserve( in.size() * foldCaseMaxOut ); caseFolded.reserve( in.size() * foldCaseMaxOut );
wchar const * nextChar = in.data(); char32_t const * nextChar = in.data();
wchar buf[ foldCaseMaxOut ]; char32_t buf[ foldCaseMaxOut ];
for ( size_t left = in.size(); left--; ) { for ( size_t left = in.size(); left--; ) {
caseFolded.append( buf, foldCase( *nextChar++, buf ) ); caseFolded.append( buf, foldCase( *nextChar++, buf ) );
@ -84,17 +84,17 @@ wstring applyFullCaseOnly( wstring const & in )
return caseFolded; return caseFolded;
} }
wstring applyDiacriticsOnly( wstring const & in ) std::u32string applyDiacriticsOnly( std::u32string const & in )
{ {
auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark ); auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark );
return noAccent.toStdU32String(); return noAccent.toStdU32String();
} }
wstring applyPunctOnly( wstring const & in ) std::u32string applyPunctOnly( std::u32string const & in )
{ {
wchar const * nextChar = in.data(); char32_t const * nextChar = in.data();
wstring out; std::u32string out;
out.reserve( in.size() ); out.reserve( in.size() );
@ -119,11 +119,11 @@ QString applyPunctOnly( QString const & in )
return out; return out;
} }
wstring applyWhitespaceOnly( wstring const & in ) std::u32string applyWhitespaceOnly( std::u32string const & in )
{ {
wchar const * nextChar = in.data(); char32_t const * nextChar = in.data();
wstring out; std::u32string out;
out.reserve( in.size() ); out.reserve( in.size() );
@ -136,11 +136,11 @@ wstring applyWhitespaceOnly( wstring const & in )
return out; return out;
} }
wstring applyWhitespaceAndPunctOnly( wstring const & in ) std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
{ {
wchar const * nextChar = in.data(); char32_t const * nextChar = in.data();
wstring out; std::u32string out;
out.reserve( in.size() ); out.reserve( in.size() );
@ -153,26 +153,26 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
return out; return out;
} }
bool isWhitespace( wchar ch ) bool isWhitespace( char32_t ch )
{ {
//invisible character should be treated as whitespace as well. //invisible character should be treated as whitespace as well.
return QChar::isSpace( ch ) || !QChar::isPrint( ch ); return QChar::isSpace( ch ) || !QChar::isPrint( ch );
} }
bool isWhitespaceOrPunct( wchar ch ) bool isWhitespaceOrPunct( char32_t ch )
{ {
return isWhitespace( ch ) || QChar::isPunct( ch ); return isWhitespace( ch ) || QChar::isPunct( ch );
} }
bool isPunct( wchar ch ) bool isPunct( char32_t ch )
{ {
return QChar::isPunct( ch ); return QChar::isPunct( ch );
} }
wstring trimWhitespaceOrPunct( wstring const & in ) std::u32string trimWhitespaceOrPunct( std::u32string const & in )
{ {
wchar const * wordBegin = in.c_str(); char32_t const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size(); std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace // Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) { while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
@ -185,7 +185,7 @@ wstring trimWhitespaceOrPunct( wstring const & in )
--wordSize; --wordSize;
} }
return wstring( wordBegin, wordSize ); return std::u32string( wordBegin, wordSize );
} }
QString trimWhitespaceOrPunct( QString const & in ) QString trimWhitespaceOrPunct( QString const & in )
@ -209,13 +209,13 @@ QString trimWhitespaceOrPunct( QString const & in )
return in.mid( wordBegin, wordSize ); return in.mid( wordBegin, wordSize );
} }
wstring trimWhitespace( wstring const & in ) std::u32string trimWhitespace( std::u32string const & in )
{ {
if ( in.empty() ) { if ( in.empty() ) {
return in; return in;
} }
wchar const * wordBegin = in.c_str(); char32_t const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size(); std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace // Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) { while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -228,7 +228,7 @@ wstring trimWhitespace( wstring const & in )
--wordSize; --wordSize;
} }
return wstring( wordBegin, wordSize ); return std::u32string( wordBegin, wordSize );
} }
QString trimWhitespace( QString const & in ) QString trimWhitespace( QString const & in )

View file

@ -3,7 +3,7 @@
#pragma once #pragma once
#include "wstring.hh" #include "text.hh"
#include <QString> #include <QString>
/// Folding provides means to translate several possible ways to write a /// Folding provides means to translate several possible ways to write a
@ -17,8 +17,6 @@
namespace Folding { namespace Folding {
using gd::wstring;
using gd::wchar;
/// The algorithm's version. /// The algorithm's version.
enum { enum {
@ -27,48 +25,48 @@ enum {
/// Applies the folding algorithm to each character in the given string, /// Applies the folding algorithm to each character in the given string,
/// making another one as a result. /// making another one as a result.
wstring apply( wstring const &, bool preserveWildcards = false ); std::u32string apply( std::u32string const &, bool preserveWildcards = false );
/// Applies only simple case folding algorithm. Since many dictionaries have /// Applies only simple case folding algorithm. Since many dictionaries have
/// different case style, we interpret words differing only by case as synonyms. /// different case style, we interpret words differing only by case as synonyms.
wstring applySimpleCaseOnly( wstring const & ); std::u32string applySimpleCaseOnly( std::u32string const & );
wstring applySimpleCaseOnly( QString const & in ); std::u32string applySimpleCaseOnly( QString const & in );
wstring applySimpleCaseOnly( std::string const & in ); std::u32string applySimpleCaseOnly( std::string const & in );
/// Applies only full case folding algorithm. This includes simple case, but also /// Applies only full case folding algorithm. This includes simple case, but also
/// decomposing ligatures and complex letters. /// decomposing ligatures and complex letters.
wstring applyFullCaseOnly( wstring const & ); std::u32string applyFullCaseOnly( std::u32string const & );
/// Applies only diacritics folding algorithm. /// Applies only diacritics folding algorithm.
wstring applyDiacriticsOnly( wstring const & ); std::u32string applyDiacriticsOnly( std::u32string const & );
/// Applies only punctuation folding algorithm. /// Applies only punctuation folding algorithm.
wstring applyPunctOnly( wstring const & ); std::u32string applyPunctOnly( std::u32string const & );
QString applyPunctOnly( QString const & in ); QString applyPunctOnly( QString const & in );
/// Applies only whitespace folding algorithm. /// Applies only whitespace folding algorithm.
wstring applyWhitespaceOnly( wstring const & ); std::u32string applyWhitespaceOnly( std::u32string const & );
/// Applies only whitespace&punctuation folding algorithm. /// Applies only whitespace&punctuation folding algorithm.
wstring applyWhitespaceAndPunctOnly( wstring const & ); std::u32string applyWhitespaceAndPunctOnly( std::u32string const & );
/// Returns true if the given character is any form of whitespace, false /// Returns true if the given character is any form of whitespace, false
/// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also /// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also
/// includes \n, \r and \t. /// includes \n, \r and \t.
bool isWhitespace( wchar ch ); bool isWhitespace( char32_t ch );
bool isWhitespaceOrPunct( wchar ch ); bool isWhitespaceOrPunct( char32_t ch );
/// Returns true if the given character is any form of punctuation, false /// Returns true if the given character is any form of punctuation, false
/// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes. /// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes.
bool isPunct( wchar ch ); bool isPunct( char32_t ch );
/// Removes any whitespace or punctuation from the beginning and the end of /// Removes any whitespace or punctuation from the beginning and the end of
/// the word. /// the word.
wstring trimWhitespaceOrPunct( wstring const & ); std::u32string trimWhitespaceOrPunct( std::u32string const & );
QString trimWhitespaceOrPunct( QString const & in ); QString trimWhitespaceOrPunct( QString const & in );
/// Removes any whitespace from the beginning and the end of /// Removes any whitespace from the beginning and the end of
/// the word. /// the word.
wstring trimWhitespace( wstring const & ); std::u32string trimWhitespace( std::u32string const & );
QString trimWhitespace( QString const & in ); QString trimWhitespace( QString const & in );
/// Same as apply( wstring ), but without any heap operations, therefore /// Same as apply( wstring ), but without any heap operations, therefore
@ -86,6 +84,6 @@ QString unescapeWildcardSymbols( QString const & );
QString escapeWildcardSymbols( QString const & ); QString escapeWildcardSymbols( QString const & );
/// Tests if the given char is one of the Unicode combining marks. /// Tests if the given char is one of the Unicode combining marks.
bool isCombiningMark( wchar ch ); bool isCombiningMark( char32_t ch );
} // namespace Folding } // namespace Folding

View file

@ -5,7 +5,6 @@
#include <vector> #include <vector>
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
#include "wstring_qt.hh"
char const * const Iconv::GdWchar = "UTF-32LE"; char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE"; char const * const Iconv::Utf16Le = "UTF-16LE";
@ -80,7 +79,7 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
return QString::fromUtf8( &outBuf.front(), datasize ); return QString::fromUtf8( &outBuf.front(), datasize );
} }
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize ) std::u32string Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
{ {
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific /// Special-case the dataSize == 0 to avoid any kind of iconv-specific

View file

@ -5,7 +5,7 @@
#include <QString> #include <QString>
#include "wstring.hh" #include "text.hh"
#include "ex.hh" #include "ex.hh"
#include <iconv.h> #include <iconv.h>
@ -35,7 +35,7 @@ public:
QString convert( void const *& inBuf, size_t & inBytesLeft ); QString convert( void const *& inBuf, size_t & inBytesLeft );
// Converts a given block of data from the given encoding to a wide string. // Converts a given block of data from the given encoding to a wide string.
static gd::wstring toWstring( char const * fromEncoding, void const * fromData, size_t dataSize ); static std::u32string toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
// Converts a given block of data from the given encoding to an utf8-encoded // Converts a given block of data from the given encoding to an utf8-encoded
// string. // string.

View file

@ -1,15 +1,21 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org> /* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "utf8.hh" #include "text.hh"
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <QByteArray> #include <QByteArray>
#include <QString> #include <QString>
#include <QList>
namespace Utf8 { namespace Text {
size_t encode( wchar const * in, size_t inSize, char * out_ )
/// Encodes the given UTF-32 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( char32_t const * in, size_t inSize, char * out_ )
{ {
unsigned char * out = (unsigned char *)out_; unsigned char * out = (unsigned char *)out_;
@ -37,13 +43,18 @@ size_t encode( wchar const * in, size_t inSize, char * out_ )
return out - (unsigned char *)out_; return out - (unsigned char *)out_;
} }
long decode( char const * in_, size_t inSize, wchar * out_ ) /// Decodes the given UTF-8 into UTF-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in_, size_t inSize, char32_t * out_ )
{ {
unsigned char const * in = (unsigned char const *)in_; unsigned char const * in = (unsigned char const *)in_;
wchar * out = out_; char32_t * out = out_;
while ( inSize-- ) { while ( inSize-- ) {
wchar result; char32_t result;
if ( *in & 0x80 ) { if ( *in & 0x80 ) {
if ( *in & 0x40 ) { if ( *in & 0x40 ) {
@ -61,22 +72,22 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
inSize -= 3; inSize -= 3;
result = ( (wchar)*in++ & 7 ) << 18; result = ( (char32_t)*in++ & 7 ) << 18;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= ( (wchar)*in++ & 0x3F ) << 12; result |= ( (char32_t)*in++ & 0x3F ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= ( (wchar)*in++ & 0x3F ) << 6; result |= ( (char32_t)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= (wchar)*in++ & 0x3F; result |= (char32_t)*in++ & 0x3F;
} }
else { else {
// Three-byte sequence // Three-byte sequence
@ -87,17 +98,17 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
inSize -= 2; inSize -= 2;
result = ( (wchar)*in++ & 0xF ) << 12; result = ( (char32_t)*in++ & 0xF ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= ( (wchar)*in++ & 0x3F ) << 6; result |= ( (char32_t)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= (wchar)*in++ & 0x3F; result |= (char32_t)*in++ & 0x3F;
} }
} }
else { else {
@ -108,12 +119,12 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
--inSize; --inSize;
result = ( (wchar)*in++ & 0x1F ) << 6; result = ( (char32_t)*in++ & 0x1F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) { if ( ( *in & 0xC0 ) != 0x80 ) {
return -1; return -1;
} }
result |= (wchar)*in++ & 0x3F; result |= (char32_t)*in++ & 0x3F;
} }
} }
else { else {
@ -132,7 +143,7 @@ long decode( char const * in_, size_t inSize, wchar * out_ )
return out - out_; return out - out_;
} }
string encode( wstring const & in ) noexcept std::string toUtf8( std::u32string const & in ) noexcept
{ {
if ( in.empty() ) { if ( in.empty() ) {
return {}; return {};
@ -140,16 +151,16 @@ string encode( wstring const & in ) noexcept
std::vector< char > buffer( in.size() * 4 ); std::vector< char > buffer( in.size() * 4 );
return string( &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) ); return { &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) };
} }
wstring decode( string const & in ) std::u32string toUtf32( std::string const & in )
{ {
if ( in.empty() ) { if ( in.empty() ) {
return {}; return {};
} }
std::vector< wchar > buffer( in.size() ); std::vector< char32_t > buffer( in.size() );
long result = decode( in.data(), in.size(), &buffer.front() ); long result = decode( in.data(), in.size(), &buffer.front() );
@ -157,7 +168,7 @@ wstring decode( string const & in )
throw exCantDecode( in ); throw exCantDecode( in );
} }
return wstring( &buffer.front(), result ); return std::u32string( &buffer.front(), result );
} }
bool isspace( int c ) bool isspace( int c )
@ -247,29 +258,29 @@ LineFeed initLineFeed( const Encoding e )
{ {
LineFeed lf{}; LineFeed lf{};
switch ( e ) { switch ( e ) {
case Utf8::Utf32LE: case Utf32LE:
lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 }; lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
lf.length = 4; lf.length = 4;
break; break;
case Utf8::Utf32BE: case Utf32BE:
lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A }; lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
lf.length = 4; lf.length = 4;
break; break;
case Utf8::Utf16LE: case Utf16LE:
lf.lineFeed = new char[ 2 ]{ 0x0A, 0 }; lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
lf.length = 2; lf.length = 2;
break; break;
case Utf8::Utf16BE: case Utf16BE:
lf.lineFeed = new char[ 2 ]{ 0, 0x0A }; lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
lf.length = 2; lf.length = 2;
break; break;
case Utf8::Windows1252: case Windows1252:
case Utf8::Windows1251: case Windows1251:
case Utf8::Utf8: case Utf8:
case Utf8::Windows1250: case Windows1250:
default: default:
lf.length = 1; lf.length = 1;
lf.lineFeed = new char[ 1 ]{ 0x0A }; lf.lineFeed = new char[ 1 ]{ 0x0A };
@ -277,4 +288,36 @@ LineFeed initLineFeed( const Encoding e )
return lf; return lf;
} }
} // namespace Utf8 // When convert non-BMP characters to wstring,the ending char maybe \0 .This method remove the tailing \0 from the wstring
// as \0 is sensitive in the index. This method will be only used with index related operations like store/query.
/// Returns a copy of v with every trailing U+0000 code point removed.
/// Zeros that are followed by a non-zero code point are kept.
std::u32string removeTrailingZero( std::u32string const & v )
{
  // Work in size_type: storing size() in an int would narrow the value.
  std::u32string::size_type const lastNonZero = v.find_last_not_of( U'\0' );

  // npos means the string is empty or consists solely of zeros.
  if ( lastNonZero == std::u32string::npos ) {
    return {};
  }

  return v.substr( 0, lastNonZero + 1 );
}
/// Converts in to UTF-32 and returns it with every trailing U+0000 code
/// point removed.
std::u32string removeTrailingZero( QString const & in )
{
  // toStdU32String() yields char32_t data directly, so there is no need to
  // reinterpret QList< unsigned int > storage through a cast, and no size is
  // narrowed into an int.
  std::u32string result = in.toStdU32String();

  while ( !result.empty() && result.back() == U'\0' ) {
    result.pop_back();
  }

  return result;
}
std::u32string normalize( const std::u32string & str )
{
return QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C ).toStdU32String();
}
} // namespace Text

50
src/common/text.hh Normal file
View file

@ -0,0 +1,50 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <cstdio>
#include <QByteArray>
#include <string>
#include "ex.hh"
/// Facilities to process Text, focusing on Unicode
namespace Text {
/// Thrown by toUtf32() when the given string can't be decoded.
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
// Those are possible encodings for .dsl files
enum Encoding {
Utf16LE,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8,
Utf32BE,
Utf32LE,
};
/// Encodes the given UTF-32 string as UTF-8. An empty input gives an empty
/// result.
std::string toUtf8( std::u32string const & ) noexcept;
/// Decodes the given UTF-8 string into UTF-32. An empty input gives an empty
/// result. Throws exCantDecode when decoding fails.
std::u32string toUtf32( std::string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
//get the first line in string s1. -1 if not found
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
// NOTE(review): presumably these two map between an Encoding value and its
// textual name -- the implementations are not visible here, confirm.
char const * getEncodingNameFor( Encoding e );
Encoding getEncodingForName( const QByteArray & name );
/// The byte sequence of the line feed character (0x0A) in some encoding.
struct LineFeed
{
// Number of bytes lineFeed points to.
int length;
// Buffer allocated with new[] by initLineFeed().
// NOTE(review): who releases it is not visible here -- confirm.
char * lineFeed;
};
/// Builds the LineFeed for the given encoding: 4 bytes for the UTF-32
/// variants, 2 bytes for the UTF-16 variants, and 1 byte for anything else.
LineFeed initLineFeed( Encoding e );
/// Return a copy of the input, as UTF-32, with trailing zero code points
/// removed.
std::u32string removeTrailingZero( std::u32string const & v );
std::u32string removeTrailingZero( QString const & in );
/// Returns the given string converted to Unicode Normalization Form C.
std::u32string normalize( std::u32string const & );
} // namespace Text

View file

@ -1,68 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <cstdio>
#include <QByteArray>
#include <string>
#include "ex.hh"
#include "wstring.hh"
/// A simple UTF-8 encoder/decoder. Some dictionary backends only require
/// utf8, so we have this separately, removing the iconv dependency for them.
/// Besides, utf8 is quite ubiquitous now, and its use is spread across many
/// places.
namespace Utf8 {
// Those are possible encodings for .dsl files
enum Encoding {
Utf16LE,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8, // This is an extension. Detected solely by the UTF8 BOM.
Utf32BE,
Utf32LE,
};
using std::string;
using gd::wstring;
using gd::wchar;
/// Thrown by decode( string const & ) when the given string can't be decoded.
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
/// Encodes the given UCS-4 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( wchar const * in, size_t inSize, char * out );
/// Decodes the given UTF-8 into UTF-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in, size_t inSize, wchar * out );
/// Versions for non time-critical code. decode() throws exCantDecode on
/// failure; both return an empty string for an empty input.
string encode( wstring const & ) noexcept;
wstring decode( string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
//get the first line in string s1. -1 if not found
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
// NOTE(review): presumably these two map between an Encoding value and its
// textual name -- the implementations are not visible here, confirm.
char const * getEncodingNameFor( Encoding e );
Encoding getEncodingForName( const QByteArray & name );
/// The byte sequence of the line feed character (0x0A) in some encoding.
struct LineFeed
{
// Number of bytes lineFeed points to.
int length;
// Buffer allocated with new[] by initLineFeed().
// NOTE(review): who releases it is not visible here -- confirm.
char * lineFeed;
};
/// Builds the LineFeed for the given encoding: 4 bytes for the UTF-32
/// variants, 2 bytes for the UTF-16 variants, and 1 byte for anything else.
LineFeed initLineFeed( Encoding e );
} // namespace Utf8

View file

@ -1,17 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <string>
///
/// Aliases for legacy reasons.
///
/// For new code, just use the standardized std::u32string for UTF-32 strings instead.
///
namespace gd {
/// Legacy name for a single UTF-32 code unit.
using wchar = char32_t;
/// Legacy name for a UTF-32 string.
using wstring = std::u32string;
} // namespace gd

View file

@ -1,38 +0,0 @@
#include "wstring_qt.hh"
#include <QList>
namespace gd {
// When converting non-BMP characters to a wstring, the result may end with \0. This function removes the trailing \0
// from the wstring, as \0 is significant in the index. It is meant to be used only for index-related operations such as store/query.
/// Returns a copy of v with every trailing zero code point removed.
/// Zeros that are followed by a non-zero code point are kept.
wstring removeTrailingZero( wstring const & v )
{
  // Count in size_type: storing size() in an int would narrow the value.
  wstring::size_type keep = v.size();

  while ( keep > 0 && v[ keep - 1 ] == 0 ) {
    --keep;
  }

  return v.substr( 0, keep );
}
/// Converts in to UTF-32 and returns it with every trailing zero code point
/// removed.
wstring removeTrailingZero( QString const & in )
{
  // toStdU32String() yields char32_t data directly, so there is no need to
  // reinterpret QList< unsigned int > storage through a cast, and no size is
  // narrowed into an int.
  wstring result = in.toStdU32String();

  while ( !result.empty() && result.back() == 0 ) {
    result.pop_back();
  }

  return result;
}
/// Returns str converted to Unicode Normalization Form C.
wstring normalize( const wstring & str )
{
  // Round-trip through QString, which provides the normalization routine.
  QString const source   = QString::fromStdU32String( str );
  QString const composed = source.normalized( QString::NormalizationForm_C );
  return composed.toStdU32String();
}
} // namespace gd

View file

@ -1,16 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
/// This file adds conversions between gd::wstring and QString. See wstring.hh
/// for more details on gd::wstring.
#include "wstring.hh"
#include <QString>
namespace gd {
/// Return a copy of the input, as a wstring, with trailing zero code points
/// removed.
wstring removeTrailingZero( wstring const & v );
wstring removeTrailingZero( QString const & in );
/// Returns the given string converted to Unicode Normalization Form C.
wstring normalize( wstring const & );
} // namespace gd

View file

@ -149,6 +149,7 @@ Preferences::Preferences():
doubleClickTranslates( true ), doubleClickTranslates( true ),
selectWordBySingleClick( false ), selectWordBySingleClick( false ),
autoScrollToTargetArticle( true ), autoScrollToTargetArticle( true ),
targetArticleAtFirst( false ),
escKeyHidesMainWindow( false ), escKeyHidesMainWindow( false ),
alwaysOnTop( false ), alwaysOnTop( false ),
searchInDock( false ), searchInDock( false ),
@ -800,7 +801,7 @@ Class load()
// Upgrading // Upgrading
c.dictServers = makeDefaultDictServers(); c.dictServers = makeDefaultDictServers();
} }
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
QDomNode ves = root.namedItem( "voiceEngines" ); QDomNode ves = root.namedItem( "voiceEngines" );
if ( !ves.isNull() ) { if ( !ves.isNull() ) {
@ -877,6 +878,11 @@ Class load()
( preferences.namedItem( "autoScrollToTargetArticle" ).toElement().text() == "1" ); ( preferences.namedItem( "autoScrollToTargetArticle" ).toElement().text() == "1" );
} }
if ( !preferences.namedItem( "targetArticleAtFirst" ).isNull() ) {
c.preferences.targetArticleAtFirst =
( preferences.namedItem( "targetArticleAtFirst" ).toElement().text() == "1" );
}
if ( !preferences.namedItem( "escKeyHidesMainWindow" ).isNull() ) { if ( !preferences.namedItem( "escKeyHidesMainWindow" ).isNull() ) {
c.preferences.escKeyHidesMainWindow = c.preferences.escKeyHidesMainWindow =
( preferences.namedItem( "escKeyHidesMainWindow" ).toElement().text() == "1" ); ( preferences.namedItem( "escKeyHidesMainWindow" ).toElement().text() == "1" );
@ -1684,7 +1690,7 @@ void save( Class const & c )
p.setAttributeNode( icon ); p.setAttributeNode( icon );
} }
} }
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
{ {
QDomNode ves = dd.createElement( "voiceEngines" ); QDomNode ves = dd.createElement( "voiceEngines" );
root.appendChild( ves ); root.appendChild( ves );
@ -1814,6 +1820,10 @@ void save( Class const & c )
opt.appendChild( dd.createTextNode( c.preferences.autoScrollToTargetArticle ? "1" : "0" ) ); opt.appendChild( dd.createTextNode( c.preferences.autoScrollToTargetArticle ? "1" : "0" ) );
preferences.appendChild( opt ); preferences.appendChild( opt );
opt = dd.createElement( "targetArticleAtFirst" );
opt.appendChild( dd.createTextNode( c.preferences.targetArticleAtFirst ? "1" : "0" ) );
preferences.appendChild( opt );
opt = dd.createElement( "escKeyHidesMainWindow" ); opt = dd.createElement( "escKeyHidesMainWindow" );
opt.appendChild( dd.createTextNode( c.preferences.escKeyHidesMainWindow ? "1" : "0" ) ); opt.appendChild( dd.createTextNode( c.preferences.escKeyHidesMainWindow ? "1" : "0" ) );
preferences.appendChild( opt ); preferences.appendChild( opt );

View file

@ -297,6 +297,7 @@ struct Preferences
bool doubleClickTranslates; bool doubleClickTranslates;
bool selectWordBySingleClick; bool selectWordBySingleClick;
bool autoScrollToTargetArticle; bool autoScrollToTargetArticle;
bool targetArticleAtFirst;
bool escKeyHidesMainWindow; bool escKeyHidesMainWindow;
bool alwaysOnTop; bool alwaysOnTop;
@ -725,7 +726,7 @@ struct Program
using Programs = QList< Program >; using Programs = QList< Program >;
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
struct VoiceEngine struct VoiceEngine
{ {
bool enabled; bool enabled;
@ -818,7 +819,7 @@ struct Class
Lingua lingua; Lingua lingua;
Forvo forvo; Forvo forvo;
Programs programs; Programs programs;
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
VoiceEngines voiceEngines; VoiceEngines voiceEngines;
#endif #endif

View file

@ -4,7 +4,7 @@
#include "aard.hh" #include "aard.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "decompress.hh" #include "decompress.hh"
@ -29,7 +29,6 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -216,11 +215,6 @@ public:
~AardDictionary(); ~AardDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -241,8 +235,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override; QString const & getDescription() override;
@ -606,8 +602,8 @@ AardDictionary::getSearchResults( QString const & searchString, int searchMode,
class AardArticleRequest: public Dictionary::DataRequest class AardArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
AardDictionary & dict; AardDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -616,8 +612,8 @@ class AardArticleRequest: public Dictionary::DataRequest
public: public:
AardArticleRequest( wstring const & word_, AardArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
AardDictionary & dict_, AardDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -661,13 +657,13 @@ void AardArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonims make it that the articles set< quint32 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -698,12 +694,12 @@ void AardArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -719,7 +715,7 @@ void AardArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>"; result += "<h3>";
@ -742,9 +738,9 @@ void AardArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > AardDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -920,7 +916,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
// Insert new entry // Insert new entry
wstring word = Utf8::decode( string( data.data(), wordSize ) ); std::u32string word = Text::toUtf32( string( data.data(), wordSize ) );
if ( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand ) { if ( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand ) {
indexedWords.addSingleWord( word, articleOffset ); indexedWords.addSingleWord( word, articleOffset );
} }

View file

@ -11,7 +11,7 @@
#include "htmlescape.hh" #include "htmlescape.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "language.hh" #include "language.hh"
#include "utf8.hh" #include "text.hh"
#include "utils.hh" #include "utils.hh"
#include <ctype.h> #include <ctype.h>
#include <list> #include <list>
@ -30,8 +30,6 @@ namespace Bgl {
using std::map; using std::map;
using std::multimap; using std::multimap;
using std::set; using std::set;
using gd::wstring;
using gd::wchar;
using std::list; using std::list;
using std::pair; using std::pair;
using std::string; using std::string;
@ -111,7 +109,7 @@ void trimWs( string & word )
if ( word.size() ) { if ( word.size() ) {
unsigned begin = 0; unsigned begin = 0;
while ( begin < word.size() && Utf8::isspace( word[ begin ] ) ) { while ( begin < word.size() && Text::isspace( word[ begin ] ) ) {
++begin; ++begin;
} }
@ -123,7 +121,7 @@ void trimWs( string & word )
// Doesn't consist of ws entirely, so must end with just isspace() // Doesn't consist of ws entirely, so must end with just isspace()
// condition. // condition.
while ( Utf8::isspace( word[ end - 1 ] ) ) { while ( Text::isspace( word[ end - 1 ] ) ) {
--end; --end;
} }
@ -137,7 +135,7 @@ void trimWs( string & word )
void addEntryToIndex( string & word, void addEntryToIndex( string & word,
uint32_t articleOffset, uint32_t articleOffset,
IndexedWords & indexedWords, IndexedWords & indexedWords,
vector< wchar > & wcharBuffer ) vector< char32_t > & wcharBuffer )
{ {
// Strip any leading or trailing whitespaces // Strip any leading or trailing whitespaces
trimWs( word ); trimWs( word );
@ -159,7 +157,7 @@ void addEntryToIndex( string & word,
} }
// Convert the word from utf8 to wide chars // Convert the word from utf8 to wide chars
indexedWords.addWord( Utf8::decode( word ), articleOffset ); indexedWords.addWord( Text::toUtf32( word ), articleOffset );
} }
class BglDictionary: public BtreeIndexing::BtreeDictionary class BglDictionary: public BtreeIndexing::BtreeDictionary
@ -173,11 +171,6 @@ public:
BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile ); BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile );
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -198,10 +191,12 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override; sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -392,7 +387,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() ); headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() );
wstring wstr = Utf8::decode( articleStr ); std::u32string wstr = Text::toUtf32( articleStr );
if ( getLangTo() == LangCoder::code2toInt( "he" ) ) { if ( getLangTo() == LangCoder::code2toInt( "he" ) ) {
for ( char32_t & i : wstr ) { for ( char32_t & i : wstr ) {
@ -441,7 +436,7 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
class BglHeadwordsRequest: public Dictionary::WordSearchRequest class BglHeadwordsRequest: public Dictionary::WordSearchRequest
{ {
wstring str; std::u32string str;
BglDictionary & dict; BglDictionary & dict;
QAtomicInt isCancelled; QAtomicInt isCancelled;
@ -449,7 +444,7 @@ class BglHeadwordsRequest: public Dictionary::WordSearchRequest
public: public:
BglHeadwordsRequest( wstring const & word_, BglDictionary & dict_ ): BglHeadwordsRequest( std::u32string const & word_, BglDictionary & dict_ ):
str( word_ ), str( word_ ),
dict( dict_ ) dict( dict_ )
{ {
@ -481,7 +476,7 @@ void BglHeadwordsRequest::run()
vector< WordArticleLink > chain = dict.findArticles( str ); vector< WordArticleLink > chain = dict.findArticles( str );
wstring caseFolded = Folding::applySimpleCaseOnly( str ); std::u32string caseFolded = Folding::applySimpleCaseOnly( str );
for ( auto & x : chain ) { for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -493,11 +488,11 @@ void BglHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, displayedHeadword, articleText ); dict.loadArticle( x.articleOffset, headword, displayedHeadword, articleText );
wstring headwordDecoded; std::u32string headwordDecoded;
try { try {
headwordDecoded = Utf8::decode( removePostfix( headword ) ); headwordDecoded = Text::toUtf32( removePostfix( headword ) );
} }
catch ( Utf8::exCantDecode & ) { catch ( Text::exCantDecode & ) {
} }
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() ) { if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() ) {
@ -512,7 +507,7 @@ void BglHeadwordsRequest::run()
finish(); finish();
} }
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( wstring const & word ) sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( std::u32string const & word )
{ {
return synonymSearchEnabled ? std::make_shared< BglHeadwordsRequest >( word, *this ) : return synonymSearchEnabled ? std::make_shared< BglHeadwordsRequest >( word, *this ) :
@ -552,8 +547,8 @@ string postfixToSuperscript( string const & in )
class BglArticleRequest: public Dictionary::DataRequest class BglArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
BglDictionary & dict; BglDictionary & dict;
QAtomicInt isCancelled; QAtomicInt isCancelled;
@ -562,8 +557,8 @@ class BglArticleRequest: public Dictionary::DataRequest
public: public:
BglArticleRequest( wstring const & word_, BglArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
BglDictionary & dict_, BglDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -595,11 +590,11 @@ public:
void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - convert non-unicode to unicode void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - convert non-unicode to unicode
{ {
wstring hebWStr; std::u32string hebWStr;
try { try {
hebWStr = Utf8::decode( hebStr ); hebWStr = Text::toUtf32( hebStr );
} }
catch ( Utf8::exCantDecode & ) { catch ( Text::exCantDecode & ) {
hebStr = "Utf-8 decoding error"; hebStr = "Utf-8 decoding error";
return; return;
} }
@ -613,7 +608,7 @@ void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - conv
i += 1488 - 224; // Convert to Hebrew unicode i += 1488 - 224; // Convert to Hebrew unicode
} }
} }
hebStr = Utf8::encode( hebWStr ); hebStr = Text::toUtf8( hebWStr );
} }
void BglArticleRequest::fixHebArticle( string & hebArticle ) // Hebrew support - remove extra chars at the end void BglArticleRequest::fixHebArticle( string & hebArticle ) // Hebrew support - remove extra chars at the end
@ -649,7 +644,7 @@ void BglArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
@ -658,7 +653,7 @@ void BglArticleRequest::run()
// the bodies to account for this. // the bodies to account for this.
set< QByteArray > articleBodiesIncluded; set< QByteArray > articleBodiesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -686,7 +681,7 @@ void BglArticleRequest::run()
// We do the case-folded and postfix-less comparison here. // We do the case-folded and postfix-less comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
@ -709,7 +704,7 @@ void BglArticleRequest::run()
continue; // Already had this body continue; // Already had this body
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( targetHeadword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( targetHeadword, articleText ) ) );
@ -730,7 +725,7 @@ void BglArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner(); string cleaner = Utils::Html::getHtmlCleaner();
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -807,9 +802,9 @@ void BglArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > BglDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > BglDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -1090,7 +1085,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
IndexedWords indexedWords; IndexedWords indexedWords;
// We use this buffer to decode utf8 into it. // We use this buffer to decode utf8 into it.
vector< wchar > wcharBuffer; vector< char32_t > wcharBuffer;
ChunkedStorage::Writer chunks( idx ); ChunkedStorage::Writer chunks( idx );

View file

@ -3,11 +3,10 @@
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include <math.h> #include <math.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include "wstring_qt.hh"
#include "utils.hh" #include "utils.hh"
#include <QRegularExpression> #include <QRegularExpression>
@ -19,8 +18,6 @@
namespace BtreeIndexing { namespace BtreeIndexing {
using gd::wstring;
using gd::wchar;
using std::pair; using std::pair;
enum { enum {
@ -59,14 +56,14 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, File::Index & file, QMu
} }
vector< WordArticleLink > vector< WordArticleLink >
BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount ) BtreeIndex::findArticles( std::u32string const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
{ {
//First trim ending zero //First trim ending zero
wstring word = gd::removeTrailingZero( search_word ); std::u32string word = Text::removeTrailingZero( search_word );
vector< WordArticleLink > result; vector< WordArticleLink > result;
try { try {
wstring folded = Folding::apply( word ); std::u32string folded = Folding::apply( word );
if ( folded.empty() ) { if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word ); folded = Folding::applyWhitespaceOnly( word );
} }
@ -100,7 +97,7 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_, BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_, std::u32string const & str_,
unsigned minLength_, unsigned minLength_,
int maxSuffixVariation_, int maxSuffixVariation_,
bool allowMiddleMatches_, bool allowMiddleMatches_,
@ -137,11 +134,11 @@ void BtreeWordSearchRequest::findMatches()
bool useWildcards = false; bool useWildcards = false;
if ( allowMiddleMatches ) { if ( allowMiddleMatches ) {
useWildcards = ( str.find( '*' ) != wstring::npos || str.find( '?' ) != wstring::npos useWildcards = ( str.find( '*' ) != std::u32string::npos || str.find( '?' ) != std::u32string::npos
|| str.find( '[' ) != wstring::npos || str.find( ']' ) != wstring::npos ); || str.find( '[' ) != std::u32string::npos || str.find( ']' ) != std::u32string::npos );
} }
wstring folded = Folding::apply( str ); std::u32string folded = Folding::apply( str );
int minMatchLength = 0; int minMatchLength = 0;
@ -154,7 +151,7 @@ void BtreeWordSearchRequest::findMatches()
regexp.setPatternOptions( QRegularExpression::CaseInsensitiveOption ); regexp.setPatternOptions( QRegularExpression::CaseInsensitiveOption );
bool bNoLetters = folded.empty(); bool bNoLetters = folded.empty();
wstring foldedWithWildcards; std::u32string foldedWithWildcards;
if ( bNoLetters ) { if ( bNoLetters ) {
foldedWithWildcards = Folding::applyWhitespaceOnly( str ); foldedWithWildcards = Folding::applyWhitespaceOnly( str );
@ -268,9 +265,9 @@ void BtreeWordSearchRequest::findMatches()
vector< WordArticleLink > chain = dict.readChain( chainOffset ); vector< WordArticleLink > chain = dict.readChain( chainOffset );
wstring chainHead = Utf8::decode( chain[ 0 ].word ); std::u32string chainHead = Text::toUtf32( chain[ 0 ].word );
wstring resultFolded = Folding::apply( chainHead ); std::u32string resultFolded = Folding::apply( chainHead );
if ( resultFolded.empty() ) { if ( resultFolded.empty() ) {
resultFolded = Folding::applyWhitespaceOnly( chainHead ); resultFolded = Folding::applyWhitespaceOnly( chainHead );
} }
@ -286,9 +283,9 @@ void BtreeWordSearchRequest::findMatches()
break; break;
} }
if ( useWildcards ) { if ( useWildcards ) {
wstring word = Utf8::decode( x.prefix + x.word ); std::u32string word = Text::toUtf32( x.prefix + x.word );
wstring result = Folding::applyDiacriticsOnly( word ); std::u32string result = Folding::applyDiacriticsOnly( word );
if ( result.size() >= (wstring::size_type)minMatchLength ) { if ( result.size() >= (std::u32string::size_type)minMatchLength ) {
QRegularExpressionMatch match = regexp.match( QString::fromStdU32String( result ) ); QRegularExpressionMatch match = regexp.match( QString::fromStdU32String( result ) );
if ( match.hasMatch() && match.capturedStart() == 0 ) { if ( match.hasMatch() && match.capturedStart() == 0 ) {
addMatch( word ); addMatch( word );
@ -298,10 +295,10 @@ void BtreeWordSearchRequest::findMatches()
else { else {
// Skip middle matches, if requested. If suffix variation is specified, // Skip middle matches, if requested. If suffix variation is specified,
// make sure the string isn't larger than requested. // make sure the string isn't larger than requested.
if ( ( allowMiddleMatches || Folding::apply( Utf8::decode( x.prefix ) ).empty() ) if ( ( allowMiddleMatches || Folding::apply( Text::toUtf32( x.prefix ) ).empty() )
&& ( maxSuffixVariation < 0 && ( maxSuffixVariation < 0
|| (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) ) { || (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) ) {
addMatch( Utf8::decode( x.prefix + x.word ) ); addMatch( Text::toUtf32( x.prefix + x.word ) );
} }
} }
if ( matches.size() >= maxResults ) { if ( matches.size() >= maxResults ) {
@ -393,13 +390,14 @@ BtreeWordSearchRequest::~BtreeWordSearchRequest()
f.waitForFinished(); f.waitForFinished();
} }
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( wstring const & str, unsigned long maxResults ) sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( std::u32string const & str,
unsigned long maxResults )
{ {
return std::make_shared< BtreeWordSearchRequest >( *this, str, 0, -1, true, maxResults ); return std::make_shared< BtreeWordSearchRequest >( *this, str, 0, -1, true, maxResults );
} }
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( wstring const & str, sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( std::u32string const & str,
unsigned minLength, unsigned minLength,
unsigned maxSuffixVariation, unsigned maxSuffixVariation,
unsigned long maxResults ) unsigned long maxResults )
@ -437,8 +435,11 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
} }
} }
char const * BtreeIndex::findChainOffsetExactOrPrefix( char const * BtreeIndex::findChainOffsetExactOrPrefix( std::u32string const & target,
wstring const & target, bool & exactMatch, vector< char > & extLeaf, uint32_t & nextLeaf, char const *& leafEnd ) bool & exactMatch,
vector< char > & extLeaf,
uint32_t & nextLeaf,
char const *& leafEnd )
{ {
if ( !idxFile ) { if ( !idxFile ) {
throw exIndexWasNotOpened(); throw exIndexWasNotOpened();
@ -449,7 +450,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
// Lookup the index by traversing the index btree // Lookup the index by traversing the index btree
// vector< wchar > wcharBuffer; // vector< wchar > wcharBuffer;
wstring w_word; std::u32string w_word;
exactMatch = false; exactMatch = false;
// Read a node // Read a node
@ -530,7 +531,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( closestString ); size_t wordSize = strlen( closestString );
w_word = Utf8::decode( string( closestString, wordSize ) ); w_word = Text::toUtf32( string( closestString, wordSize ) );
compareResult = target.compare( w_word ); compareResult = target.compare( w_word );
@ -649,9 +650,9 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( ptr ); size_t wordSize = strlen( ptr );
w_word = Utf8::decode( string( ptr, wordSize ) ); w_word = Text::toUtf32( string( ptr, wordSize ) );
wstring foldedWord = Folding::apply( w_word ); std::u32string foldedWord = Folding::apply( w_word );
if ( foldedWord.empty() ) { if ( foldedWord.empty() ) {
foldedWord = Folding::applyWhitespaceOnly( w_word ); foldedWord = Folding::applyWhitespaceOnly( w_word );
} }
@ -750,9 +751,9 @@ vector< WordArticleLink > BtreeIndex::readChain( char const *& ptr, uint32_t max
return result; return result;
} }
void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics ) void BtreeIndex::antialias( std::u32string const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
{ {
wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) ); std::u32string caseFolded = Folding::applySimpleCaseOnly( Text::normalize( str ) );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
caseFolded = Folding::applyDiacriticsOnly( caseFolded ); caseFolded = Folding::applyDiacriticsOnly( caseFolded );
} }
@ -764,8 +765,8 @@ void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & cha
for ( unsigned x = chain.size(); x--; ) { for ( unsigned x = chain.size(); x--; ) {
// If after applying case folding to each word they wouldn't match, we // If after applying case folding to each word they wouldn't match, we
// drop the entry. // drop the entry.
wstring entry = std::u32string entry =
Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) ); Folding::applySimpleCaseOnly( Text::normalize( Text::toUtf32( chain[ x ].prefix + chain[ x ].word ) ) );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
entry = Folding::applyDiacriticsOnly( entry ); entry = Folding::applyDiacriticsOnly( entry );
} }
@ -923,9 +924,9 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
return offset; return offset;
} }
void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize ) void IndexedWords::addWord( std::u32string const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
{ {
wstring word = gd::removeTrailingZero( index_word ); std::u32string word = Text::removeTrailingZero( index_word );
string::size_type wordSize = word.size(); string::size_type wordSize = word.size();
// Safeguard us against various bugs here. Don't attempt adding words // Safeguard us against various bugs here. Don't attempt adding words
@ -945,7 +946,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
wordSize = word.size(); wordSize = word.size();
} }
wchar const * wordBegin = word.c_str(); char32_t const * wordBegin = word.c_str();
// Skip any leading whitespace // Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) { while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -958,7 +959,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
--wordSize; --wordSize;
} }
wchar const * nextChar = wordBegin; char32_t const * nextChar = wordBegin;
vector< char > utfBuffer( wordSize * 4 ); vector< char > utfBuffer( wordSize * 4 );
@ -970,11 +971,11 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
if ( !*nextChar ) // End of string ends everything if ( !*nextChar ) // End of string ends everything
{ {
if ( wordsAdded == 0 ) { if ( wordsAdded == 0 ) {
wstring folded = Folding::applyWhitespaceOnly( wstring( wordBegin, wordSize ) ); std::u32string folded = Folding::applyWhitespaceOnly( std::u32string( wordBegin, wordSize ) );
if ( !folded.empty() ) { if ( !folded.empty() ) {
auto i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first; auto i = insert( { Text::toUtf8( folded ), vector< WordArticleLink >() } ).first;
string utfWord = Utf8::encode( wstring( wordBegin, wordSize ) ); string utfWord = Text::toUtf8( std::u32string( wordBegin, wordSize ) );
string utfPrefix; string utfPrefix;
i->second.emplace_back( utfWord, articleOffset, utfPrefix ); i->second.emplace_back( utfWord, articleOffset, utfPrefix );
} }
@ -988,15 +989,15 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
} }
// Insert this word // Insert this word
wstring folded = Folding::apply( nextChar ); std::u32string folded = Folding::apply( nextChar );
auto name = Utf8::encode( folded ); auto name = Text::toUtf8( folded );
auto i = insert( { std::move( name ), vector< WordArticleLink >() } ).first; auto i = insert( { std::move( name ), vector< WordArticleLink >() } ).first;
if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
{ {
string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) ); string utfWord = Text::toUtf8( std::u32string( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) ); string utfPrefix = Text::toUtf8( std::u32string( wordBegin, nextChar - wordBegin ) );
i->second.emplace_back( std::move( utfWord ), articleOffset, std::move( utfPrefix ) ); i->second.emplace_back( std::move( utfWord ), articleOffset, std::move( utfPrefix ) );
// reduce the vector reallocation. // reduce the vector reallocation.
@ -1020,14 +1021,14 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
} }
} }
void IndexedWords::addSingleWord( wstring const & index_word, uint32_t articleOffset ) void IndexedWords::addSingleWord( std::u32string const & index_word, uint32_t articleOffset )
{ {
wstring const & word = gd::removeTrailingZero( index_word ); std::u32string const & word = Text::removeTrailingZero( index_word );
wstring folded = Folding::apply( word ); std::u32string folded = Folding::apply( word );
if ( folded.empty() ) { if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word ); folded = Folding::applyWhitespaceOnly( word );
} }
operator[]( Utf8::encode( folded ) ).emplace_back( Utf8::encode( word ), articleOffset ); operator[]( Text::toUtf8( folded ) ).emplace_back( Text::toUtf8( word ), articleOffset );
} }
IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file ) IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )

View file

@ -18,7 +18,6 @@
namespace BtreeIndexing { namespace BtreeIndexing {
using std::string; using std::string;
using gd::wstring;
using std::vector; using std::vector;
using std::map; using std::map;
@ -80,7 +79,8 @@ public:
/// Finds articles that match the given string. A case-insensitive search /// Finds articles that match the given string. A case-insensitive search
/// is performed. /// is performed.
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 ); vector< WordArticleLink >
findArticles( std::u32string const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
/// Find all unique article links in the index /// Find all unique article links in the index
void findAllArticleLinks( QList< WordArticleLink > & articleLinks ); void findAllArticleLinks( QList< WordArticleLink > & articleLinks );
@ -116,8 +116,11 @@ protected:
/// case, the returned pointer wouldn't belong to 'leaf' at all. To that end, /// case, the returned pointer wouldn't belong to 'leaf' at all. To that end,
/// the leafEnd pointer always holds the pointer to the first byte outside /// the leafEnd pointer always holds the pointer to the first byte outside
/// the node data. /// the node data.
char const * findChainOffsetExactOrPrefix( char const * findChainOffsetExactOrPrefix( std::u32string const & target,
wstring const & target, bool & exactMatch, vector< char > & leaf, uint32_t & nextLeaf, char const *& leafEnd ); bool & exactMatch,
vector< char > & leaf,
uint32_t & nextLeaf,
char const *& leafEnd );
/// Reads a node or leaf at the given offset. Just uncompresses its data /// Reads a node or leaf at the given offset. Just uncompresses its data
/// to the given vector and does nothing more. /// to the given vector and does nothing more.
@ -129,7 +132,7 @@ protected:
/// Drops any aliases which arose due to folding. Only case-folded aliases /// Drops any aliases which arose due to folding. Only case-folded aliases
/// are left. /// are left.
void antialias( wstring const &, vector< WordArticleLink > &, bool ignoreDiactitics ); void antialias( std::u32string const &, vector< WordArticleLink > &, bool ignoreDiactitics );
protected: protected:
@ -161,10 +164,10 @@ public:
/// This function does the search using the btree index. Derivatives usually /// This function does the search using the btree index. Derivatives usually
/// need not to implement this function. /// need not to implement this function.
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ); virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest > virtual sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ); stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
virtual bool isLocalDictionary() virtual bool isLocalDictionary()
{ {
@ -210,7 +213,7 @@ class BtreeWordSearchRequest: public Dictionary::WordSearchRequest
{ {
protected: protected:
BtreeDictionary & dict; BtreeDictionary & dict;
wstring str; std::u32string str;
unsigned long maxResults; unsigned long maxResults;
unsigned minLength; unsigned minLength;
int maxSuffixVariation; int maxSuffixVariation;
@ -221,7 +224,7 @@ protected:
public: public:
BtreeWordSearchRequest( BtreeDictionary & dict_, BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_, std::u32string const & str_,
unsigned minLength_, unsigned minLength_,
int maxSuffixVariation_, int maxSuffixVariation_,
bool allowMiddleMatches_, bool allowMiddleMatches_,
@ -251,11 +254,11 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
/// Instead of adding to the map directly, use this function. It does folding /// Instead of adding to the map directly, use this function. It does folding
/// itself, and for phrases/sentences it adds additional entries beginning with /// itself, and for phrases/sentences it adds additional entries beginning with
/// each new word. /// each new word.
void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U ); void addWord( std::u32string const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
/// Differs from addWord() in that it only adds a single entry. We use this /// Differs from addWord() in that it only adds a single entry. We use this
/// for zip's file names. /// for zip's file names.
void addSingleWord( wstring const & word, uint32_t articleOffset ); void addSingleWord( std::u32string const & word, uint32_t articleOffset );
}; };
/// Builds the index, as a compressed btree. Returns IndexInfo. /// Builds the index, as a compressed btree. Returns IndexInfo.

View file

@ -4,7 +4,7 @@
#include "dictdfiles.hh" #include "dictdfiles.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "dictzip.hh" #include "dictzip.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "langcoder.hh" #include "langcoder.hh"
@ -29,7 +29,6 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using std::vector; using std::vector;
using std::list; using std::list;
@ -91,11 +90,6 @@ public:
~DictdDictionary(); ~DictdDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -118,8 +112,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override; QString const & getDescription() override;
@ -239,9 +235,9 @@ uint32_t decodeBase64( string const & str )
return number; return number;
} }
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > DictdDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -256,13 +252,13 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, string > mainArticles, alternateArticles; multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -382,12 +378,12 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, string > & mapToUse = multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( x.word ), articleText ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( x.word ), articleText ) );
@ -401,7 +397,7 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
string result; string result;
multimap< wstring, string >::const_iterator i; multimap< std::u32string, string >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second; result += i->second;
@ -427,7 +423,8 @@ QString const & DictdDictionary::getDescription()
return dictionaryDescription; return dictionaryDescription;
} }
sptr< Dictionary::DataRequest > req = getArticle( U"00databaseinfo", vector< wstring >(), wstring(), false ); sptr< Dictionary::DataRequest > req =
getArticle( U"00databaseinfo", vector< std::u32string >(), std::u32string(), false );
if ( req->dataSize() > 0 ) { if ( req->dataSize() > 0 ) {
dictionaryDescription = QString::fromUtf8( req->getFullData().data(), req->getFullData().size() ); dictionaryDescription = QString::fromUtf8( req->getFullData().data(), req->getFullData().size() );
@ -634,10 +631,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Handle the forth entry, if it exists. From dictfmt man: // Handle the forth entry, if it exists. From dictfmt man:
// When --index-keep-orig option is used fourth column is created // When --index-keep-orig option is used fourth column is created
// (if necessary) in .index file. // (if necessary) in .index file.
indexedWords.addWord( Utf8::decode( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset ); indexedWords.addWord( Text::toUtf32( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
++idxHeader.wordCount; ++idxHeader.wordCount;
} }
indexedWords.addWord( Utf8::decode( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset ); indexedWords.addWord( Text::toUtf32( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
++idxHeader.wordCount; ++idxHeader.wordCount;
++idxHeader.articleCount; ++idxHeader.articleCount;
@ -662,7 +659,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
eol = articleBody; // No headword itself eol = articleBody; // No headword itself
} }
if ( eol ) { if ( eol ) {
while ( *eol && Utf8::isspace( *eol ) ) { while ( *eol && Text::isspace( *eol ) ) {
++eol; // skip spaces ++eol; // skip spaces
} }

View file

@ -177,7 +177,7 @@ void Class::deferredInit()
//base method. //base method.
} }
sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/, sptr< WordSearchRequest > Class::stemmedMatch( std::u32string const & /*str*/,
unsigned /*minLength*/, unsigned /*minLength*/,
unsigned /*maxSuffixVariation*/, unsigned /*maxSuffixVariation*/,
unsigned long /*maxResults*/ ) unsigned long /*maxResults*/ )
@ -185,12 +185,12 @@ sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
return std::make_shared< WordSearchRequestInstant >(); return std::make_shared< WordSearchRequestInstant >();
} }
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & ) sptr< WordSearchRequest > Class::findHeadwordsForSynonym( std::u32string const & )
{ {
return std::make_shared< WordSearchRequestInstant >(); return std::make_shared< WordSearchRequestInstant >();
} }
vector< wstring > Class::getAlternateWritings( wstring const & ) noexcept vector< std::u32string > Class::getAlternateWritings( std::u32string const & ) noexcept
{ {
return {}; return {};
} }

View file

@ -19,7 +19,7 @@
#include "langcoder.hh" #include "langcoder.hh"
#include "sptr.hh" #include "sptr.hh"
#include "utils.hh" #include "utils.hh"
#include "wstring.hh" #include "text.hh"
#include <QtGlobal> #include <QtGlobal>
/// Abstract dictionary-related stuff /// Abstract dictionary-related stuff
@ -27,16 +27,8 @@ namespace Dictionary {
using std::vector; using std::vector;
using std::string; using std::string;
using gd::wstring;
using std::map; using std::map;
enum Property {
Author,
Copyright,
Description,
Email
};
DEF_EX( Ex, "Dictionary error", std::exception ) DEF_EX( Ex, "Dictionary error", std::exception )
DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex ) DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex )
DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex ) DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex )
@ -131,19 +123,19 @@ private:
/// algorithms. Positive values are used by morphology matches. /// algorithms. Positive values are used by morphology matches.
struct WordMatch struct WordMatch
{ {
wstring word; std::u32string word;
int weight; int weight;
WordMatch(): WordMatch():
weight( 0 ) weight( 0 )
{ {
} }
WordMatch( wstring const & word_ ): WordMatch( std::u32string const & word_ ):
word( word_ ), word( word_ ),
weight( 0 ) weight( 0 )
{ {
} }
WordMatch( wstring const & word_, int weight_ ): WordMatch( std::u32string const & word_, int weight_ ):
word( word_ ), word( word_ ),
weight( weight_ ) weight( weight_ )
{ {
@ -380,10 +372,6 @@ public:
metadata_enable_fts = _enable_FTS; metadata_enable_fts = _enable_FTS;
} }
/// Returns all the available properties, like the author's name, copyright,
/// description etc. All strings are in utf8.
virtual map< Property, string > getProperties() noexcept = 0;
/// Returns the features the dictionary possess. See the Feature enum for /// Returns the features the dictionary possess. See the Feature enum for
/// their list. /// their list.
virtual Features getFeatures() const noexcept virtual Features getFeatures() const noexcept
@ -442,7 +430,7 @@ public:
/// prefix results should be added. Not more than maxResults results should /// prefix results should be added. Not more than maxResults results should
/// be stored. The whole operation is supposed to be fast, though some /// be stored. The whole operation is supposed to be fast, though some
/// dictionaries, the network ones particularly, may of course be slow. /// dictionaries, the network ones particularly, may of course be slow.
virtual sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) = 0; virtual sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) = 0;
/// Looks up a given word in the dictionary, aiming to find different forms /// Looks up a given word in the dictionary, aiming to find different forms
/// of the given word by allowing suffix variations. This means allowing words /// of the given word by allowing suffix variations. This means allowing words
@ -453,20 +441,20 @@ public:
/// in the middle of a phrase got matched should be returned. /// in the middle of a phrase got matched should be returned.
/// The default implementation does nothing, returning an empty result. /// The default implementation does nothing, returning an empty result.
virtual sptr< WordSearchRequest > virtual sptr< WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ); stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
/// Finds known headwords for the given word, that is, the words for which /// Finds known headwords for the given word, that is, the words for which
/// the given word is a synonym. If a dictionary can't perform this operation, /// the given word is a synonym. If a dictionary can't perform this operation,
/// it should leave the default implementation which always returns an empty /// it should leave the default implementation which always returns an empty
/// result. /// result.
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ); virtual sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
/// For a given word, provides alternate writings of it which are to be looked /// For a given word, provides alternate writings of it which are to be looked
/// up alongside with it. Transliteration dictionaries implement this. The /// up alongside with it. Transliteration dictionaries implement this. The
/// default implementation returns an empty list. Note that this function is /// default implementation returns an empty list. Note that this function is
/// supposed to be very fast and simple, and the results are thus returned /// supposed to be very fast and simple, and the results are thus returned
/// synchronously. /// synchronously.
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept; virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
/// Returns a definition for the given word. The definition should /// Returns a definition for the given word. The definition should
/// be an html fragment (without html/head/body tags) in an utf8 encoding. /// be an html fragment (without html/head/body tags) in an utf8 encoding.
@ -475,10 +463,10 @@ public:
/// synonyms for the main word. /// synonyms for the main word.
/// context is a dictionary-specific data, currently only used for the /// context is a dictionary-specific data, currently only used for the
/// 'Websites' feature. /// 'Websites' feature.
virtual sptr< DataRequest > getArticle( wstring const &, virtual sptr< DataRequest > getArticle( std::u32string const &,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const & context = wstring(), std::u32string const & context = std::u32string(),
bool ignoreDiacritics = false ) = 0; bool ignoreDiacritics = false ) = 0;
/// Loads contents of a resource named 'name' into the 'data' vector. This is /// Loads contents of a resource named 'name' into the 'data' vector. This is
/// usually a picture file referenced in the article or something like that. /// usually a picture file referenced in the article or something like that.

View file

@ -2,7 +2,6 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "dictserver.hh" #include "dictserver.hh"
#include "wstring_qt.hh"
#include <QTimer> #include <QTimer>
#include <QUrl> #include <QUrl>
#include <QTcpSocket> #include <QTcpSocket>
@ -303,10 +302,6 @@ public:
disconnectFromServer( socket ); disconnectFromServer( socket );
} }
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
@ -318,9 +313,10 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override; sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override quint32 getLangFrom() const override
{ {
@ -391,7 +387,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
{ {
Q_OBJECT Q_OBJECT
QAtomicInt isCancelled; QAtomicInt isCancelled;
wstring word; std::u32string word;
QString errorString; QString errorString;
DictServerDictionary & dict; DictServerDictionary & dict;
@ -406,7 +402,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
public: public:
DictServerWordSearchRequest( wstring word_, DictServerDictionary & dict_ ): DictServerWordSearchRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ), word( std::move( word_ ) ),
dict( dict_ ), dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-w" ) ) dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-w" ) )
@ -566,7 +562,7 @@ void DictServer::DictServerWordSearchRequest::addMatchedWord( const QString & st
class DictServerArticleRequest: public Dictionary::DataRequest class DictServerArticleRequest: public Dictionary::DataRequest
{ {
QAtomicInt isCancelled; QAtomicInt isCancelled;
wstring word; std::u32string word;
QString errorString; QString errorString;
DictServerDictionary & dict; DictServerDictionary & dict;
string articleData; string articleData;
@ -582,7 +578,7 @@ class DictServerArticleRequest: public Dictionary::DataRequest
public: public:
DictServerImpl * dictImpl; DictServerImpl * dictImpl;
DictServerArticleRequest( wstring word_, DictServerDictionary & dict_ ): DictServerArticleRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ), word( std::move( word_ ) ),
dict( dict_ ), dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-t" ) ) dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-t" ) )
@ -874,7 +870,7 @@ void DictServerArticleRequest::cancel()
finish(); finish();
} }
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & word, unsigned long maxResults ) sptr< WordSearchRequest > DictServerDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{ {
(void)maxResults; (void)maxResults;
if ( word.size() > 80 ) { if ( word.size() > 80 ) {
@ -887,8 +883,10 @@ sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & wor
} }
} }
sptr< DataRequest > sptr< DataRequest > DictServerDictionary::getArticle( std::u32string const & word,
DictServerDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool ) vector< std::u32string > const &,
std::u32string const &,
bool )
{ {
if ( word.size() > 80 ) { if ( word.size() > 80 ) {

View file

@ -5,7 +5,7 @@
#include "dsl_details.hh" #include "dsl_details.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "dictzip.hh" #include "dictzip.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
@ -13,7 +13,6 @@
#include "filetype.hh" #include "filetype.hh"
#include "audiolink.hh" #include "audiolink.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "wstring_qt.hh"
#include "indexedzip.hh" #include "indexedzip.hh"
#include "tiff.hh" #include "tiff.hh"
#include "ftshelpers.hh" #include "ftshelpers.hh"
@ -44,11 +43,9 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using gd::wchar;
using std::vector; using std::vector;
using std::list; using std::list;
using Utf8::Encoding; using Text::Encoding;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -100,8 +97,8 @@ struct InsidedCard
{ {
uint32_t offset; uint32_t offset;
uint32_t size; uint32_t size;
QList< wstring > headwords; QList< std::u32string > headwords;
InsidedCard( uint32_t _offset, uint32_t _size, QList< wstring > const & words ): InsidedCard( uint32_t _offset, uint32_t _size, QList< std::u32string > const & words ):
offset( _offset ), offset( _offset ),
size( _size ), size( _size ),
headwords( words ) headwords( words )
@ -144,7 +141,7 @@ class DslDictionary: public BtreeIndexing::BtreeDictionary
int optionalPartNom; int optionalPartNom;
quint8 articleNom; quint8 articleNom;
wstring currentHeadword; std::u32string currentHeadword;
string resourceDir1, resourceDir2; string resourceDir1, resourceDir2;
public: public:
@ -156,11 +153,6 @@ public:
~DslDictionary(); ~DslDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -192,8 +184,10 @@ public:
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -237,15 +231,15 @@ private:
/// Loads the article. Does not process the DSL language. /// Loads the article. Does not process the DSL language.
void loadArticle( uint32_t address, void loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded, std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics, bool ignoreDiacritics,
wstring & tildeValue, std::u32string & tildeValue,
wstring & displayedHeadword, std::u32string & displayedHeadword,
unsigned & headwordIndex, unsigned & headwordIndex,
wstring & articleText ); std::u32string & articleText );
/// Converts DSL language to an Html. /// Converts DSL language to an Html.
string dslToHtml( wstring const &, wstring const & headword = wstring() ); string dslToHtml( std::u32string const &, std::u32string const & headword = std::u32string() );
// Parts of dslToHtml() // Parts of dslToHtml()
string nodeToHtml( ArticleDom::Node const & ); string nodeToHtml( ArticleDom::Node const & );
@ -457,7 +451,7 @@ void DslDictionary::loadIcon() noexcept
/// so nbsp is not a whitespace character for Dsl compiler. /// so nbsp is not a whitespace character for Dsl compiler.
/// For now we have only space and tab, since those are most likely the only /// For now we have only space and tab, since those are most likely the only
/// ones recognized as spaces by that compiler. /// ones recognized as spaces by that compiler.
bool isDslWs( wchar ch ) bool isDslWs( char32_t ch )
{ {
switch ( ch ) { switch ( ch ) {
case ' ': case ' ':
@ -469,14 +463,14 @@ bool isDslWs( wchar ch )
} }
void DslDictionary::loadArticle( uint32_t address, void DslDictionary::loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded, std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics, bool ignoreDiacritics,
wstring & tildeValue, std::u32string & tildeValue,
wstring & displayedHeadword, std::u32string & displayedHeadword,
unsigned & headwordIndex, unsigned & headwordIndex,
wstring & articleText ) std::u32string & articleText )
{ {
wstring articleData; std::u32string articleData;
{ {
vector< char > chunk; vector< char > chunk;
@ -512,7 +506,7 @@ void DslDictionary::loadArticle( uint32_t address,
else { else {
try { try {
articleData = articleData =
Iconv::toWstring( Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize ); Iconv::toWstring( Text::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
free( articleBody ); free( articleBody );
// Strip DSL comments // Strip DSL comments
@ -533,27 +527,27 @@ void DslDictionary::loadArticle( uint32_t address,
// Check is we retrieve insided card // Check is we retrieve insided card
bool insidedCard = isDslWs( articleData.at( 0 ) ); bool insidedCard = isDslWs( articleData.at( 0 ) );
wstring tildeValueWithUnsorted; // This one has unsorted parts left std::u32string tildeValueWithUnsorted; // This one has unsorted parts left
for ( headwordIndex = 0;; ) { for ( headwordIndex = 0;; ) {
size_t begin = pos; size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin ); pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) { if ( pos == std::u32string::npos ) {
pos = articleData.size(); pos = articleData.size();
} }
if ( !foundDisplayedHeadword ) { if ( !foundDisplayedHeadword ) {
// Process the headword // Process the headword
wstring rawHeadword = wstring( articleData, begin, pos - begin ); std::u32string rawHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !rawHeadword.empty() && isDslWs( rawHeadword[ 0 ] ) ) { if ( insidedCard && !rawHeadword.empty() && isDslWs( rawHeadword[ 0 ] ) ) {
// Headword of the insided card // Headword of the insided card
wstring::size_type hpos = rawHeadword.find( L'@' ); std::u32string::size_type hpos = rawHeadword.find( L'@' );
if ( hpos != string::npos ) { if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) ); std::u32string head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' ); hpos = head.find( L'~' );
while ( hpos != string::npos ) { while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) { if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break; break;
@ -574,7 +568,7 @@ void DslDictionary::loadArticle( uint32_t address,
// We need our tilde expansion value // We need our tilde expansion value
tildeValue = rawHeadword; tildeValue = rawHeadword;
list< wstring > lst; list< std::u32string > lst;
expandOptionalParts( tildeValue, &lst ); expandOptionalParts( tildeValue, &lst );
@ -586,7 +580,7 @@ void DslDictionary::loadArticle( uint32_t address,
processUnsortedParts( tildeValue, false ); processUnsortedParts( tildeValue, false );
} }
wstring str = rawHeadword; std::u32string str = rawHeadword;
if ( hadFirstHeadword ) { if ( hadFirstHeadword ) {
expandTildes( str, tildeValueWithUnsorted ); expandTildes( str, tildeValueWithUnsorted );
@ -596,7 +590,7 @@ void DslDictionary::loadArticle( uint32_t address,
str = Folding::applySimpleCaseOnly( str ); str = Folding::applySimpleCaseOnly( str );
list< wstring > lst; list< std::u32string > lst;
expandOptionalParts( str, &lst ); expandOptionalParts( str, &lst );
// Does one of the results match the requested word? If so, we'd choose // Does one of the results match the requested word? If so, we'd choose
@ -662,15 +656,15 @@ void DslDictionary::loadArticle( uint32_t address,
// Check for begin article text // Check for begin article text
if ( insidedCard ) { if ( insidedCard ) {
// Check for next insided headword // Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos ); std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) { if ( hpos == std::u32string::npos ) {
hpos = articleData.size(); hpos = articleData.size();
} }
wstring str = wstring( articleData, pos, hpos - pos ); std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' ); hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) { if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break; break;
} }
} }
@ -692,18 +686,18 @@ void DslDictionary::loadArticle( uint32_t address,
} }
if ( pos != articleData.size() ) { if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos ); articleText = std::u32string( articleData, pos );
} }
else { else {
articleText.clear(); articleText.clear();
} }
} }
string DslDictionary::dslToHtml( wstring const & str, wstring const & headword ) string DslDictionary::dslToHtml( std::u32string const & str, std::u32string const & headword )
{ {
// Normalize the string // Normalize the string
wstring normalizedStr = gd::normalize( str ); std::u32string normalizedStr = Text::normalize( str );
currentHeadword = headword; currentHeadword = headword;
ArticleDom dom( normalizedStr, getName(), headword ); ArticleDom dom( normalizedStr, getName(), headword );
@ -738,7 +732,7 @@ string DslDictionary::getNodeLink( ArticleDom::Node const & node )
} }
} }
if ( link.empty() ) { if ( link.empty() ) {
link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) ); link = Html::escape( Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false ) );
} }
return link; return link;
@ -749,7 +743,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
string result; string result;
if ( !node.isTag ) { if ( !node.isTag ) {
result = Html::escape( Utf8::encode( node.text ) ); result = Html::escape( Text::toUtf8( node.text ) );
// Handle all end-of-line // Handle all end-of-line
@ -789,7 +783,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"c_default_color\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"c_default_color\">" + processNodeChildren( node ) + "</span>";
} }
else { else {
result += "<font color=\"" + Html::escape( Utf8::encode( node.tagAttrs ) ) + "\">" + processNodeChildren( node ) result += "<font color=\"" + Html::escape( Text::toUtf8( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
+ "</font>"; + "</font>";
} }
} }
@ -802,7 +796,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>"; result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
} }
else if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && iswdigit( node.tagName[ 1 ] ) ) { else if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && iswdigit( node.tagName[ 1 ] ) ) {
result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>"; result += "<div class=\"dsl_" + Text::toUtf8( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
} }
else if ( node.tagName == U"trn" ) { else if ( node.tagName == U"trn" ) {
result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>";
@ -814,7 +808,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>"; result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>";
} }
else if ( node.tagName == U"s" || node.tagName == U"video" ) { else if ( node.tagName == U"s" || node.tagName == U"video" ) {
string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ); string filename = Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false );
string n = resourceDir1 + filename; string n = resourceDir1 + filename;
if ( Filetype::isNameOfSound( filename ) ) { if ( Filetype::isNameOfSound( filename ) ) {
@ -893,7 +887,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
else if ( node.tagName == U"p" ) { else if ( node.tagName == U"p" ) {
result += "<span class=\"dsl_p\""; result += "<span class=\"dsl_p\"";
string val = Utf8::encode( node.renderAsText() ); string val = Text::toUtf8( node.renderAsText() );
// If we have such a key, display a title // If we have such a key, display a title
@ -913,7 +907,8 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
// user could pick up the best suitable option. // user could pick up the best suitable option.
string data = processNodeChildren( node ); string data = processNodeChildren( node );
result += R"(<span class="dsl_stress"><span class="dsl_stress_without_accent">)" + data + "</span>" result += R"(<span class="dsl_stress"><span class="dsl_stress_without_accent">)" + data + "</span>"
+ "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) ) + "</span></span>"; + "<span class=\"dsl_stress_with_accent\">" + data + Text::toUtf8( std::u32string( 1, 0x301 ) )
+ "</span></span>";
} }
else if ( node.tagName == U"lang" ) { else if ( node.tagName == U"lang" ) {
result += "<span class=\"dsl_lang\""; result += "<span class=\"dsl_lang\"";
@ -949,7 +944,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" ); url.setScheme( "gdlookup" );
url.setHost( "localhost" ); url.setHost( "localhost" );
auto nodeStr = Utf8::decode( getNodeLink( node ) ); auto nodeStr = Text::toUtf32( getNodeLink( node ) );
normalizeHeadword( nodeStr ); normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) ); url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -973,7 +968,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" ); url.setScheme( "gdlookup" );
url.setHost( "localhost" ); url.setHost( "localhost" );
wstring nodeStr = node.renderAsText(); std::u32string nodeStr = node.renderAsText();
normalizeHeadword( nodeStr ); normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) ); url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -1125,7 +1120,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
vector< char > chunk; vector< char > chunk;
char * articleProps; char * articleProps;
wstring articleData; std::u32string articleData;
{ {
QMutexLocker _( &idxMutex ); QMutexLocker _( &idxMutex );
@ -1166,7 +1161,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Skip headword // Skip headword
size_t pos = 0; size_t pos = 0;
wstring articleHeadword, tildeValue; std::u32string articleHeadword, tildeValue;
// Check if we retrieve insided card // Check if we retrieve insided card
bool insidedCard = isDslWs( articleData.at( 0 ) ); bool insidedCard = isDslWs( articleData.at( 0 ) );
@ -1175,20 +1170,20 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
size_t begin = pos; size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin ); pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) { if ( pos == std::u32string::npos ) {
pos = articleData.size(); pos = articleData.size();
} }
if ( articleHeadword.empty() ) { if ( articleHeadword.empty() ) {
// Process the headword // Process the headword
articleHeadword = wstring( articleData, begin, pos - begin ); articleHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !articleHeadword.empty() && isDslWs( articleHeadword[ 0 ] ) ) { if ( insidedCard && !articleHeadword.empty() && isDslWs( articleHeadword[ 0 ] ) ) {
// Headword of the insided card // Headword of the insided card
wstring::size_type hpos = articleHeadword.find( L'@' ); std::u32string::size_type hpos = articleHeadword.find( L'@' );
if ( hpos != string::npos ) { if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) ); std::u32string head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' ); hpos = head.find( L'~' );
while ( hpos != string::npos ) { while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) { if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break; break;
@ -1205,7 +1200,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
} }
if ( !articleHeadword.empty() ) { if ( !articleHeadword.empty() ) {
list< wstring > lst; list< std::u32string > lst;
tildeValue = articleHeadword; tildeValue = articleHeadword;
@ -1242,15 +1237,15 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Check for begin article text // Check for begin article text
if ( insidedCard ) { if ( insidedCard ) {
// Check for next insided headword // Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos ); std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) { if ( hpos == std::u32string::npos ) {
hpos = articleData.size(); hpos = articleData.size();
} }
wstring str = wstring( articleData, pos, hpos - pos ); std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' ); hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) { if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break; break;
} }
} }
@ -1266,17 +1261,17 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromStdU32String( articleHeadword ); headword = QString::fromStdU32String( articleHeadword );
} }
wstring articleText; std::u32string articleText;
if ( pos != articleData.size() ) { if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos ); articleText = std::u32string( articleData, pos );
} }
else { else {
articleText.clear(); articleText.clear();
} }
if ( !tildeValue.empty() ) { if ( !tildeValue.empty() ) {
list< wstring > lst; list< std::u32string > lst;
processUnsortedParts( tildeValue, false ); processUnsortedParts( tildeValue, false );
expandOptionalParts( tildeValue, &lst ); expandOptionalParts( tildeValue, &lst );
@ -1382,8 +1377,8 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class DslArticleRequest: public Dictionary::DataRequest class DslArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
DslDictionary & dict; DslDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -1392,8 +1387,8 @@ class DslArticleRequest: public Dictionary::DataRequest
public: public:
DslArticleRequest( wstring const & word_, DslArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
DslDictionary & dict_, DslDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -1449,7 +1444,7 @@ void DslArticleRequest::run()
// index here. // index here.
set< pair< uint32_t, unsigned > > articlesIncluded; set< pair< uint32_t, unsigned > > articlesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) { for ( auto & x : chain ) {
// Check if we're cancelled occasionally // Check if we're cancelled occasionally
@ -1460,9 +1455,9 @@ void DslArticleRequest::run()
// Grab that article // Grab that article
wstring tildeValue; std::u32string tildeValue;
wstring displayedHeadword; std::u32string displayedHeadword;
wstring articleBody; std::u32string articleBody;
unsigned headwordIndex; unsigned headwordIndex;
string articleText, articleAfter; string articleText, articleAfter;
@ -1546,9 +1541,9 @@ void DslArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > DslDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > DslDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -1637,7 +1632,7 @@ void DslResourceRequest::run()
if ( dict.resourceZip.isOpen() ) { if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex ); QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) { if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive throw; // Make it fail since we couldn't read the archive
} }
} }
@ -1766,7 +1761,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
// Building the index // Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) ); initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
qDebug( "Dsl: Building the index for dictionary: %s", qDebug( "Dsl: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() ); QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
@ -1782,12 +1777,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader ); idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() ); string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() ); idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() ); idx.write( dictionaryName.data(), dictionaryName.size() );
string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() ); string soundDictName = Text::toUtf8( scanner.getSoundDictionaryName() );
if ( !soundDictName.empty() ) { if ( !soundDictName.empty() ) {
idxHeader.hasSoundDictionaryName = 1; idxHeader.hasSoundDictionaryName = 1;
idx.write( (uint32_t)soundDictName.size() ); idx.write( (uint32_t)soundDictName.size() );
@ -1808,7 +1803,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
map< string, string > abrv; map< string, string > abrv;
wstring curString; std::u32string curString;
size_t curOffset; size_t curOffset;
for ( ;; ) { for ( ;; ) {
@ -1820,7 +1815,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
continue; continue;
} }
list< wstring > keys; list< std::u32string > keys;
bool eof = false; bool eof = false;
@ -1856,13 +1851,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
// If the string has any dsl markup, we strip it // If the string has any dsl markup, we strip it
string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() ); string value = Text::toUtf8( ArticleDom( curString ).root.renderAsText() );
for ( auto & key : keys ) { for ( auto & key : keys ) {
unescapeDsl( key ); unescapeDsl( key );
normalizeHeadword( key ); normalizeHeadword( key );
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value; abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
} }
} }
@ -1890,7 +1885,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
bool hasString = false; bool hasString = false;
wstring curString; std::u32string curString;
size_t curOffset; size_t curOffset;
uint32_t articleCount = 0, wordCount = 0; uint32_t articleCount = 0, wordCount = 0;
@ -1924,7 +1919,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Ok, got the headword // Ok, got the headword
list< wstring > allEntryWords; list< std::u32string > allEntryWords;
processUnsortedParts( curString, true ); processUnsortedParts( curString, true );
expandOptionalParts( curString, &allEntryWords ); expandOptionalParts( curString, &allEntryWords );
@ -1977,10 +1972,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
wordCount += allEntryWords.size(); wordCount += allEntryWords.size();
int insideInsided = 0; int insideInsided = 0;
wstring headword; std::u32string headword;
QList< InsidedCard > insidedCards; QList< InsidedCard > insidedCards;
uint32_t offset = curOffset; uint32_t offset = curOffset;
QList< wstring > insidedHeadwords; QList< std::u32string > insidedHeadwords;
unsigned linesInsideCard = 0; unsigned linesInsideCard = 0;
int dogLine = 0; int dogLine = 0;
bool wasEmptyLine = false; bool wasEmptyLine = false;
@ -2023,8 +2018,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Find embedded cards // Find embedded cards
wstring::size_type n = curString.find( L'@' ); std::u32string::size_type n = curString.find( L'@' );
if ( n == wstring::npos || curString[ n - 1 ] == L'\\' ) { if ( n == std::u32string::npos || curString[ n - 1 ] == L'\\' ) {
if ( insideInsided ) { if ( insideInsided ) {
linesInsideCard++; linesInsideCard++;
} }

View file

@ -6,7 +6,7 @@
#include "folding.hh" #include "folding.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "ufile.hh" #include "ufile.hh"
#include "utf8.hh" #include "text.hh"
#include <exception> #include <exception>
#include <stdio.h> #include <stdio.h>
@ -17,9 +17,8 @@
namespace Dsl { namespace Dsl {
namespace Details { namespace Details {
using gd::wstring;
using std::list; using std::list;
using Utf8::Encoding; using Text::Encoding;
static QMap< int, string > lang_codes = { static QMap< int, string > lang_codes = {
{ 1, "en" }, { 1033, "en" }, { 2, "ru" }, { 1049, "ru" }, { 1068, "az" }, { 1025, "ar" }, { 1067, "am" }, { 1, "en" }, { 1033, "en" }, { 2, "ru" }, { 1049, "ru" }, { 1068, "az" }, { 1025, "ar" }, { 1067, "am" },
@ -40,7 +39,7 @@ string findCodeForDslId( int id )
return lang_codes[ id ]; return lang_codes[ id ];
} }
bool isAtSignFirst( wstring const & str ) bool isAtSignFirst( std::u32string const & str )
{ {
// Test if '@' is first in string except spaces and dsl tags // Test if '@' is first in string except spaces and dsl tags
QRegularExpression reg( R"([ \t]*(?:\[[^\]]+\][ \t]*)*@)", QRegularExpression::PatternOption::CaseInsensitiveOption ); QRegularExpression reg( R"([ \t]*(?:\[[^\]]+\][ \t]*)*@)", QRegularExpression::PatternOption::CaseInsensitiveOption );
@ -49,13 +48,13 @@ bool isAtSignFirst( wstring const & str )
/////////////// ArticleDom /////////////// ArticleDom
wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const std::u32string ArticleDom::Node::renderAsText( bool stripTrsTag ) const
{ {
if ( !isTag ) { if ( !isTag ) {
return text; return text;
} }
wstring result; std::u32string result;
for ( const auto & i : *this ) { for ( const auto & i : *this ) {
if ( !stripTrsTag || i.tagName != U"!trs" ) { if ( !stripTrsTag || i.tagName != U"!trs" ) {
@ -69,17 +68,17 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
namespace { namespace {
/// @return true if @p tagName equals "mN" where N is a digit /// @return true if @p tagName equals "mN" where N is a digit
bool is_mN( wstring const & tagName ) bool is_mN( std::u32string const & tagName )
{ {
return tagName.size() == 2 && tagName[ 0 ] == U'm' && iswdigit( tagName[ 1 ] ); return tagName.size() == 2 && tagName[ 0 ] == U'm' && iswdigit( tagName[ 1 ] );
} }
bool isAnyM( wstring const & tagName ) bool isAnyM( std::u32string const & tagName )
{ {
return tagName == U"m" || is_mN( tagName ); return tagName == U"m" || is_mN( tagName );
} }
bool checkM( wstring const & dest, wstring const & src ) bool checkM( std::u32string const & dest, std::u32string const & src )
{ {
return src == U"m" && is_mN( dest ); return src == U"m" && is_mN( dest );
} }
@ -97,8 +96,8 @@ struct MustTagBeClosed
} // unnamed namespace } // unnamed namespace
ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring const & headword_ ): ArticleDom::ArticleDom( std::u32string const & str, string const & dictName, std::u32string const & headword_ ):
root( Node::Tag(), wstring(), wstring() ), root( Node::Tag(), std::u32string(), std::u32string() ),
stringPos( str.c_str() ), stringPos( str.c_str() ),
lineStartPos( str.c_str() ), lineStartPos( str.c_str() ),
transcriptionCount( 0 ), transcriptionCount( 0 ),
@ -126,7 +125,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
} }
else { else {
// Insided card // Insided card
wstring linkTo; std::u32string linkTo;
nextChar(); nextChar();
for ( ;; nextChar() ) { for ( ;; nextChar() ) {
if ( ch == L'\n' ) { if ( ch == L'\n' ) {
@ -142,13 +141,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
linkTo = Folding::trimWhitespace( linkTo ); linkTo = Folding::trimWhitespace( linkTo );
if ( !linkTo.empty() ) { if ( !linkTo.empty() ) {
list< wstring > allLinkEntries; list< std::u32string > allLinkEntries;
processUnsortedParts( linkTo, true ); processUnsortedParts( linkTo, true );
expandOptionalParts( linkTo, &allLinkEntries ); expandOptionalParts( linkTo, &allLinkEntries );
for ( auto entry = allLinkEntries.begin(); entry != allLinkEntries.end(); ) { for ( auto entry = allLinkEntries.begin(); entry != allLinkEntries.end(); ) {
if ( !textNode ) { if ( !textNode ) {
Node text = Node( Node::Text(), wstring() ); Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) { if ( stack.empty() ) {
root.push_back( text ); root.push_back( text );
@ -168,10 +167,10 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
stack.pop_back(); stack.pop_back();
textNode = 0; textNode = 0;
wstring linkText = Folding::trimWhitespace( *entry ); std::u32string linkText = Folding::trimWhitespace( *entry );
ArticleDom nodeDom( linkText, dictName, headword_ ); ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"@", wstring() ); Node link( Node::Tag(), U"@", std::u32string() );
for ( auto & n : nodeDom.root ) { for ( auto & n : nodeDom.root ) {
link.push_back( n ); link.push_back( n );
} }
@ -181,13 +180,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( stack.empty() ) { if ( stack.empty() ) {
root.push_back( link ); root.push_back( link );
if ( entry != allLinkEntries.end() ) { // Add line break before next entry if ( entry != allLinkEntries.end() ) { // Add line break before next entry
root.push_back( Node( Node::Tag(), U"br", wstring() ) ); root.push_back( Node( Node::Tag(), U"br", std::u32string() ) );
} }
} }
else { else {
stack.back()->push_back( link ); stack.back()->push_back( link );
if ( entry != allLinkEntries.end() ) { if ( entry != allLinkEntries.end() ) {
stack.back()->push_back( Node( Node::Tag(), U"br", wstring() ) ); stack.back()->push_back( Node( Node::Tag(), U"br", std::u32string() ) );
} }
} }
} }
@ -208,8 +207,8 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( ch == L'[' && !escaped ) { if ( ch == L'[' && !escaped ) {
// Beginning of a tag. // Beginning of a tag.
bool isClosing; bool isClosing;
wstring name; std::u32string name;
wstring attrs; std::u32string attrs;
try { try {
do { do {
@ -330,7 +329,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
nextChar(); nextChar();
} while ( Folding::isWhitespace( ch ) ); } while ( Folding::isWhitespace( ch ) );
wstring linkTo, linkText; std::u32string linkTo, linkText;
for ( ;; nextChar() ) { for ( ;; nextChar() ) {
// Is it the end? // Is it the end?
@ -373,7 +372,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
processUnsortedParts( linkText, true ); processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ ); ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"ref", wstring() ); Node link( Node::Tag(), U"ref", std::u32string() );
for ( auto & n : nodeDom.root ) { for ( auto & n : nodeDom.root ) {
link.push_back( n ); link.push_back( n );
} }
@ -427,7 +426,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
// If there's currently no text node, open one // If there's currently no text node, open one
if ( !textNode ) { if ( !textNode ) {
Node text = Node( Node::Text(), wstring() ); Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) { if ( stack.empty() ) {
root.push_back( text ); root.push_back( text );
@ -691,7 +690,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
} }
} }
void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Node * > & stack ) void ArticleDom::openTag( std::u32string const & name, std::u32string const & attrs, list< Node * > & stack )
{ {
list< Node > nodesToReopen; list< Node > nodesToReopen;
@ -746,7 +745,7 @@ void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Nod
} }
} }
void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool warn ) void ArticleDom::closeTag( std::u32string const & name, list< Node * > & stack, bool warn )
{ {
// Find the tag which is to be closed // Find the tag which is to be closed
@ -839,13 +838,13 @@ bool ArticleDom::atSignFirstInLine()
return true; return true;
} }
return isAtSignFirst( wstring( lineStartPos ) ); return isAtSignFirst( std::u32string( lineStartPos ) );
} }
/////////////// DslScanner /////////////// DslScanner
DslScanner::DslScanner( string const & fileName ): DslScanner::DslScanner( string const & fileName ):
encoding( Utf8::Utf8 ), encoding( Text::Utf8 ),
readBufferPtr( readBuffer ), readBufferPtr( readBuffer ),
readBufferLeft( 0 ), readBufferLeft( 0 ),
linesRead( 0 ) linesRead( 0 )
@ -876,19 +875,19 @@ DslScanner::DslScanner( string const & fileName ):
guessedEncoding.has_value() ) { guessedEncoding.has_value() ) {
switch ( guessedEncoding.value() ) { switch ( guessedEncoding.value() ) {
case QStringConverter::Utf8: case QStringConverter::Utf8:
encoding = Utf8::Utf8; encoding = Text::Utf8;
break; break;
case QStringConverter::Utf16LE: case QStringConverter::Utf16LE:
encoding = Utf8::Utf16LE; encoding = Text::Utf16LE;
break; break;
case QStringConverter::Utf16BE: case QStringConverter::Utf16BE:
encoding = Utf8::Utf16BE; encoding = Text::Utf16BE;
break; break;
case QStringConverter::Utf32LE: case QStringConverter::Utf32LE:
encoding = Utf8::Utf16LE; encoding = Text::Utf16LE;
break; break;
case QStringConverter::Utf32BE: case QStringConverter::Utf32BE:
encoding = Utf8::Utf32BE; encoding = Text::Utf32BE;
break; break;
default: default:
break; break;
@ -905,10 +904,10 @@ DslScanner::DslScanner( string const & fileName ):
} }
//iconv.reinit( encoding ); //iconv.reinit( encoding );
lineFeed = Utf8::initLineFeed( encoding ); lineFeed = Text::initLineFeed( encoding );
// We now can use our own readNextLine() function // We now can use our own readNextLine() function
wstring str; std::u32string str;
size_t offset; size_t offset;
for ( ;; ) { for ( ;; ) {
@ -946,7 +945,7 @@ DslScanner::DslScanner( string const & fileName ):
size_t beg = str.find_first_of( L'"' ); size_t beg = str.find_first_of( L'"' );
if ( beg == wstring::npos ) { if ( beg == std::u32string::npos ) {
throw exMalformedDslFile( fileName ); throw exMalformedDslFile( fileName );
} }
@ -956,7 +955,7 @@ DslScanner::DslScanner( string const & fileName ):
throw exMalformedDslFile( fileName ); throw exMalformedDslFile( fileName );
} }
wstring arg( str, beg + 1, end - beg - 1 ); std::u32string arg( str, beg + 1, end - beg - 1 );
if ( isName ) { if ( isName ) {
dictionaryName = arg; dictionaryName = arg;
@ -977,13 +976,13 @@ DslScanner::DslScanner( string const & fileName ):
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." ); qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
} }
else if ( !arg.compare( U"Latin" ) ) { else if ( !arg.compare( U"Latin" ) ) {
encoding = Utf8::Windows1252; encoding = Text::Windows1252;
} }
else if ( !arg.compare( U"Cyrillic" ) ) { else if ( !arg.compare( U"Cyrillic" ) ) {
encoding = Utf8::Windows1251; encoding = Text::Windows1251;
} }
else if ( !arg.compare( U"EasternEuropean" ) ) { else if ( !arg.compare( U"EasternEuropean" ) ) {
encoding = Utf8::Windows1250; encoding = Text::Windows1250;
} }
else { else {
gzclose( f ); gzclose( f );
@ -1009,7 +1008,7 @@ DslScanner::~DslScanner() noexcept
gzclose( f ); gzclose( f );
} }
bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_word ) bool DslScanner::readNextLine( std::u32string & out, size_t & offset, bool only_head_word )
{ {
offset = gztell( f ) - readBufferLeft /*+pos*/; offset = gztell( f ) - readBufferLeft /*+pos*/;
@ -1036,7 +1035,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
return false; return false;
} }
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length ); int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) { if ( pos == -1 ) {
return false; return false;
} }
@ -1057,9 +1056,9 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
} }
} }
bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bool only_headword ) bool DslScanner::readNextLineWithoutComments( std::u32string & out, size_t & offset, bool only_headword )
{ {
wstring str; std::u32string str;
bool commentToNextLine = false; bool commentToNextLine = false;
size_t currentOffset; size_t currentOffset;
@ -1087,14 +1086,14 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bo
/////////////// DslScanner /////////////// DslScanner
void processUnsortedParts( wstring & str, bool strip ) void processUnsortedParts( std::u32string & str, bool strip )
{ {
int refCount = 0; int refCount = 0;
size_t startPos = 0; size_t startPos = 0;
for ( size_t x = 0; x < str.size(); ) { for ( size_t x = 0; x < str.size(); ) {
wchar ch = str[ x ]; char32_t ch = str[ x ];
if ( ch == L'\\' ) { if ( ch == L'\\' ) {
// Escape code // Escape code
@ -1150,18 +1149,18 @@ void processUnsortedParts( wstring & str, bool strip )
} }
} }
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, bool inside_recurse ) void expandOptionalParts( std::u32string & str, list< std::u32string > * result, size_t x, bool inside_recurse )
{ {
// if str is too long ,it can never be headwords. // if str is too long ,it can never be headwords.
if ( str.size() > 100 ) { if ( str.size() > 100 ) {
return; return;
} }
list< wstring > expanded; list< std::u32string > expanded;
list< wstring > * headwords; list< std::u32string > * headwords;
headwords = inside_recurse ? result : &expanded; headwords = inside_recurse ? result : &expanded;
for ( ; x < str.size(); ) { for ( ; x < str.size(); ) {
wchar ch = str[ x ]; char32_t ch = str[ x ];
if ( ch == L'\\' ) { if ( ch == L'\\' ) {
// Escape code // Escape code
@ -1174,7 +1173,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
int refCount = 1; int refCount = 1;
for ( size_t y = x + 1; y < str.size(); ++y ) { for ( size_t y = x + 1; y < str.size(); ++y ) {
wchar ch = str[ y ]; char32_t ch = str[ y ];
if ( ch == L'\\' ) { if ( ch == L'\\' ) {
// Escape code // Escape code
@ -1190,7 +1189,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( y != x + 1 ) // Only do for non-empty cases if ( y != x + 1 ) // Only do for non-empty cases
{ {
wstring removed( str, 0, x ); std::u32string removed( str, 0, x );
removed.append( str, y + 1, str.size() - y - 1 ); removed.append( str, y + 1, str.size() - y - 1 );
expandOptionalParts( removed, headwords, x, true ); expandOptionalParts( removed, headwords, x, true );
@ -1204,7 +1203,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( refCount && x != str.size() - 1 ) { if ( refCount && x != str.size() - 1 ) {
// Closing paren not found? Chop it. // Closing paren not found? Chop it.
wstring removed( str, 0, x ); std::u32string removed( str, 0, x );
// Limit the amount of results to avoid excessive resource consumption // Limit the amount of results to avoid excessive resource consumption
if ( headwords->size() < 32 ) { if ( headwords->size() < 32 ) {
@ -1242,10 +1241,10 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
} }
} }
static const wstring openBraces( U"{{" ); static const std::u32string openBraces( U"{{" );
static const wstring closeBraces( U"}}" ); static const std::u32string closeBraces( U"}}" );
void stripComments( wstring & str, bool & nextLine ) void stripComments( std::u32string & str, bool & nextLine )
{ {
string::size_type n = 0, n2 = 0; string::size_type n = 0, n2 = 0;
@ -1269,9 +1268,9 @@ void stripComments( wstring & str, bool & nextLine )
} }
} }
void expandTildes( wstring & str, wstring const & tildeReplacement ) void expandTildes( std::u32string & str, std::u32string const & tildeReplacement )
{ {
wstring tildeValue = Folding::trimWhitespace( tildeReplacement ); std::u32string tildeValue = Folding::trimWhitespace( tildeReplacement );
for ( size_t x = 0; x < str.size(); ) { for ( size_t x = 0; x < str.size(); ) {
if ( str[ x ] == L'\\' ) { if ( str[ x ] == L'\\' ) {
x += 2; x += 2;
@ -1294,7 +1293,7 @@ void expandTildes( wstring & str, wstring const & tildeReplacement )
} }
} }
void unescapeDsl( wstring & str ) void unescapeDsl( std::u32string & str )
{ {
for ( size_t x = 0; x < str.size(); ++x ) { for ( size_t x = 0; x < str.size(); ++x ) {
if ( str[ x ] == L'\\' ) { if ( str[ x ] == L'\\' ) {
@ -1303,7 +1302,7 @@ void unescapeDsl( wstring & str )
} }
} }
void normalizeHeadword( wstring & str ) void normalizeHeadword( std::u32string & str )
{ {
for ( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char for ( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char
{ {
@ -1331,7 +1330,7 @@ void normalizeHeadword( wstring & str )
} }
namespace { namespace {
void cutEnding( wstring & where, wstring const & ending ) void cutEnding( std::u32string & where, std::u32string const & ending )
{ {
if ( where.size() > ending.size() && where.compare( where.size() - ending.size(), ending.size(), ending ) == 0 ) { if ( where.size() > ending.size() && where.compare( where.size() - ending.size(), ending.size(), ending ) == 0 ) {
where.erase( where.size() - ending.size() ); where.erase( where.size() - ending.size() );
@ -1339,17 +1338,17 @@ void cutEnding( wstring & where, wstring const & ending )
} }
} // namespace } // namespace
quint32 dslLanguageToId( wstring const & name ) quint32 dslLanguageToId( std::u32string const & name )
{ {
static wstring newSp( U"newspelling" ); static std::u32string newSp( U"newspelling" );
static wstring st( U"standard" ); static std::u32string st( U"standard" );
static wstring ms( U"modernsort" ); static std::u32string ms( U"modernsort" );
static wstring ts( U"traditionalsort" ); static std::u32string ts( U"traditionalsort" );
static wstring prc( U"prc" ); static std::u32string prc( U"prc" );
// Any of those endings are to be removed // Any of those endings are to be removed
wstring nameStripped = Folding::apply( name ); std::u32string nameStripped = Folding::apply( name );
cutEnding( nameStripped, newSp ); cutEnding( nameStripped, newSp );
cutEnding( nameStripped, st ); cutEnding( nameStripped, st );

View file

@ -11,23 +11,21 @@
#include "iconv.hh" #include "iconv.hh"
#include <QtCore5Compat/QTextCodec> #include <QtCore5Compat/QTextCodec>
#include <QByteArray> #include <QByteArray>
#include "utf8.hh" #include "text.hh"
// Implementation details for Dsl, not part of its interface // Implementation details for Dsl, not part of its interface
namespace Dsl { namespace Dsl {
namespace Details { namespace Details {
using std::string; using std::string;
using gd::wstring;
using gd::wchar;
using std::list; using std::list;
using std::vector; using std::vector;
using Utf8::Encoding; using Text::Encoding;
using Utf8::LineFeed; using Text::LineFeed;
string findCodeForDslId( int id ); string findCodeForDslId( int id );
bool isAtSignFirst( wstring const & str ); bool isAtSignFirst( std::u32string const & str );
/// Parses the DSL language, representing it in its structural DOM form. /// Parses the DSL language, representing it in its structural DOM form.
struct ArticleDom struct ArticleDom
@ -37,23 +35,23 @@ struct ArticleDom
bool isTag; // true if it is a tag with subnodes, false if it's a leaf text bool isTag; // true if it is a tag with subnodes, false if it's a leaf text
// data. // data.
// Those are only used if isTag is true // Those are only used if isTag is true
wstring tagName; std::u32string tagName;
wstring tagAttrs; std::u32string tagAttrs;
wstring text; // This is only used if isTag is false std::u32string text; // This is only used if isTag is false
class Text class Text
{}; {};
class Tag class Tag
{}; {};
Node( Tag, wstring const & name, wstring const & attrs ): Node( Tag, std::u32string const & name, std::u32string const & attrs ):
isTag( true ), isTag( true ),
tagName( name ), tagName( name ),
tagAttrs( attrs ) tagAttrs( attrs )
{ {
} }
Node( Text, wstring const & text_ ): Node( Text, std::u32string const & text_ ):
isTag( false ), isTag( false ),
text( text_ ) text( text_ )
{ {
@ -61,30 +59,32 @@ struct ArticleDom
/// Concatenates all childen text nodes recursively to form all text /// Concatenates all childen text nodes recursively to form all text
/// the node contains stripped of any markup. /// the node contains stripped of any markup.
wstring renderAsText( bool stripTrsTag = false ) const; std::u32string renderAsText( bool stripTrsTag = false ) const;
}; };
/// Does the parse at construction. Refer to the 'root' member variable /// Does the parse at construction. Refer to the 'root' member variable
/// afterwards. /// afterwards.
explicit ArticleDom( wstring const &, string const & dictName = string(), wstring const & headword_ = wstring() ); explicit ArticleDom( std::u32string const &,
string const & dictName = string(),
std::u32string const & headword_ = std::u32string() );
/// Root of DOM's tree /// Root of DOM's tree
Node root; Node root;
private: private:
void openTag( wstring const & name, wstring const & attr, list< Node * > & stack ); void openTag( std::u32string const & name, std::u32string const & attr, list< Node * > & stack );
void closeTag( wstring const & name, list< Node * > & stack, bool warn = true ); void closeTag( std::u32string const & name, list< Node * > & stack, bool warn = true );
bool atSignFirstInLine(); bool atSignFirstInLine();
wchar const *stringPos, *lineStartPos; char32_t const *stringPos, *lineStartPos;
class eot: std::exception class eot: std::exception
{}; {};
wchar ch; char32_t ch;
bool escaped; bool escaped;
unsigned transcriptionCount; // >0 = inside a [t] tag unsigned transcriptionCount; // >0 = inside a [t] tag
unsigned mediaCount; // >0 = inside a [s] tag unsigned mediaCount; // >0 = inside a [s] tag
@ -93,7 +93,7 @@ private:
/// Information for diagnostic purposes /// Information for diagnostic purposes
string dictionaryName; string dictionaryName;
wstring headword; std::u32string headword;
}; };
/// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects /// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects
@ -103,9 +103,9 @@ class DslScanner
gzFile f; gzFile f;
Encoding encoding; Encoding encoding;
QTextCodec * codec; QTextCodec * codec;
wstring dictionaryName; std::u32string dictionaryName;
wstring langFrom, langTo; std::u32string langFrom, langTo;
wstring soundDictionary; std::u32string soundDictionary;
char readBuffer[ 65536 ]; char readBuffer[ 65536 ];
char * readBufferPtr; char * readBufferPtr;
LineFeed lineFeed; LineFeed lineFeed;
@ -132,25 +132,25 @@ public:
} }
/// Returns the dictionary's name, as was read from file's headers. /// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const std::u32string const & getDictionaryName() const
{ {
return dictionaryName; return dictionaryName;
} }
/// Returns the dictionary's source language, as was read from file's headers. /// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const std::u32string const & getLangFrom() const
{ {
return langFrom; return langFrom;
} }
/// Returns the dictionary's target language, as was read from file's headers. /// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const std::u32string const & getLangTo() const
{ {
return langTo; return langTo;
} }
/// Returns the preferred external dictionary with sounds, as was read from file's headers. /// Returns the preferred external dictionary with sounds, as was read from file's headers.
wstring const & getSoundDictionaryName() const std::u32string const & getSoundDictionaryName() const
{ {
return soundDictionary; return soundDictionary;
} }
@ -161,10 +161,10 @@ public:
/// If end of file is reached, false is returned. /// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which start /// Reading begins from the first line after the headers (ones which start
/// with #). /// with #).
bool readNextLine( wstring &, size_t & offset, bool only_head_word = false ); bool readNextLine( std::u32string &, size_t & offset, bool only_head_word = false );
/// Similar readNextLine but strip all DSL comments {{...}} /// Similar readNextLine but strip all DSL comments {{...}}
bool readNextLineWithoutComments( wstring &, size_t & offset, bool only_headword = false ); bool readNextLineWithoutComments( std::u32string &, size_t & offset, bool only_headword = false );
/// Returns the number of lines read so far from the file. /// Returns the number of lines read so far from the file.
unsigned getLinesRead() const unsigned getLinesRead() const
@ -180,32 +180,35 @@ public:
/// This function either removes parts of string enclosed in braces, or leaves /// This function either removes parts of string enclosed in braces, or leaves
/// them intact. The braces themselves are removed always, though. /// them intact. The braces themselves are removed always, though.
void processUnsortedParts( wstring & str, bool strip ); void processUnsortedParts( std::u32string & str, bool strip );
/// Expands optional parts of a headword (ones marked with parentheses), /// Expands optional parts of a headword (ones marked with parentheses),
/// producing all possible combinations where they are present or absent. /// producing all possible combinations where they are present or absent.
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x = 0, bool inside_recurse = false ); void expandOptionalParts( std::u32string & str,
list< std::u32string > * result,
size_t x = 0,
bool inside_recurse = false );
/// Expands all unescaped tildes, inserting tildeReplacement text instead of /// Expands all unescaped tildes, inserting tildeReplacement text instead of
/// them. /// them.
void expandTildes( wstring & str, wstring const & tildeReplacement ); void expandTildes( std::u32string & str, std::u32string const & tildeReplacement );
/// Unescapes any escaped chars. Be sure to handle all their special meanings /// Unescapes any escaped chars. Be sure to handle all their special meanings
/// before unescaping them. /// before unescaping them.
void unescapeDsl( wstring & str ); void unescapeDsl( std::u32string & str );
/// Normalizes the headword. Currently turns any sequences of consecutive spaces /// Normalizes the headword. Currently turns any sequences of consecutive spaces
/// into a single space. /// into a single space.
void normalizeHeadword( wstring & ); void normalizeHeadword( std::u32string & );
/// Strip DSL {{...}} comments /// Strip DSL {{...}} comments
void stripComments( wstring &, bool & ); void stripComments( std::u32string &, bool & );
inline size_t DslScanner::distanceToBytes( size_t x ) const inline size_t DslScanner::distanceToBytes( size_t x ) const
{ {
switch ( encoding ) { switch ( encoding ) {
case Utf8::Utf16LE: case Text::Utf16LE:
case Utf8::Utf16BE: case Text::Utf16BE:
return x * 2; return x * 2;
default: default:
return x; return x;
@ -214,7 +217,7 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
/// Converts the given language name taken from Dsl header (i.e. getLangFrom(), /// Converts the given language name taken from Dsl header (i.e. getLangFrom(),
/// getLangTo()) to its proper language id. /// getLangTo()) to its proper language id.
quint32 dslLanguageToId( wstring const & name ); quint32 dslLanguageToId( std::u32string const & name );
} // namespace Details } // namespace Details
} // namespace Dsl } // namespace Dsl

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62 /* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include <QDir> #include <QDir>
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
#include "epwing_book.hh" #include "epwing_book.hh"
#include "epwing.hh" #include "epwing.hh"
@ -29,7 +29,7 @@ using std::multimap;
using std::vector; using std::vector;
using std::set; using std::set;
using std::pair; using std::pair;
using gd::wstring; using std::u32string;
namespace { namespace {
@ -87,12 +87,6 @@ public:
~EpwingDictionary(); ~EpwingDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -115,10 +109,10 @@ public:
QString const & getDescription() override; QString const & getDescription() override;
void getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off ); void getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off );
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -140,16 +134,16 @@ public:
&& ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize ); && ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
} }
static int japaneseWriting( gd::wchar ch ); static int japaneseWriting( char32_t ch );
static bool isSign( gd::wchar ch ); static bool isSign( char32_t ch );
static bool isJapanesePunctiation( gd::wchar ch ); static bool isJapanesePunctiation( char32_t ch );
sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ) override; sptr< Dictionary::WordSearchRequest > prefixMatch( u32string const &, unsigned long ) override;
sptr< Dictionary::WordSearchRequest > sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override; stemmedMatch( u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
protected: protected:
@ -162,7 +156,7 @@ private:
quint32 address, string & articleHeadword, string & articleText, int & articlePage, int & articleOffset ); quint32 address, string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & word ) override; sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( u32string const & word ) override;
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset ); void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
void void
@ -455,7 +449,7 @@ void EpwingDictionary::getArticleText( uint32_t articleAddress, QString & headwo
class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
{ {
wstring str; u32string str;
EpwingDictionary & dict; EpwingDictionary & dict;
QAtomicInt isCancelled; QAtomicInt isCancelled;
@ -463,7 +457,7 @@ class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
public: public:
EpwingHeadwordsRequest( wstring const & word_, EpwingDictionary & dict_ ): EpwingHeadwordsRequest( u32string const & word_, EpwingDictionary & dict_ ):
str( word_ ), str( word_ ),
dict( dict_ ) dict( dict_ )
{ {
@ -539,7 +533,7 @@ void EpwingHeadwordsRequest::run()
finish(); finish();
} }
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( wstring const & word ) sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( u32string const & word )
{ {
return synonymSearchEnabled ? std::make_shared< EpwingHeadwordsRequest >( word, *this ) : return synonymSearchEnabled ? std::make_shared< EpwingHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word ); Class::findHeadwordsForSynonym( word );
@ -548,8 +542,8 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym(
class EpwingArticleRequest: public Dictionary::DataRequest class EpwingArticleRequest: public Dictionary::DataRequest
{ {
wstring word; u32string word;
vector< wstring > alts; vector< u32string > alts;
EpwingDictionary & dict; EpwingDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -558,8 +552,8 @@ class EpwingArticleRequest: public Dictionary::DataRequest
public: public:
EpwingArticleRequest( wstring const & word_, EpwingArticleRequest( u32string const & word_,
vector< wstring > const & alts_, vector< u32string > const & alts_,
EpwingDictionary & dict_, EpwingDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -574,10 +568,10 @@ public:
void run(); void run();
void getBuiltInArticle( wstring const & word_, void getBuiltInArticle( u32string const & word_,
QList< int > & pages, QList< int > & pages,
QList< int > & offsets, QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles ); multimap< u32string, pair< string, string > > & mainArticles );
void cancel() override void cancel() override
{ {
@ -607,13 +601,13 @@ void EpwingArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonims make it that the articles set< quint32 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) if ( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
@ -647,11 +641,11 @@ void EpwingArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) if ( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, pair< string, string > > & mapToUse = multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -676,7 +670,7 @@ void EpwingArticleRequest::run()
string result = "<div class=\"epwing_article\">"; string result = "<div class=\"epwing_article\">";
multimap< wstring, pair< string, string > >::const_iterator i; multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>"; result += "<h3>";
@ -725,10 +719,10 @@ void EpwingArticleRequest::run()
finish(); finish();
} }
void EpwingArticleRequest::getBuiltInArticle( wstring const & word_, void EpwingArticleRequest::getBuiltInArticle( u32string const & word_,
QList< int > & pages, QList< int > & pages,
QList< int > & offsets, QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles ) multimap< u32string, pair< string, string > > & mainArticles )
{ {
try { try {
string headword, articleText; string headword, articleText;
@ -762,7 +756,7 @@ void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
} }
} }
void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off ) void EpwingDictionary::getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off )
{ {
try { try {
QMutexLocker _( &eBook.getLibMutex() ); QMutexLocker _( &eBook.getLibMutex() );
@ -773,9 +767,9 @@ void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg,
} }
} }
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( u32string const & word,
vector< wstring > const & alts, vector< u32string > const & alts,
wstring const &, u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -888,7 +882,7 @@ sptr< Dictionary::DataRequest > EpwingDictionary::getSearchResults( QString cons
ignoreDiacritics ); ignoreDiacritics );
} }
int EpwingDictionary::japaneseWriting( gd::wchar ch ) int EpwingDictionary::japaneseWriting( char32_t ch )
{ {
if ( ( ch >= 0x30A0 && ch <= 0x30FF ) || ( ch >= 0x31F0 && ch <= 0x31FF ) || ( ch >= 0x3200 && ch <= 0x32FF ) if ( ( ch >= 0x30A0 && ch <= 0x30FF ) || ( ch >= 0x31F0 && ch <= 0x31FF ) || ( ch >= 0x3200 && ch <= 0x32FF )
|| ( ch >= 0xFF00 && ch <= 0xFFEF ) || ( ch == 0x1B000 ) ) || ( ch >= 0xFF00 && ch <= 0xFFEF ) || ( ch == 0x1B000 ) )
@ -901,7 +895,7 @@ int EpwingDictionary::japaneseWriting( gd::wchar ch )
return 0; return 0;
} }
bool EpwingDictionary::isSign( gd::wchar ch ) bool EpwingDictionary::isSign( char32_t ch )
{ {
switch ( ch ) { switch ( ch ) {
case 0x002B: // PLUS SIGN case 0x002B: // PLUS SIGN
@ -921,7 +915,7 @@ bool EpwingDictionary::isSign( gd::wchar ch )
} }
} }
bool EpwingDictionary::isJapanesePunctiation( gd::wchar ch ) bool EpwingDictionary::isJapanesePunctiation( char32_t ch )
{ {
return ch >= 0x3000 && ch <= 0x303F; return ch >= 0x3000 && ch <= 0x303F;
} }
@ -935,7 +929,7 @@ class EpwingWordSearchRequest: public BtreeIndexing::BtreeWordSearchRequest
public: public:
EpwingWordSearchRequest( EpwingDictionary & dict_, EpwingWordSearchRequest( EpwingDictionary & dict_,
wstring const & str_, u32string const & str_,
unsigned minLength_, unsigned minLength_,
int maxSuffixVariation_, int maxSuffixVariation_,
bool allowMiddleMatches_, bool allowMiddleMatches_,
@ -982,13 +976,13 @@ void EpwingWordSearchRequest::findMatches()
finish(); finish();
} }
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( wstring const & str, unsigned long maxResults ) sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( u32string const & str, unsigned long maxResults )
{ {
return std::make_shared< EpwingWordSearchRequest >( *this, str, 0, -1, true, maxResults ); return std::make_shared< EpwingWordSearchRequest >( *this, str, 0, -1, true, maxResults );
} }
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( wstring const & str, sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( u32string const & str,
unsigned minLength, unsigned minLength,
unsigned maxSuffixVariation, unsigned maxSuffixVariation,
unsigned long maxResults ) unsigned long maxResults )
@ -1027,20 +1021,20 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
chunks.addToBlock( &head.page, sizeof( head.page ) ); chunks.addToBlock( &head.page, sizeof( head.page ) );
chunks.addToBlock( &head.offset, sizeof( head.offset ) ); chunks.addToBlock( &head.offset, sizeof( head.offset ) );
wstring hw = head.headword.toStdU32String(); u32string hw = head.headword.toStdU32String();
indexedWords.addWord( hw, offset ); indexedWords.addWord( hw, offset );
wordCount++; wordCount++;
articleCount++; articleCount++;
vector< wstring > words; vector< u32string > words;
// Parse combined kanji/katakana/hiragana headwords // Parse combined kanji/katakana/hiragana headwords
int w_prev = 0; int w_prev = 0;
wstring word; u32string word;
for ( wstring::size_type n = 0; n < hw.size(); n++ ) { for ( u32string::size_type n = 0; n < hw.size(); n++ ) {
gd::wchar ch = hw[ n ]; char32_t ch = hw[ n ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isSign( ch ) if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isSign( ch )
|| EpwingDictionary::isJapanesePunctiation( ch ) ) || EpwingDictionary::isJapanesePunctiation( ch ) )
@ -1050,7 +1044,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
if ( w > 0 ) { if ( w > 0 ) {
// Store only separated words // Store only separated words
gd::wchar ch_prev = 0; char32_t ch_prev = 0;
if ( n ) if ( n )
ch_prev = hw[ n - 1 ]; ch_prev = hw[ n - 1 ];
bool needStore = ( n == 0 || Folding::isPunct( ch_prev ) || Folding::isWhitespace( ch_prev ) bool needStore = ( n == 0 || Folding::isPunct( ch_prev ) || Folding::isWhitespace( ch_prev )
@ -1058,7 +1052,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
word.push_back( ch ); word.push_back( ch );
w_prev = w; w_prev = w;
wstring::size_type i; u32string::size_type i;
for ( i = n + 1; i < hw.size(); i++ ) { for ( i = n + 1; i < hw.size(); i++ ) {
ch = hw[ i ]; ch = hw[ i ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isJapanesePunctiation( ch ) ) if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isJapanesePunctiation( ch ) )

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62 /* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
#include "epwing_book.hh" #include "epwing_book.hh"
@ -10,8 +10,7 @@
#include <QTextDocumentFragment> #include <QTextDocumentFragment>
#include <QHash> #include <QHash>
#include "audiolink.hh" #include "audiolink.hh"
#include "wstring.hh" #include "text.hh"
#include "wstring_qt.hh"
#include "folding.hh" #include "folding.hh"
#include "epwing_charmap.hh" #include "epwing_charmap.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
@ -1135,7 +1134,7 @@ void EpwingBook::fixHeadword( QString & headword )
// return; // return;
//} //}
gd::wstring folded = Folding::applyPunctOnly( fixed.toStdU32String() ); std::u32string folded = Folding::applyPunctOnly( fixed.toStdU32String() );
//fixed = QString::fromStdU32String( folded ); //fixed = QString::fromStdU32String( folded );
//if( isHeadwordCorrect( fixed ) ) //if( isHeadwordCorrect( fixed ) )
@ -1993,4 +1992,4 @@ QMutex EpwingBook::libMutex;
} // namespace Epwing } // namespace Epwing
#endif #endif

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62 /* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
#include "epwing_charmap.hh" #include "epwing_charmap.hh"

View file

@ -2,14 +2,13 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "forvo.hh" #include "forvo.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager> #include <QNetworkAccessManager>
#include <QNetworkReply> #include <QNetworkReply>
#include <QtXml> #include <QtXml>
#include <list> #include <list>
#include "audiolink.hh" #include "audiolink.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "utf8.hh" #include "text.hh"
namespace Forvo { namespace Forvo {
@ -38,11 +37,6 @@ public:
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -53,7 +47,7 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{ {
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >(); sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -62,7 +56,8 @@ public:
return sr; return sr;
} }
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected: protected:
@ -94,8 +89,8 @@ class ForvoArticleRequest: public Dictionary::DataRequest
public: public:
ForvoArticleRequest( wstring const & word, ForvoArticleRequest( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
QString const & apiKey_, QString const & apiKey_,
QString const & languageCode_, QString const & languageCode_,
string const & dictionaryId_, string const & dictionaryId_,
@ -105,14 +100,16 @@ public:
private: private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word ); void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots: private slots:
virtual void requestFinished( QNetworkReply * ); virtual void requestFinished( QNetworkReply * );
}; };
sptr< DataRequest > sptr< DataRequest > ForvoDictionary::getArticle( std::u32string const & word,
ForvoDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) vector< std::u32string > const & alts,
std::u32string const &,
bool )
{ {
if ( word.size() > 80 || apiKey.isEmpty() ) { if ( word.size() > 80 || apiKey.isEmpty() ) {
@ -142,8 +139,8 @@ void ForvoArticleRequest::cancel()
finish(); finish();
} }
ForvoArticleRequest::ForvoArticleRequest( wstring const & str, ForvoArticleRequest::ForvoArticleRequest( std::u32string const & str,
vector< wstring > const & alts, vector< std::u32string > const & alts,
QString const & apiKey_, QString const & apiKey_,
QString const & languageCode_, QString const & languageCode_,
string const & dictionaryId_, string const & dictionaryId_,
@ -161,7 +158,7 @@ ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
} }
} }
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str ) void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{ {
qDebug( "Forvo: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() ); qDebug( "Forvo: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
@ -182,7 +179,7 @@ void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const &
sptr< QNetworkReply > netReply = std::shared_ptr< QNetworkReply >( mgr.get( QNetworkRequest( reqUrl ) ) ); sptr< QNetworkReply > netReply = std::shared_ptr< QNetworkReply >( mgr.get( QNetworkRequest( reqUrl ) ) );
netReplies.push_back( NetReply( netReply, Utf8::encode( str ) ) ); netReplies.push_back( NetReply( netReply, Text::toUtf8( str ) ) );
} }
void ForvoArticleRequest::requestFinished( QNetworkReply * r ) void ForvoArticleRequest::requestFinished( QNetworkReply * r )

View file

@ -8,8 +8,7 @@
#include "ufile.hh" #include "ufile.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "wstring_qt.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "dictzip.hh" #include "dictzip.hh"
@ -39,14 +38,12 @@ using std::set;
using std::multimap; using std::multimap;
using std::pair; using std::pair;
using gd::wstring;
using gd::wchar;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo; using BtreeIndexing::IndexInfo;
using Utf8::Encoding; using Text::Encoding;
using Utf8::LineFeed; using Text::LineFeed;
/////////////// GlsScanner /////////////// GlsScanner
@ -55,9 +52,9 @@ class GlsScanner
gzFile f; gzFile f;
Encoding encoding; Encoding encoding;
QTextCodec * codec; QTextCodec * codec;
wstring dictionaryName; std::u32string dictionaryName;
wstring dictionaryDecription, dictionaryAuthor; std::u32string dictionaryDecription, dictionaryAuthor;
wstring langFrom, langTo; std::u32string langFrom, langTo;
char readBuffer[ 10000 ]; char readBuffer[ 10000 ];
char * readBufferPtr; char * readBufferPtr;
size_t readBufferLeft; size_t readBufferLeft;
@ -82,31 +79,31 @@ public:
} }
/// Returns the dictionary's name, as was read from file's headers. /// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const std::u32string const & getDictionaryName() const
{ {
return dictionaryName; return dictionaryName;
} }
/// Returns the dictionary's author, as was read from file's headers. /// Returns the dictionary's author, as was read from file's headers.
wstring const & getDictionaryAuthor() const std::u32string const & getDictionaryAuthor() const
{ {
return dictionaryAuthor; return dictionaryAuthor;
} }
/// Returns the dictionary's description, as was read from file's headers. /// Returns the dictionary's description, as was read from file's headers.
wstring const & getDictionaryDescription() const std::u32string const & getDictionaryDescription() const
{ {
return dictionaryDecription; return dictionaryDecription;
} }
/// Returns the dictionary's source language, as was read from file's headers. /// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const std::u32string const & getLangFrom() const
{ {
return langFrom; return langFrom;
} }
/// Returns the dictionary's target language, as was read from file's headers. /// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const std::u32string const & getLangTo() const
{ {
return langTo; return langTo;
} }
@ -117,7 +114,7 @@ public:
/// If end of file is reached, false is returned. /// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which end /// Reading begins from the first line after the headers (ones which end
/// by the "### Glossary section:" line). /// by the "### Glossary section:" line).
bool readNextLine( wstring &, size_t & offset ); bool readNextLine( std::u32string &, size_t & offset );
/// Returns the number of lines read so far from the file. /// Returns the number of lines read so far from the file.
unsigned getLinesRead() const unsigned getLinesRead() const
{ {
@ -126,7 +123,7 @@ public:
}; };
GlsScanner::GlsScanner( string const & fileName ): GlsScanner::GlsScanner( string const & fileName ):
encoding( Utf8::Utf8 ), encoding( Text::Utf8 ),
readBufferPtr( readBuffer ), readBufferPtr( readBuffer ),
readBufferLeft( 0 ), readBufferLeft( 0 ),
linesRead( 0 ) linesRead( 0 )
@ -152,10 +149,10 @@ GlsScanner::GlsScanner( string const & fileName ):
// If the file begins with the dedicated Unicode marker, we just consume // If the file begins with the dedicated Unicode marker, we just consume
// it. If, on the other hand, it's not, we return the bytes back // it. If, on the other hand, it's not, we return the bytes back
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) { if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
encoding = Utf8::Utf16LE; encoding = Text::Utf16LE;
} }
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) { else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
encoding = Utf8::Utf16BE; encoding = Text::Utf16BE;
} }
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) { else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
// Looks like Utf8, read one more byte // Looks like Utf8, read one more byte
@ -164,29 +161,29 @@ GlsScanner::GlsScanner( string const & fileName ):
gzclose( f ); gzclose( f );
throw exMalformedGlsFile( fileName ); throw exMalformedGlsFile( fileName );
} }
encoding = Utf8::Utf8; encoding = Text::Utf8;
} }
else { else {
if ( gzrewind( f ) ) { if ( gzrewind( f ) ) {
gzclose( f ); gzclose( f );
throw exCantOpen( fileName ); throw exCantOpen( fileName );
} }
encoding = Utf8::Utf8; encoding = Text::Utf8;
} }
codec = QTextCodec::codecForName( Utf8::getEncodingNameFor( encoding ) ); codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
// We now can use our own readNextLine() function // We now can use our own readNextLine() function
lineFeed = Utf8::initLineFeed( encoding ); lineFeed = Text::initLineFeed( encoding );
wstring str; std::u32string str;
wstring * currentField = 0; std::u32string * currentField = 0;
wstring mark = U"###"; std::u32string mark = U"###";
wstring titleMark = U"### Glossary title:"; std::u32string titleMark = U"### Glossary title:";
wstring authorMark = U"### Author:"; std::u32string authorMark = U"### Author:";
wstring descriptionMark = U"### Description:"; std::u32string descriptionMark = U"### Description:";
wstring langFromMark = U"### Source language:"; std::u32string langFromMark = U"### Source language:";
wstring langToMark = U"### Target language:"; std::u32string langToMark = U"### Target language:";
wstring endOfHeaderMark = U"### Glossary section:"; std::u32string endOfHeaderMark = U"### Glossary section:";
size_t offset; size_t offset;
for ( ;; ) { for ( ;; ) {
@ -199,22 +196,22 @@ GlsScanner::GlsScanner( string const & fileName ):
currentField = 0; currentField = 0;
if ( str.compare( 0, titleMark.size(), titleMark ) == 0 ) { if ( str.compare( 0, titleMark.size(), titleMark ) == 0 ) {
dictionaryName = wstring( str, titleMark.size(), str.size() - titleMark.size() ); dictionaryName = std::u32string( str, titleMark.size(), str.size() - titleMark.size() );
currentField = &dictionaryName; currentField = &dictionaryName;
} }
else if ( str.compare( 0, authorMark.size(), authorMark ) == 0 ) { else if ( str.compare( 0, authorMark.size(), authorMark ) == 0 ) {
dictionaryAuthor = wstring( str, authorMark.size(), str.size() - authorMark.size() ); dictionaryAuthor = std::u32string( str, authorMark.size(), str.size() - authorMark.size() );
currentField = &dictionaryAuthor; currentField = &dictionaryAuthor;
} }
else if ( str.compare( 0, descriptionMark.size(), descriptionMark ) == 0 ) { else if ( str.compare( 0, descriptionMark.size(), descriptionMark ) == 0 ) {
dictionaryDecription = wstring( str, descriptionMark.size(), str.size() - descriptionMark.size() ); dictionaryDecription = std::u32string( str, descriptionMark.size(), str.size() - descriptionMark.size() );
currentField = &dictionaryDecription; currentField = &dictionaryDecription;
} }
else if ( str.compare( 0, langFromMark.size(), langFromMark ) == 0 ) { else if ( str.compare( 0, langFromMark.size(), langFromMark ) == 0 ) {
langFrom = wstring( str, langFromMark.size(), str.size() - langFromMark.size() ); langFrom = std::u32string( str, langFromMark.size(), str.size() - langFromMark.size() );
} }
else if ( str.compare( 0, langToMark.size(), langToMark ) == 0 ) { else if ( str.compare( 0, langToMark.size(), langToMark ) == 0 ) {
langTo = wstring( str, langToMark.size(), str.size() - langToMark.size() ); langTo = std::u32string( str, langToMark.size(), str.size() - langToMark.size() );
} }
else if ( str.compare( 0, endOfHeaderMark.size(), endOfHeaderMark ) == 0 ) { else if ( str.compare( 0, endOfHeaderMark.size(), endOfHeaderMark ) == 0 ) {
break; break;
@ -229,7 +226,7 @@ GlsScanner::GlsScanner( string const & fileName ):
} }
} }
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) bool GlsScanner::readNextLine( std::u32string & out, size_t & offset )
{ {
offset = (size_t)( gztell( f ) - readBufferLeft ); offset = (size_t)( gztell( f ) - readBufferLeft );
@ -256,7 +253,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset )
return false; return false;
} }
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length ); int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) { if ( pos == -1 ) {
return false; return false;
} }
@ -349,11 +346,6 @@ public:
~GlsDictionary(); ~GlsDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -374,10 +366,12 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override; sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -508,11 +502,11 @@ QString const & GlsDictionary::getDescription()
try { try {
GlsScanner scanner( getDictionaryFilenames()[ 0 ] ); GlsScanner scanner( getDictionaryFilenames()[ 0 ] );
string str = Utf8::encode( scanner.getDictionaryAuthor() ); string str = Text::toUtf8( scanner.getDictionaryAuthor() );
if ( !str.empty() ) { if ( !str.empty() ) {
dictionaryDescription = QObject::tr( "Author: %1%2" ).arg( QString::fromUtf8( str.c_str() ) ).arg( "\n\n" ); dictionaryDescription = QObject::tr( "Author: %1%2" ).arg( QString::fromUtf8( str.c_str() ) ).arg( "\n\n" );
} }
str = Utf8::encode( scanner.getDictionaryDescription() ); str = Text::toUtf8( scanner.getDictionaryDescription() );
if ( !str.empty() ) { if ( !str.empty() ) {
QString desc = QString::fromUtf8( str.c_str() ); QString desc = QString::fromUtf8( str.c_str() );
desc.replace( "\t", "<br/>" ); desc.replace( "\t", "<br/>" );
@ -597,7 +591,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
} }
else { else {
string articleData = string articleData =
Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize ); Iconv::toUtf8( Text::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
string::size_type start_pos = 0, end_pos = 0; string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) { for ( ;; ) {
@ -626,7 +620,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
end_pos = 0; end_pos = 0;
for ( ;; ) { for ( ;; ) {
end_pos = headword.find( '|', start_pos ); end_pos = headword.find( '|', start_pos );
if ( end_pos == wstring::npos ) { if ( end_pos == std::u32string::npos ) {
string hw = headword.substr( start_pos ); string hw = headword.substr( start_pos );
if ( !hw.empty() ) { if ( !hw.empty() ) {
headwords.push_back( hw ); headwords.push_back( hw );
@ -809,7 +803,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class GlsHeadwordsRequest: public Dictionary::WordSearchRequest class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
{ {
wstring word; std::u32string word;
GlsDictionary & dict; GlsDictionary & dict;
QAtomicInt isCancelled; QAtomicInt isCancelled;
@ -817,7 +811,7 @@ class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
public: public:
GlsHeadwordsRequest( wstring const & word_, GlsDictionary & dict_ ): GlsHeadwordsRequest( std::u32string const & word_, GlsDictionary & dict_ ):
word( word_ ), word( word_ ),
dict( dict_ ) dict( dict_ )
{ {
@ -850,7 +844,7 @@ void GlsHeadwordsRequest::run()
try { try {
vector< WordArticleLink > chain = dict.findArticles( word ); vector< WordArticleLink > chain = dict.findArticles( word );
wstring caseFolded = Folding::applySimpleCaseOnly( word ); std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) { for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -863,7 +857,7 @@ void GlsHeadwordsRequest::run()
dict.loadArticleText( x.articleOffset, headwords, articleText ); dict.loadArticleText( x.articleOffset, headwords, articleText );
wstring headwordDecoded = Utf8::decode( headwords.front() ); std::u32string headwordDecoded = Text::toUtf32( headwords.front() );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) { if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the // The headword seems to differ from the input word, which makes the
@ -881,7 +875,7 @@ void GlsHeadwordsRequest::run()
finish(); finish();
} }
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( wstring const & word ) sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( std::u32string const & word )
{ {
return synonymSearchEnabled ? std::make_shared< GlsHeadwordsRequest >( word, *this ) : return synonymSearchEnabled ? std::make_shared< GlsHeadwordsRequest >( word, *this ) :
@ -894,8 +888,8 @@ sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( ws
class GlsArticleRequest: public Dictionary::DataRequest class GlsArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
GlsDictionary & dict; GlsDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -904,8 +898,8 @@ class GlsArticleRequest: public Dictionary::DataRequest
public: public:
GlsArticleRequest( wstring const & word_, GlsArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
GlsDictionary & dict_, GlsDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -949,13 +943,13 @@ void GlsArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -981,16 +975,16 @@ void GlsArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( Utf8::decode( headword ) ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( Text::toUtf32( headword ) );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( mapToUse.insert(
pair( Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), pair( headword, articleText ) ) ); pair( Folding::applySimpleCaseOnly( Text::toUtf32( headword ) ), pair( headword, articleText ) ) );
articlesIncluded.insert( x.articleOffset ); articlesIncluded.insert( x.articleOffset );
} }
@ -1003,7 +997,7 @@ void GlsArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second.second; result += i->second.second;
@ -1024,9 +1018,9 @@ void GlsArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > GlsDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -1102,7 +1096,7 @@ void GlsResourceRequest::run()
if ( dict.resourceZip.isOpen() ) { if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex ); QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) { if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive throw; // Make it fail since we couldn't read the archive
} }
} }
@ -1244,7 +1238,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// which the incident happened. We need alive scanner for that. // which the incident happened. We need alive scanner for that.
// Building the index // Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) ); initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
qDebug( "Gls: Building the index for dictionary: %s", qDebug( "Gls: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() ); QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
@ -1260,7 +1254,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader ); idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() ); string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() ); idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() ); idx.write( dictionaryName.data(), dictionaryName.size() );
@ -1271,7 +1265,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
ChunkedStorage::Writer chunks( idx ); ChunkedStorage::Writer chunks( idx );
wstring curString; std::u32string curString;
size_t curOffset; size_t curOffset;
uint32_t articleCount = 0, wordCount = 0; uint32_t articleCount = 0, wordCount = 0;
@ -1291,12 +1285,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Parse headwords // Parse headwords
list< wstring > allEntryWords; list< std::u32string > allEntryWords;
wstring::size_type start_pos = 0, end_pos = 0; std::u32string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) { for ( ;; ) {
end_pos = curString.find( '|', start_pos ); end_pos = curString.find( '|', start_pos );
if ( end_pos == wstring::npos ) { if ( end_pos == std::u32string::npos ) {
wstring headword = curString.substr( start_pos ); std::u32string headword = curString.substr( start_pos );
if ( !headword.empty() ) { if ( !headword.empty() ) {
allEntryWords.push_back( headword ); allEntryWords.push_back( headword );
} }

View file

@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "hunspell.hh" #include "hunspell.hh"
#include "utf8.hh" #include "text.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "iconv.hh" #include "iconv.hh"
#include "folding.hh" #include "folding.hh"
@ -15,18 +15,12 @@
#include <set> #include <set>
#include "utils.hh" #include "utils.hh"
#include <QtConcurrentRun> #include <QtConcurrentRun>
#include <hunspell/hunspell.hxx>
#ifndef INCLUDE_LIBRARY_PATH
#include <hunspell.hxx>
#else
#include <hunspell/hunspell.hxx>
#endif
namespace HunspellMorpho { namespace HunspellMorpho {
using namespace Dictionary; using namespace Dictionary;
using gd::wchar;
namespace { namespace {
@ -55,12 +49,6 @@ public:
dictionaryName = name_; dictionaryName = name_;
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -71,18 +59,19 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override; sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override; sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
bool isLocalDictionary() override bool isLocalDictionary() override
{ {
return true; return true;
} }
vector< wstring > getAlternateWritings( const wstring & word ) noexcept override; vector< std::u32string > getAlternateWritings( const std::u32string & word ) noexcept override;
protected: protected:
@ -105,25 +94,25 @@ private:
/// Encodes the given string to be passed to the hunspell object. May throw /// Encodes the given string to be passed to the hunspell object. May throw
/// Iconv::Ex /// Iconv::Ex
string encodeToHunspell( Hunspell &, wstring const & ); string encodeToHunspell( Hunspell &, std::u32string const & );
/// Decodes the given string returned by the hunspell object. May throw /// Decodes the given string returned by the hunspell object. May throw
/// Iconv::Ex /// Iconv::Ex
wstring decodeFromHunspell( Hunspell &, char const * ); std::u32string decodeFromHunspell( Hunspell &, char const * );
/// Generates suggestions via hunspell /// Generates suggestions via hunspell
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell ); QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell );
/// Generates suggestions for compound expression /// Generates suggestions for compound expression
void getSuggestionsForExpression( wstring const & expression, void getSuggestionsForExpression( std::u32string const & expression,
vector< wstring > & suggestions, vector< std::u32string > & suggestions,
QMutex & hunspellMutex, QMutex & hunspellMutex,
Hunspell & hunspell ); Hunspell & hunspell );
/// Returns true if the string contains whitespace, false otherwise /// Returns true if the string contains whitespace, false otherwise
bool containsWhitespace( wstring const & str ) bool containsWhitespace( std::u32string const & str )
{ {
wchar const * next = str.c_str(); char32_t const * next = str.c_str();
for ( ; *next; ++next ) { for ( ; *next; ++next ) {
if ( Folding::isWhitespace( *next ) ) { if ( Folding::isWhitespace( *next ) ) {
@ -153,9 +142,9 @@ void HunspellDictionary::loadIcon() noexcept
dictionaryIconLoaded = true; dictionaryIconLoaded = true;
} }
vector< wstring > HunspellDictionary::getAlternateWritings( wstring const & word ) noexcept vector< std::u32string > HunspellDictionary::getAlternateWritings( std::u32string const & word ) noexcept
{ {
vector< wstring > results; vector< std::u32string > results;
if ( containsWhitespace( word ) ) { if ( containsWhitespace( word ) ) {
getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell ); getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell );
@ -171,14 +160,14 @@ class HunspellArticleRequest: public Dictionary::DataRequest
QMutex & hunspellMutex; QMutex & hunspellMutex;
Hunspell & hunspell; Hunspell & hunspell;
wstring word; std::u32string word;
QAtomicInt isCancelled; QAtomicInt isCancelled;
QFuture< void > f; QFuture< void > f;
public: public:
HunspellArticleRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ): HunspellArticleRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ), hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ), hunspell( hunspell_ ),
word( word_ ) word( word_ )
@ -212,7 +201,7 @@ void HunspellArticleRequest::run()
vector< string > suggestions; vector< string > suggestions;
try { try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word ); std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( containsWhitespace( trimmedWord ) ) { if ( containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases // For now we don't analyze whitespace-containing phrases
@ -237,10 +226,10 @@ void HunspellArticleRequest::run()
string result = "<div class=\"gdspellsuggestion\">" string result = "<div class=\"gdspellsuggestion\">"
+ Html::escape( QCoreApplication::translate( "Hunspell", "Spelling suggestions: " ).toUtf8().data() ); + Html::escape( QCoreApplication::translate( "Hunspell", "Spelling suggestions: " ).toUtf8().data() );
wstring lowercasedWord = Folding::applySimpleCaseOnly( word ); std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
for ( vector< string >::size_type x = 0; x < suggestions.size(); ++x ) { for ( vector< string >::size_type x = 0; x < suggestions.size(); ++x ) {
wstring suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() ); std::u32string suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
if ( Folding::applySimpleCaseOnly( suggestion ) == lowercasedWord ) { if ( Folding::applySimpleCaseOnly( suggestion ) == lowercasedWord ) {
// If among suggestions we see the same word just with the different // If among suggestions we see the same word just with the different
@ -251,7 +240,7 @@ void HunspellArticleRequest::run()
return; return;
} }
string suggestionUtf8 = Utf8::encode( suggestion ); string suggestionUtf8 = Text::toUtf8( suggestion );
result += "<a href=\"bword:"; result += "<a href=\"bword:";
result += Html::escape( suggestionUtf8 ) + "\">"; result += Html::escape( suggestionUtf8 ) + "\">";
@ -279,8 +268,10 @@ void HunspellArticleRequest::run()
finish(); finish();
} }
sptr< DataRequest > sptr< DataRequest > HunspellDictionary::getArticle( std::u32string const & word,
HunspellDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool ) vector< std::u32string > const &,
std::u32string const &,
bool )
{ {
return std::make_shared< HunspellArticleRequest >( word, getHunspellMutex(), hunspell ); return std::make_shared< HunspellArticleRequest >( word, getHunspellMutex(), hunspell );
@ -293,7 +284,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex; QMutex & hunspellMutex;
Hunspell & hunspell; Hunspell & hunspell;
wstring word; std::u32string word;
QAtomicInt isCancelled; QAtomicInt isCancelled;
QFuture< void > f; QFuture< void > f;
@ -301,7 +292,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
public: public:
HunspellHeadwordsRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ): HunspellHeadwordsRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ), hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ), hunspell( hunspell_ ),
word( word_ ) word( word_ )
@ -333,7 +324,7 @@ void HunspellHeadwordsRequest::run()
return; return;
} }
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word ); std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.size() > 80 ) { if ( trimmedWord.size() > 80 ) {
// We won't do anything for overly long sentences since that would probably // We won't do anything for overly long sentences since that would probably
@ -343,7 +334,7 @@ void HunspellHeadwordsRequest::run()
} }
if ( containsWhitespace( trimmedWord ) ) { if ( containsWhitespace( trimmedWord ) ) {
vector< wstring > results; vector< std::u32string > results;
getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell ); getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell );
@ -353,7 +344,7 @@ void HunspellHeadwordsRequest::run()
} }
} }
else { else {
QList< wstring > suggestions = suggest( trimmedWord, hunspellMutex, hunspell ); QList< std::u32string > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
if ( !suggestions.empty() ) { if ( !suggestions.empty() ) {
QMutexLocker _( &dataMutex ); QMutexLocker _( &dataMutex );
@ -367,9 +358,9 @@ void HunspellHeadwordsRequest::run()
finish(); finish();
} }
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell ) QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell )
{ {
QList< wstring > result; QList< std::u32string > result;
vector< string > suggestions; vector< string > suggestions;
@ -382,7 +373,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
if ( !suggestions.empty() ) { if ( !suggestions.empty() ) {
// There were some suggestions made for us. Make an appropriate output. // There were some suggestions made for us. Make an appropriate output.
wstring lowercasedWord = Folding::applySimpleCaseOnly( word ); std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
static QRegularExpression cutStem( R"(^\s*st:(((\s+(?!\w{2}:)(?!-)(?!\+))|\S+)+))" ); static QRegularExpression cutStem( R"(^\s*st:(((\s+(?!\w{2}:)(?!-)(?!\+))|\S+)+))" );
@ -399,7 +390,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
auto match = cutStem.match( suggestion.trimmed() ); auto match = cutStem.match( suggestion.trimmed() );
if ( match.hasMatch() ) { if ( match.hasMatch() ) {
wstring alt = match.captured( 1 ).toStdU32String(); std::u32string alt = match.captured( 1 ).toStdU32String();
if ( Folding::applySimpleCaseOnly( alt ) != lowercasedWord ) // No point in providing same word if ( Folding::applySimpleCaseOnly( alt ) != lowercasedWord ) // No point in providing same word
{ {
@ -417,7 +408,7 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
} }
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( wstring const & word ) sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( std::u32string const & word )
{ {
return std::make_shared< HunspellHeadwordsRequest >( word, getHunspellMutex(), hunspell ); return std::make_shared< HunspellHeadwordsRequest >( word, getHunspellMutex(), hunspell );
@ -431,14 +422,14 @@ class HunspellPrefixMatchRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex; QMutex & hunspellMutex;
Hunspell & hunspell; Hunspell & hunspell;
wstring word; std::u32string word;
QAtomicInt isCancelled; QAtomicInt isCancelled;
QFuture< void > f; QFuture< void > f;
public: public:
HunspellPrefixMatchRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ): HunspellPrefixMatchRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ), hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ), hunspell( hunspell_ ),
word( word_ ) word( word_ )
@ -471,7 +462,7 @@ void HunspellPrefixMatchRequest::run()
} }
try { try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word ); std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.empty() || containsWhitespace( trimmedWord ) ) { if ( trimmedWord.empty() || containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases // For now we don't analyze whitespace-containing phrases
@ -498,14 +489,14 @@ void HunspellPrefixMatchRequest::run()
finish(); finish();
} }
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ ) sptr< WordSearchRequest > HunspellDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{ {
return std::make_shared< HunspellPrefixMatchRequest >( word, getHunspellMutex(), hunspell ); return std::make_shared< HunspellPrefixMatchRequest >( word, getHunspellMutex(), hunspell );
} }
void getSuggestionsForExpression( wstring const & expression, void getSuggestionsForExpression( std::u32string const & expression,
vector< wstring > & suggestions, vector< std::u32string > & suggestions,
QMutex & hunspellMutex, QMutex & hunspellMutex,
Hunspell & hunspell ) Hunspell & hunspell )
{ {
@ -513,15 +504,15 @@ void getSuggestionsForExpression( wstring const & expression,
// This is useful for compound expressions where some words is // This is useful for compound expressions where some words is
// in different form, e.g. "dozing off" -> "doze off". // in different form, e.g. "dozing off" -> "doze off".
wstring trimmedWord = Folding::trimWhitespaceOrPunct( expression ); std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( expression );
wstring word, punct; std::u32string word, punct;
QList< wstring > words; QList< std::u32string > words;
suggestions.clear(); suggestions.clear();
// Parse string to separate words // Parse string to separate words
for ( wchar const * c = trimmedWord.c_str();; ++c ) { for ( char32_t const * c = trimmedWord.c_str();; ++c ) {
if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( *c ) ) { if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( *c ) ) {
if ( word.size() ) { if ( word.size() ) {
words.push_back( word ); words.push_back( word );
@ -552,7 +543,7 @@ void getSuggestionsForExpression( wstring const & expression,
// Combine result strings from suggestions // Combine result strings from suggestions
QList< wstring > results; QList< std::u32string > results;
for ( const auto & i : words ) { for ( const auto & i : words ) {
word = i; word = i;
@ -562,13 +553,13 @@ void getSuggestionsForExpression( wstring const & expression,
} }
} }
else { else {
QList< wstring > sugg = suggest( word, hunspellMutex, hunspell ); QList< std::u32string > sugg = suggest( word, hunspellMutex, hunspell );
int suggNum = sugg.size() + 1; int suggNum = sugg.size() + 1;
if ( suggNum > 3 ) { if ( suggNum > 3 ) {
suggNum = 3; suggNum = 3;
} }
int resNum = results.size(); int resNum = results.size();
wstring resultStr; std::u32string resultStr;
if ( resNum == 0 ) { if ( resNum == 0 ) {
for ( int k = 0; k < suggNum; k++ ) { for ( int k = 0; k < suggNum; k++ ) {
@ -598,12 +589,12 @@ void getSuggestionsForExpression( wstring const & expression,
} }
} }
string encodeToHunspell( Hunspell & hunspell, wstring const & str ) string encodeToHunspell( Hunspell & hunspell, std::u32string const & str )
{ {
Iconv conv( Iconv::GdWchar ); Iconv conv( Iconv::GdWchar );
void const * in = str.data(); void const * in = str.data();
size_t inLeft = str.size() * sizeof( wchar ); size_t inLeft = str.size() * sizeof( char32_t );
vector< char > result( str.size() * 4 + 1 ); // +1 isn't actually needed, vector< char > result( str.size() * 4 + 1 ); // +1 isn't actually needed,
// but then iconv complains on empty // but then iconv complains on empty
@ -616,17 +607,17 @@ string encodeToHunspell( Hunspell & hunspell, wstring const & str )
return convStr.toStdString(); return convStr.toStdString();
} }
wstring decodeFromHunspell( Hunspell & hunspell, char const * str ) std::u32string decodeFromHunspell( Hunspell & hunspell, char const * str )
{ {
Iconv conv( hunspell.get_dic_encoding() ); Iconv conv( hunspell.get_dic_encoding() );
void const * in = str; void const * in = str;
size_t inLeft = strlen( str ); size_t inLeft = strlen( str );
vector< wchar > result( inLeft + 1 ); // +1 isn't needed, but see above vector< char32_t > result( inLeft + 1 ); // +1 isn't needed, but see above
void * out = &result.front(); void * out = &result.front();
size_t outLeft = result.size() * sizeof( wchar ); size_t outLeft = result.size() * sizeof( char32_t );
QString convStr = conv.convert( in, inLeft ); QString convStr = conv.convert( in, inLeft );
return convStr.toStdU32String(); return convStr.toStdU32String();

View file

@ -1,5 +1,5 @@
#include "lingualibre.hh" #include "lingualibre.hh"
#include "utf8.hh" #include "text.hh"
#include "audiolink.hh" #include "audiolink.hh"
#include <QJsonArray> #include <QJsonArray>
@ -40,8 +40,8 @@ class LinguaArticleRequest: public Dictionary::DataRequest
public: public:
LinguaArticleRequest( wstring const & word, LinguaArticleRequest( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
QString const & languageCode_, QString const & languageCode_,
QString const & langWikipediaID_, QString const & langWikipediaID_,
string const & dictionaryId_, string const & dictionaryId_,
@ -51,7 +51,7 @@ public:
private: private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word ); void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots: private slots:
virtual void requestFinished( QNetworkReply * ); virtual void requestFinished( QNetworkReply * );
@ -165,12 +165,6 @@ WHERE {
} }
} }
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -181,7 +175,7 @@ WHERE {
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{ {
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >(); sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -190,7 +184,10 @@ WHERE {
return sr; return sr;
} }
sptr< DataRequest > getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) override sptr< DataRequest > getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ) override
{ {
if ( word.size() < 50 ) { if ( word.size() < 50 ) {
return std::make_shared< LinguaArticleRequest >( word, alts, languageCode, langWikipediaID, getId(), netMgr ); return std::make_shared< LinguaArticleRequest >( word, alts, languageCode, langWikipediaID, getId(), netMgr );
@ -237,8 +234,8 @@ void LinguaArticleRequest::cancel()
finish(); finish();
} }
LinguaArticleRequest::LinguaArticleRequest( const wstring & str, LinguaArticleRequest::LinguaArticleRequest( const std::u32string & str,
const vector< wstring > & alts, const vector< std::u32string > & alts,
const QString & languageCode_, const QString & languageCode_,
const QString & langWikipediaID, const QString & langWikipediaID,
const string & dictionaryId_, const string & dictionaryId_,
@ -251,7 +248,7 @@ LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
addQuery( mgr, str ); addQuery( mgr, str );
} }
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring & word ) void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const std::u32string & word )
{ {
// Doc of the <https://www.mediawiki.org/wiki/API:Query> // Doc of the <https://www.mediawiki.org/wiki/API:Query>
@ -279,7 +276,7 @@ void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring
auto netReply = std::shared_ptr< QNetworkReply >( mgr.get( netRequest ) ); auto netReply = std::shared_ptr< QNetworkReply >( mgr.get( netRequest ) );
netReplies.emplace_back( netReply, Utf8::encode( word ) ); netReplies.emplace_back( netReply, Text::toUtf8( word ) );
} }

View file

@ -34,7 +34,7 @@
#include "dict/transliteration/romaji.hh" #include "dict/transliteration/romaji.hh"
#include "dict/transliteration/russian.hh" #include "dict/transliteration/russian.hh"
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
#include "dict/epwing.hh" #include "dict/epwing.hh"
#endif #endif
@ -83,7 +83,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
<< "*.zim" << "*.zim"
<< "*.zimaa" << "*.zimaa"
#endif #endif
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
<< "*catalogs" << "*catalogs"
#endif #endif
; ;
@ -181,7 +181,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
#ifdef MAKE_ZIM_SUPPORT #ifdef MAKE_ZIM_SUPPORT
addDicts( Zim::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this, maxHeadwordToExpand ) ); addDicts( Zim::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this, maxHeadwordToExpand ) );
#endif #endif
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
addDicts( Epwing::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this ) ); addDicts( Epwing::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this ) );
#endif #endif
} }
@ -274,7 +274,7 @@ void loadDictionaries( QWidget * parent,
addDicts( Forvo::makeDictionaries( loadDicts, cfg.forvo, dictNetMgr ) ); addDicts( Forvo::makeDictionaries( loadDicts, cfg.forvo, dictNetMgr ) );
addDicts( Lingua::makeDictionaries( loadDicts, cfg.lingua, dictNetMgr ) ); addDicts( Lingua::makeDictionaries( loadDicts, cfg.lingua, dictNetMgr ) );
addDicts( Programs::makeDictionaries( cfg.programs ) ); addDicts( Programs::makeDictionaries( cfg.programs ) );
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
addDicts( VoiceEngines::makeDictionaries( cfg.voiceEngines ) ); addDicts( VoiceEngines::makeDictionaries( cfg.voiceEngines ) );
#endif #endif
addDicts( DictServer::makeDictionaries( cfg.dictServers ) ); addDicts( DictServer::makeDictionaries( cfg.dictServers ) );

View file

@ -5,7 +5,7 @@
#include "dictfile.hh" #include "dictfile.hh"
#include "iconv.hh" #include "iconv.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "audiolink.hh" #include "audiolink.hh"
@ -24,7 +24,6 @@
namespace Lsa { namespace Lsa {
using std::string; using std::string;
using gd::wstring;
using std::map; using std::map;
using std::multimap; using std::multimap;
using std::set; using std::set;
@ -159,11 +158,6 @@ public:
string getName() noexcept override; string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.soundsCount; return idxHeader.soundsCount;
@ -174,8 +168,10 @@ public:
return getArticleCount(); return getArticleCount();
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -204,9 +200,9 @@ LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vecto
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex ); openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
} }
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > LsaDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -220,13 +216,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, string > mainArticles, alternateArticles; multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -241,12 +237,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, string > & mapToUse = ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.word ) ); mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.word ) );
@ -259,7 +256,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
string result; string result;
multimap< wstring, string >::const_iterator i; multimap< std::u32string, string >::const_iterator i;
result += "<table class=\"lsa_play\">"; result += "<table class=\"lsa_play\">";
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -394,7 +391,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
string strippedName = Utils::endsWithIgnoreCase( name, ".wav" ) ? string( name, 0, name.size() - 4 ) : name; string strippedName = Utils::endsWithIgnoreCase( name, ".wav" ) ? string( name, 0, name.size() - 4 ) : name;
vector< WordArticleLink > chain = findArticles( Utf8::decode( strippedName ) ); vector< WordArticleLink > chain = findArticles( Text::toUtf32( strippedName ) );
if ( chain.empty() ) { if ( chain.empty() ) {
return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource
@ -577,7 +574,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Insert new entry into an index // Insert new entry into an index
indexedWords.addWord( Utf8::decode( e.name ), offset ); indexedWords.addWord( Text::toUtf32( e.name ), offset );
} }
idxHeader.vorbisOffset = f.tell(); idxHeader.vorbisOffset = f.tell();

View file

@ -4,10 +4,9 @@
#include "mdx.hh" #include "mdx.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "dictfile.hh" #include "dictfile.hh"
#include "wstring.hh" #include "text.hh"
#include "wstring_qt.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "audiolink.hh" #include "audiolink.hh"
@ -37,8 +36,6 @@ namespace Mdx {
using std::map; using std::map;
using std::multimap; using std::multimap;
using std::set; using std::set;
using gd::wstring;
using gd::wchar;
using std::list; using std::list;
using std::pair; using std::pair;
using std::string; using std::string;
@ -129,7 +126,7 @@ public:
/// Checks whether the given file exists in the mdd file or not. /// Checks whether the given file exists in the mdd file or not.
/// Note that this function is thread-safe, since it does not access mdd file. /// Note that this function is thread-safe, since it does not access mdd file.
bool hasFile( gd::wstring const & name ) bool hasFile( std::u32string const & name )
{ {
if ( !isFileOpen ) { if ( !isFileOpen ) {
return false; return false;
@ -140,7 +137,7 @@ public:
/// Attempts loading the given file into the given vector. Returns true on /// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise. /// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & result ) bool loadFile( std::u32string const & name, std::vector< char > & result )
{ {
if ( !isFileOpen ) { if ( !isFileOpen ) {
return false; return false;
@ -212,12 +209,6 @@ public:
void deferredInit() override; void deferredInit() override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -238,8 +229,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const & word,
getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
QString const & getDescription() override; QString const & getDescription() override;
@ -287,7 +280,7 @@ private:
friend class MdxArticleRequest; friend class MdxArticleRequest;
friend class MddResourceRequest; friend class MddResourceRequest;
void loadResourceFile( const wstring & resourceName, vector< char > & data ); void loadResourceFile( const std::u32string & resourceName, vector< char > & data );
}; };
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ): MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
@ -494,8 +487,8 @@ sptr< Dictionary::DataRequest > MdxDictionary::getSearchResults( QString const &
class MdxArticleRequest: public Dictionary::DataRequest class MdxArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
MdxDictionary & dict; MdxDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -504,8 +497,8 @@ class MdxArticleRequest: public Dictionary::DataRequest
public: public:
MdxArticleRequest( wstring const & word_, MdxArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
MdxDictionary & dict_, MdxDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -608,8 +601,8 @@ void MdxArticleRequest::run()
// Handle internal redirects // Handle internal redirects
if ( strncmp( articleBody.c_str(), "@@@LINK=", 8 ) == 0 ) { if ( strncmp( articleBody.c_str(), "@@@LINK=", 8 ) == 0 ) {
wstring target = Utf8::decode( articleBody.c_str() + 8 ); std::u32string target = Text::toUtf32( articleBody.c_str() + 8 );
target = Folding::trimWhitespace( target ); target = Folding::trimWhitespace( target );
// Make an additional query for this redirection // Make an additional query for this redirection
vector< WordArticleLink > altChain = dict.findArticles( target ); vector< WordArticleLink > altChain = dict.findArticles( target );
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
@ -632,9 +625,9 @@ void MdxArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word, sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const std::u32string & word,
const vector< wstring > & alts, const vector< std::u32string > & alts,
const wstring &, const std::u32string &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
return std::make_shared< MdxArticleRequest >( word, alts, *this, ignoreDiacritics ); return std::make_shared< MdxArticleRequest >( word, alts, *this, ignoreDiacritics );
@ -644,7 +637,7 @@ sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
class MddResourceRequest: public Dictionary::DataRequest class MddResourceRequest: public Dictionary::DataRequest
{ {
MdxDictionary & dict; MdxDictionary & dict;
wstring resourceName; std::u32string resourceName;
QAtomicInt isCancelled; QAtomicInt isCancelled;
QFuture< void > f; QFuture< void > f;
@ -653,7 +646,7 @@ public:
MddResourceRequest( MdxDictionary & dict_, string const & resourceName_ ): MddResourceRequest( MdxDictionary & dict_, string const & resourceName_ ):
Dictionary::DataRequest( &dict_ ), Dictionary::DataRequest( &dict_ ),
dict( dict_ ), dict( dict_ ),
resourceName( Utf8::decode( resourceName_ ) ) resourceName( Text::toUtf32( resourceName_ ) )
{ {
f = QtConcurrent::run( [ this ]() { f = QtConcurrent::run( [ this ]() {
this->run(); this->run();
@ -728,7 +721,7 @@ void MddResourceRequest::run()
} }
// In order to prevent recursive internal redirection... // In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded; set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) { for ( ;; ) {
// Some runnables linger enough that they are cancelled before they start // Some runnables linger enough that they are cancelled before they start
@ -736,7 +729,7 @@ void MddResourceRequest::run()
finish(); finish();
return; return;
} }
string u8ResourceName = Utf8::encode( resourceName ); string u8ResourceName = Text::toUtf8( resourceName );
if ( !resourceIncluded.insert( resourceName ).second ) { if ( !resourceIncluded.insert( resourceName ).second ) {
finish(); finish();
return; return;
@ -1157,11 +1150,11 @@ QString MdxDictionary::getCachedFileName( QString filename )
qWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() ); qWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
return QString(); return QString();
} }
gd::wstring resourceName = filename.toStdU32String(); std::u32string resourceName = filename.toStdU32String();
vector< char > data; vector< char > data;
// In order to prevent recursive internal redirection... // In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded; set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) { for ( ;; ) {
if ( !resourceIncluded.insert( resourceName ).second ) { if ( !resourceIncluded.insert( resourceName ).second ) {
@ -1200,10 +1193,10 @@ QString MdxDictionary::getCachedFileName( QString filename )
return fullName; return fullName;
} }
void MdxDictionary::loadResourceFile( const wstring & resourceName, vector< char > & data ) void MdxDictionary::loadResourceFile( const std::u32string & resourceName, vector< char > & data )
{ {
wstring newResourceName = resourceName; std::u32string newResourceName = resourceName;
string u8ResourceName = Utf8::encode( resourceName ); string u8ResourceName = Text::toUtf8( resourceName );
// Convert to the Windows separator // Convert to the Windows separator
std::replace( newResourceName.begin(), newResourceName.end(), '/', '\\' ); std::replace( newResourceName.begin(), newResourceName.end(), '/', '\\' );

View file

@ -2,7 +2,6 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "mediawiki.hh" #include "mediawiki.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager> #include <QNetworkAccessManager>
#include <QNetworkReply> #include <QNetworkReply>
#include <QUrl> #include <QUrl>
@ -56,11 +55,6 @@ public:
return name; return name;
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -71,9 +65,10 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override; sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override quint32 getLangFrom() const override
{ {
@ -138,7 +133,10 @@ class MediaWikiWordSearchRequest: public MediaWikiWordSearchRequestSlots
public: public:
MediaWikiWordSearchRequest( wstring const &, QString const & url, QString const & lang, QNetworkAccessManager & mgr ); MediaWikiWordSearchRequest( std::u32string const &,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr );
~MediaWikiWordSearchRequest(); ~MediaWikiWordSearchRequest();
@ -149,7 +147,7 @@ private:
void downloadFinished() override; void downloadFinished() override;
}; };
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str, MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( std::u32string const & str,
QString const & url, QString const & url,
QString const & lang, QString const & lang,
QNetworkAccessManager & mgr ): QNetworkAccessManager & mgr ):
@ -395,8 +393,8 @@ class MediaWikiArticleRequest: public MediaWikiDataRequestSlots
public: public:
MediaWikiArticleRequest( wstring const & word, MediaWikiArticleRequest( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
QString const & url, QString const & url,
QString const & lang, QString const & lang,
QNetworkAccessManager & mgr, QNetworkAccessManager & mgr,
@ -406,7 +404,7 @@ public:
private: private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word ); void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
void requestFinished( QNetworkReply * ) override; void requestFinished( QNetworkReply * ) override;
@ -440,8 +438,8 @@ void MediaWikiArticleRequest::cancel()
finish(); finish();
} }
MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str, MediaWikiArticleRequest::MediaWikiArticleRequest( std::u32string const & str,
vector< wstring > const & alts, vector< std::u32string > const & alts,
QString const & url_, QString const & url_,
QString const & lang_, QString const & lang_,
QNetworkAccessManager & mgr, QNetworkAccessManager & mgr,
@ -463,7 +461,7 @@ MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
} }
} }
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str ) void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{ {
qDebug( "MediaWiki: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() ); qDebug( "MediaWiki: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
@ -710,7 +708,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
} }
} }
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word, unsigned long maxResults ) sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{ {
(void)maxResults; (void)maxResults;
@ -724,8 +722,10 @@ sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word
} }
} }
sptr< DataRequest > sptr< DataRequest > MediaWikiDictionary::getArticle( std::u32string const & word,
MediaWikiDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) vector< std::u32string > const & alts,
std::u32string const &,
bool )
{ {
if ( word.size() > 80 ) { if ( word.size() > 80 ) {

View file

@ -4,8 +4,7 @@
#include "programs.hh" #include "programs.hh"
#include "audiolink.hh" #include "audiolink.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "utf8.hh" #include "text.hh"
#include "wstring_qt.hh"
#include "iconv.hh" #include "iconv.hh"
#include "utils.hh" #include "utils.hh"
#include "globalbroadcaster.hh" #include "globalbroadcaster.hh"
@ -36,11 +35,6 @@ public:
return prg.name.toUtf8().data(); return prg.name.toUtf8().data();
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -51,16 +45,17 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override; sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected: protected:
void loadIcon() noexcept override; void loadIcon() noexcept override;
}; };
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ ) sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{ {
if ( prg.type == Config::Program::PrefixMatch ) { if ( prg.type == Config::Program::PrefixMatch ) {
@ -75,8 +70,10 @@ sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word,
} }
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > ProgramsDictionary::getArticle( std::u32string const & word,
ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool ) vector< std::u32string > const &,
std::u32string const &,
bool )
{ {
switch ( prg.type ) { switch ( prg.type ) {
@ -84,7 +81,7 @@ ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &,
// Audio results are instantaneous // Audio results are instantaneous
string result; string result;
string wordUtf8( Utf8::encode( word ) ); string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"programs_play\"><tr>"; result += "<table class=\"programs_play\"><tr>";

View file

@ -6,14 +6,13 @@
#include <QProcess> #include <QProcess>
#include "dictionary.hh" #include "dictionary.hh"
#include "config.hh" #include "config.hh"
#include "wstring.hh" #include "text.hh"
/// Support for arbitrary programs. /// Support for arbitrary programs.
namespace Programs { namespace Programs {
using std::vector; using std::vector;
using std::string; using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Programs const & ); vector< sptr< Dictionary::Class > > makeDictionaries( Config::Programs const & );

View file

@ -9,7 +9,7 @@
#include "htmlescape.hh" #include "htmlescape.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "sdict.hh" #include "sdict.hh"
#include "utf8.hh" #include "text.hh"
#include <map> #include <map>
#include <QAtomicInt> #include <QAtomicInt>
#include <QDir> #include <QDir>
@ -26,7 +26,6 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -113,11 +112,6 @@ public:
~SdictDictionary(); ~SdictDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -138,8 +132,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override; QString const & getDescription() override;
@ -421,8 +417,8 @@ SdictDictionary::getSearchResults( QString const & searchString, int searchMode,
class SdictArticleRequest: public Dictionary::DataRequest class SdictArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
SdictDictionary & dict; SdictDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -432,8 +428,8 @@ class SdictArticleRequest: public Dictionary::DataRequest
public: public:
SdictArticleRequest( wstring const & word_, SdictArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
SdictDictionary & dict_, SdictDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -477,13 +473,13 @@ void SdictArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -512,12 +508,12 @@ void SdictArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -537,7 +533,7 @@ void SdictArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>"; result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>";
@ -566,9 +562,9 @@ void SdictArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > SdictDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -746,7 +742,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Insert new entry // Insert new entry
indexedWords.addWord( Utf8::decode( string( data.data(), size ) ), articleOffset ); indexedWords.addWord( Text::toUtf32( string( data.data(), size ) ), articleOffset );
pos += el.nextWord; pos += el.nextWord;
} }

View file

@ -6,7 +6,7 @@
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "decompress.hh" #include "decompress.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "ftshelpers.hh" #include "ftshelpers.hh"
@ -40,7 +40,6 @@ using std::vector;
using std::multimap; using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -610,12 +609,6 @@ public:
~SlobDictionary(); ~SlobDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -636,8 +629,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -859,7 +854,7 @@ void SlobDictionary::loadResource( std::string & resourceName, string & data )
vector< WordArticleLink > link; vector< WordArticleLink > link;
RefEntry entry; RefEntry entry;
link = resourceIndex.findArticles( Utf8::decode( resourceName ) ); link = resourceIndex.findArticles( Text::toUtf32( resourceName ) );
if ( link.empty() ) { if ( link.empty() ) {
return; return;
@ -995,8 +990,8 @@ SlobDictionary::getSearchResults( QString const & searchString, int searchMode,
class SlobArticleRequest: public Dictionary::DataRequest class SlobArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
SlobDictionary & dict; SlobDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -1005,8 +1000,8 @@ class SlobArticleRequest: public Dictionary::DataRequest
public: public:
SlobArticleRequest( wstring const & word_, SlobArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
SlobDictionary & dict_, SlobDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -1051,13 +1046,13 @@ void SlobArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint64 > articlesIncluded; // Some synonims make it that the articles set< quint64 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -1090,12 +1085,12 @@ void SlobArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1111,7 +1106,7 @@ void SlobArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += R"(<div class="slobdict"><h3 class="slobdict_headword">)"; result += R"(<div class="slobdict"><h3 class="slobdict_headword">)";
@ -1134,9 +1129,9 @@ void SlobArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > SlobDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {

View file

@ -3,13 +3,12 @@
#include "sounddir.hh" #include "sounddir.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "filetype.hh" #include "filetype.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "audiolink.hh" #include "audiolink.hh"
#include "wstring_qt.hh"
#include "utils.hh" #include "utils.hh"
@ -21,7 +20,6 @@
namespace SoundDir { namespace SoundDir {
using std::string; using std::string;
using gd::wstring;
using std::map; using std::map;
using std::multimap; using std::multimap;
using std::set; using std::set;
@ -75,12 +73,6 @@ public:
vector< string > const & dictionaryFiles, vector< string > const & dictionaryFiles,
QString const & iconFilename_ ); QString const & iconFilename_ );
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.soundsCount; return idxHeader.soundsCount;
@ -91,8 +83,10 @@ public:
return getArticleCount(); return getArticleCount();
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -120,9 +114,9 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex ); openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
} }
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics ); vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
@ -136,13 +130,13 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
} }
// maps to the chain number // maps to the chain number
multimap< wstring, unsigned > mainArticles, alternateArticles; multimap< std::u32string, unsigned > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -157,12 +151,12 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, unsigned > & mapToUse = multimap< std::u32string, unsigned > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( chain[ x ].word ), x ) ); mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( chain[ x ].word ), x ) );
@ -176,7 +170,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
string result; string result;
multimap< wstring, uint32_t >::const_iterator i; multimap< std::u32string, uint32_t >::const_iterator i;
string displayedName; string displayedName;
vector< char > chunk; vector< char > chunk;
@ -405,11 +399,11 @@ void addDir( QDir const & baseDir,
const uint32_t articleOffset = chunks.startNewBlock(); const uint32_t articleOffset = chunks.startNewBlock();
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 ); chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
// Take the entry's file name as UTF-32 and strip its extension, i.e.
// everything from the last '.' onwards. Names without a dot are kept as-is.
std::u32string name = i->fileName().toStdU32String();
// U'.' is a char32_t literal and therefore matches std::u32string's
// character type; the previous L'.' was a wchar_t literal left over from
// the wstring-based code and only worked through an implicit conversion.
const std::u32string::size_type pos = name.rfind( U'.' );
if ( pos != std::u32string::npos ) {
  name.erase( pos );
}

View file

@ -16,7 +16,7 @@ Sources::Sources( QWidget * parent, Config::Class const & cfg ):
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT #ifdef MAKE_CHINESE_CONVERSION_SUPPORT
chineseConversion( new ChineseConversion( this, cfg.transliteration.chinese ) ), chineseConversion( new ChineseConversion( this, cfg.transliteration.chinese ) ),
#endif #endif
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
textToSpeechSource( nullptr ), textToSpeechSource( nullptr ),
#endif #endif
itemDelegate( new QItemDelegate( this ) ), itemDelegate( new QItemDelegate( this ) ),
@ -129,7 +129,7 @@ Sources::Sources( QWidget * parent, Config::Class const & cfg ):
ui.forvoLanguageCodes->setText( forvo.languageCodes ); ui.forvoLanguageCodes->setText( forvo.languageCodes );
// Text to speech // Text to speech
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
if ( !cfg.notts ) { if ( !cfg.notts ) {
textToSpeechSource = new TextToSpeechSource( this, cfg.voiceEngines ); textToSpeechSource = new TextToSpeechSource( this, cfg.voiceEngines );
ui.tabWidget->addTab( textToSpeechSource, QIcon( ":/icons/text2speech.svg" ), tr( "Text to Speech" ) ); ui.tabWidget->addTab( textToSpeechSource, QIcon( ":/icons/text2speech.svg" ), tr( "Text to Speech" ) );
@ -325,7 +325,7 @@ void Sources::on_removeProgram_clicked()
} }
} }
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
Config::VoiceEngines Sources::getVoiceEngines() const Config::VoiceEngines Sources::getVoiceEngines() const
{ {
if ( !textToSpeechSource ) if ( !textToSpeechSource )

View file

@ -295,7 +295,7 @@ public:
{ {
return programsModel.getCurrentPrograms(); return programsModel.getCurrentPrograms();
} }
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
Config::VoiceEngines getVoiceEngines() const; Config::VoiceEngines getVoiceEngines() const;
#endif #endif
Config::Hunspell getHunspell() const; Config::Hunspell getHunspell() const;
@ -317,7 +317,7 @@ private:
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT #ifdef MAKE_CHINESE_CONVERSION_SUPPORT
ChineseConversion * chineseConversion; ChineseConversion * chineseConversion;
#endif #endif
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
TextToSpeechSource * textToSpeechSource; TextToSpeechSource * textToSpeechSource;
#endif #endif
QItemDelegate * itemDelegate; QItemDelegate * itemDelegate;

View file

@ -4,7 +4,7 @@
#include "stardict.hh" #include "stardict.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "dictzip.hh" #include "dictzip.hh"
#include "xdxf2html.hh" #include "xdxf2html.hh"
@ -42,7 +42,6 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -122,7 +121,7 @@ class StardictDictionary: public BtreeIndexing::BtreeDictionary
File::Index idx; File::Index idx;
IdxHeader idxHeader; IdxHeader idxHeader;
string sameTypeSequence; string sameTypeSequence;
ChunkedStorage::Reader chunks; std::unique_ptr< ChunkedStorage::Reader > chunks;
QMutex dzMutex; QMutex dzMutex;
dictData * dz; dictData * dz;
QMutex resourceZipMutex; QMutex resourceZipMutex;
@ -134,11 +133,6 @@ public:
~StardictDictionary(); ~StardictDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.wordCount; return idxHeader.wordCount;
@ -159,10 +153,12 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override; sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -215,12 +211,14 @@ StardictDictionary::StardictDictionary( string const & id,
string const & indexFile, string const & indexFile,
vector< string > const & dictionaryFiles ): vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ), BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, QIODevice::ReadOnly ), idx( indexFile, QIODevice::ReadOnly )
idxHeader( idx.read< IdxHeader >() ),
sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
chunks( idx, idxHeader.chunksOffset )
{ {
dictionaryName = loadString( idxHeader.bookNameSize ); // reading headers, note that reading order matters
idxHeader = idx.read< IdxHeader >();
dictionaryName = loadString( idxHeader.bookNameSize );
sameTypeSequence = loadString( idxHeader.sameTypeSequenceSize );
chunks = std::make_unique< ChunkedStorage::Reader >( idx, idxHeader.chunksOffset );
// Open the .dict file // Open the .dict file
DZ_ERRORS error; DZ_ERRORS error;
@ -301,7 +299,7 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
QMutexLocker _( &idxMutex ); QMutexLocker _( &idxMutex );
char * articleData = chunks.getBlock( articleAddress, chunk ); char * articleData = chunks->getBlock( articleAddress, chunk );
memcpy( &offset, articleData, sizeof( uint32_t ) ); memcpy( &offset, articleData, sizeof( uint32_t ) );
articleData += sizeof( uint32_t ); articleData += sizeof( uint32_t );
@ -1167,7 +1165,7 @@ sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString co
class StardictHeadwordsRequest: public Dictionary::WordSearchRequest class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
{ {
wstring word; std::u32string word;
StardictDictionary & dict; StardictDictionary & dict;
QAtomicInt isCancelled; QAtomicInt isCancelled;
@ -1175,7 +1173,7 @@ class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
public: public:
StardictHeadwordsRequest( wstring const & word_, StardictDictionary & dict_ ): StardictHeadwordsRequest( std::u32string const & word_, StardictDictionary & dict_ ):
word( word_ ), word( word_ ),
dict( dict_ ) dict( dict_ )
{ {
@ -1210,7 +1208,7 @@ void StardictHeadwordsRequest::run()
// Limit the synonyms to at most 10 entries // Limit the synonyms to at most 10 entries
vector< WordArticleLink > chain = dict.findArticles( word, false, 10 ); vector< WordArticleLink > chain = dict.findArticles( word, false, 10 );
wstring caseFolded = Folding::applySimpleCaseOnly( word ); std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) { for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -1222,7 +1220,7 @@ void StardictHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, articleText ); dict.loadArticle( x.articleOffset, headword, articleText );
wstring headwordDecoded = Utf8::decode( headword ); std::u32string headwordDecoded = Text::toUtf32( headword );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) { if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the // The headword seems to differ from the input word, which makes the
@ -1240,7 +1238,7 @@ void StardictHeadwordsRequest::run()
finish(); finish();
} }
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( wstring const & word ) sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( std::u32string const & word )
{ {
return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) : return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word ); Class::findHeadwordsForSynonym( word );
@ -1253,8 +1251,8 @@ sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynony
class StardictArticleRequest: public Dictionary::DataRequest class StardictArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
StardictDictionary & dict; StardictDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -1264,8 +1262,8 @@ class StardictArticleRequest: public Dictionary::DataRequest
public: public:
StardictArticleRequest( wstring const & word_, StardictArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
StardictDictionary & dict_, StardictDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -1315,13 +1313,13 @@ void StardictArticleRequest::run()
} }
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -1348,12 +1346,12 @@ void StardictArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1369,7 +1367,7 @@ void StardictArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner(); string cleaner = Utils::Html::getHtmlCleaner();
@ -1412,9 +1410,9 @@ void StardictArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > StardictDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -1572,7 +1570,7 @@ void StardictResourceRequest::run()
if ( dict.resourceZip.isOpen() ) { if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex ); QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) { if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive throw; // Make it fail since we couldn't read the archive
} }
} }
@ -1804,10 +1802,10 @@ static void handleIdxSynFile( string const & fileName,
// Insert new entry into an index // Insert new entry into an index
if ( parseHeadwords ) { if ( parseHeadwords ) {
indexedWords.addWord( Utf8::decode( word ), offset ); indexedWords.addWord( Text::toUtf32( word ), offset );
} }
else { else {
indexedWords.addSingleWord( Utf8::decode( word ), offset ); indexedWords.addSingleWord( Text::toUtf32( word ), offset );
} }
} }

View file

@ -7,7 +7,7 @@
#include <opencc/opencc.h> #include <opencc/opencc.h>
#include "folding.hh" #include "folding.hh"
#include "transliteration.hh" #include "transliteration.hh"
#include "utf8.hh" #include "text.hh"
namespace ChineseTranslit { namespace ChineseTranslit {
@ -27,7 +27,7 @@ public:
QString const & openccConfig ); QString const & openccConfig );
~CharacterConversionDictionary(); ~CharacterConversionDictionary();
std::vector< gd::wstring > getAlternateWritings( gd::wstring const & ) noexcept override; std::vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept override;
}; };
CharacterConversionDictionary::CharacterConversionDictionary( std::string const & id, CharacterConversionDictionary::CharacterConversionDictionary( std::string const & id,
@ -68,15 +68,15 @@ CharacterConversionDictionary::~CharacterConversionDictionary()
// #endif // #endif
} }
std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings( gd::wstring const & str ) noexcept std::vector< std::u32string > CharacterConversionDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{ {
std::vector< gd::wstring > results; std::vector< std::u32string > results;
if ( converter != NULL ) { if ( converter != NULL ) {
gd::wstring folded = Folding::applySimpleCaseOnly( str ); std::u32string folded = Folding::applySimpleCaseOnly( str );
std::string input = Utf8::encode( folded ); std::string input = Text::toUtf8( folded );
std::string output; std::string output;
gd::wstring result; std::u32string result;
try { try {
// #ifdef Q_OS_MAC // #ifdef Q_OS_MAC
@ -93,7 +93,7 @@ std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings(
// #else // #else
// output = converter->Convert( input ); // output = converter->Convert( input );
// #endif // #endif
result = Utf8::decode( output ); result = Text::toUtf32( output );
} }
catch ( std::exception & ex ) { catch ( std::exception & ex ) {
qWarning( "OpenCC: conversion failed %s", ex.what() ); qWarning( "OpenCC: conversion failed %s", ex.what() );

View file

@ -2,12 +2,11 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "transliteration.hh" #include "transliteration.hh"
#include "utf8.hh" #include "text.hh"
#include "folding.hh" #include "folding.hh"
namespace Transliteration { namespace Transliteration {
using gd::wchar;
BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id, BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id,
string const & name_, string const & name_,
@ -26,11 +25,6 @@ string BaseTransliterationDictionary::getName() noexcept
return name; return name;
} }
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() noexcept
{
return map< Dictionary::Property, string >();
}
unsigned long BaseTransliterationDictionary::getArticleCount() noexcept unsigned long BaseTransliterationDictionary::getArticleCount() noexcept
{ {
return 0; return 0;
@ -41,24 +35,28 @@ unsigned long BaseTransliterationDictionary::getWordCount() noexcept
return 0; return 0;
} }
/// Prefix-match entry point of the transliteration base class.
/// Both arguments are ignored; the call always returns a freshly
/// default-constructed Dictionary::WordSearchRequestInstant.
/// NOTE(review): presumably that is an already-finished request carrying no
/// matches -- confirm against the WordSearchRequestInstant declaration.
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( std::u32string const &,
                                                                                  unsigned long )
{
  sptr< Dictionary::WordSearchRequest > instantRequest = std::make_shared< Dictionary::WordSearchRequestInstant >();
  return instantRequest;
}
/// Article lookup entry point of the transliteration base class.
/// None of the four arguments is used; the call always returns a new
/// Dictionary::DataRequestInstant constructed with `false`.
/// NOTE(review): the `false` presumably marks the request as "no data
/// available" -- confirm against the DataRequestInstant constructor.
sptr< Dictionary::DataRequest > BaseTransliterationDictionary::getArticle( std::u32string const &,
                                                                           vector< std::u32string > const &,
                                                                           std::u32string const &,
                                                                           bool )
{
  sptr< Dictionary::DataRequest > instantRequest = std::make_shared< Dictionary::DataRequestInstant >( false );
  return instantRequest;
}
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str ) sptr< Dictionary::WordSearchRequest >
BaseTransliterationDictionary::findHeadwordsForSynonym( std::u32string const & str )
{ {
sptr< Dictionary::WordSearchRequestInstant > result = std::make_shared< Dictionary::WordSearchRequestInstant >(); sptr< Dictionary::WordSearchRequestInstant > result = std::make_shared< Dictionary::WordSearchRequestInstant >();
vector< wstring > alts = getAlternateWritings( str ); vector< std::u32string > alts = getAlternateWritings( str );
qDebug( "alts = %u", (unsigned)alts.size() ); qDebug( "alts = %u", (unsigned)alts.size() );
@ -72,13 +70,13 @@ sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwor
void Table::ins( char const * from, char const * to ) void Table::ins( char const * from, char const * to )
{ {
wstring fr = Utf8::decode( std::string( from ) ); std::u32string fr = Text::toUtf32( std::string( from ) );
if ( fr.size() > maxEntrySize ) { if ( fr.size() > maxEntrySize ) {
maxEntrySize = fr.size(); maxEntrySize = fr.size();
} }
insert( std::pair< wstring, wstring >( fr, Utf8::decode( std::string( to ) ) ) ); insert( std::pair< std::u32string, std::u32string >( fr, Text::toUtf32( std::string( to ) ) ) );
} }
@ -89,12 +87,12 @@ TransliterationDictionary::TransliterationDictionary(
{ {
} }
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str ) noexcept vector< std::u32string > TransliterationDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{ {
vector< wstring > results; vector< std::u32string > results;
wstring result, folded; std::u32string result, folded;
wstring const * target; std::u32string const * target;
if ( caseSensitive ) { if ( caseSensitive ) {
// Don't do any transform -- the transliteration is case-sensitive // Don't do any transform -- the transliteration is case-sensitive
@ -105,8 +103,8 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
target = &folded; target = &folded;
} }
wchar const * ptr = target->c_str(); char32_t const * ptr = target->c_str();
size_t left = target->size(); size_t left = target->size();
Table::const_iterator i; Table::const_iterator i;
@ -115,7 +113,7 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
for ( x = table.getMaxEntrySize(); x >= 1; --x ) { for ( x = table.getMaxEntrySize(); x >= 1; --x ) {
if ( left >= x ) { if ( left >= x ) {
i = table.find( wstring( ptr, x ) ); i = table.find( std::u32string( ptr, x ) );
if ( i != table.end() ) { if ( i != table.end() ) {
result.append( i->second ); result.append( i->second );

View file

@ -9,7 +9,6 @@
namespace Transliteration { namespace Transliteration {
using std::map; using std::map;
using gd::wstring;
using std::string; using std::string;
using std::vector; using std::vector;
@ -28,24 +27,22 @@ public:
virtual string getName() noexcept; virtual string getName() noexcept;
virtual map< Dictionary::Property, string > getProperties() noexcept;
virtual unsigned long getArticleCount() noexcept; virtual unsigned long getArticleCount() noexcept;
virtual unsigned long getWordCount() noexcept; virtual unsigned long getWordCount() noexcept;
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept = 0; virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept = 0;
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ); virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ); virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::DataRequest > virtual sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const &, wstring const &, bool ); getArticle( std::u32string const &, vector< std::u32string > const &, std::u32string const &, bool );
}; };
class Table: public map< wstring, wstring > class Table: public map< std::u32string, std::u32string >
{ {
unsigned maxEntrySize; unsigned maxEntrySize;
@ -79,7 +76,7 @@ public:
TransliterationDictionary( TransliterationDictionary(
string const & id, string const & name, QIcon icon, Table const & table, bool caseSensitive = true ); string const & id, string const & name, QIcon icon, Table const & table, bool caseSensitive = true );
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept; virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
}; };
} // namespace Transliteration } // namespace Transliteration

View file

@ -4,9 +4,8 @@
#include "indexedzip.hh" #include "indexedzip.hh"
#include "zipfile.hh" #include "zipfile.hh"
#include <zlib.h> #include <zlib.h>
#include "utf8.hh" #include "text.hh"
#include "iconv.hh" #include "iconv.hh"
#include "wstring_qt.hh"
#include <QtCore5Compat/QTextCodec> #include <QtCore5Compat/QTextCodec>
#include <QMutexLocker> #include <QMutexLocker>
@ -23,7 +22,7 @@ bool IndexedZip::openZipFile( QString const & name )
return zipIsOpen; return zipIsOpen;
} }
bool IndexedZip::hasFile( gd::wstring const & name ) bool IndexedZip::hasFile( std::u32string const & name )
{ {
if ( !zipIsOpen ) { if ( !zipIsOpen ) {
return false; return false;
@ -34,7 +33,7 @@ bool IndexedZip::hasFile( gd::wstring const & name )
return !links.empty(); return !links.empty();
} }
bool IndexedZip::loadFile( gd::wstring const & name, vector< char > & data ) bool IndexedZip::loadFile( std::u32string const & name, vector< char > & data )
{ {
if ( !zipIsOpen ) { if ( !zipIsOpen ) {
return false; return false;
@ -180,7 +179,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
if ( !hasNonAscii ) { if ( !hasNonAscii ) {
// Add entry as is // Add entry as is
zipFileNames.addSingleWord( Utf8::decode( entry.fileName.data() ), entry.localHeaderOffset ); zipFileNames.addSingleWord( Text::toUtf32( entry.fileName.data() ), entry.localHeaderOffset );
if ( filesCount ) { if ( filesCount ) {
*filesCount += 1; *filesCount += 1;
} }
@ -192,7 +191,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// Utf8 // Utf8
try { try {
wstring decoded = Utf8::decode( entry.fileName.constData() ); std::u32string decoded = Text::toUtf32( entry.fileName.constData() );
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset ); zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
if ( filesCount != 0 && !alreadyCounted ) { if ( filesCount != 0 && !alreadyCounted ) {
@ -200,12 +199,12 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
alreadyCounted = true; alreadyCounted = true;
} }
} }
catch ( Utf8::exCantDecode & ) { catch ( Text::exCantDecode & ) {
// Failed to decode // Failed to decode
} }
if ( !entry.fileNameInUTF8 ) { if ( !entry.fileNameInUTF8 ) {
wstring nameInSystemLocale; std::u32string nameInSystemLocale;
// System locale // System locale
if ( localeCodec ) { if ( localeCodec ) {
@ -224,7 +223,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP866 // CP866
try { try {
wstring decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() ); std::u32string decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) { if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset ); zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
@ -241,7 +240,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP1251 // CP1251
try { try {
wstring decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() ); std::u32string decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) { if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset ); zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );

View file

@ -37,11 +37,11 @@ public:
/// Checks whether the given file exists in the zip file or not. /// Checks whether the given file exists in the zip file or not.
/// Note that this function is thread-safe, since it does not access zip file. /// Note that this function is thread-safe, since it does not access zip file.
bool hasFile( gd::wstring const & name ); bool hasFile( std::u32string const & name );
/// Attempts loading the given file into the given vector. Returns true on /// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise. /// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & ); bool loadFile( std::u32string const & name, std::vector< char > & );
bool loadFile( uint32_t offset, std::vector< char > & ); bool loadFile( uint32_t offset, std::vector< char > & );
/// Index compressed files in zip file /// Index compressed files in zip file

View file

@ -1,12 +1,11 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com> /* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include "voiceengines.hh" #include "voiceengines.hh"
#include "audiolink.hh" #include "audiolink.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "utf8.hh" #include "text.hh"
#include "wstring_qt.hh"
#include <string> #include <string>
#include <map> #include <map>
@ -21,6 +20,7 @@ namespace VoiceEngines {
using namespace Dictionary; using namespace Dictionary;
using std::string; using std::string;
using std::u32string;
using std::map; using std::map;
inline string toMd5( QByteArray const & b ) inline string toMd5( QByteArray const & b )
@ -47,10 +47,6 @@ public:
return voiceEngine.name.toUtf8().data(); return voiceEngine.name.toUtf8().data();
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
@ -62,16 +58,18 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override; sptr< WordSearchRequest > prefixMatch( u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override; sptr< DataRequest >
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ) override;
protected: protected:
void loadIcon() noexcept override; void loadIcon() noexcept override;
}; };
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( u32string const & /*word*/,
unsigned long /*maxResults*/ )
{ {
WordSearchRequestInstant * sr = new WordSearchRequestInstant(); WordSearchRequestInstant * sr = new WordSearchRequestInstant();
@ -80,11 +78,11 @@ sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest >
VoiceEnginesDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool ) VoiceEnginesDictionary::getArticle( u32string const & word, vector< u32string > const &, u32string const &, bool )
{ {
string result; string result;
string wordUtf8( Utf8::encode( word ) ); string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"voiceengines_play\"><tr>"; result += "<table class=\"voiceengines_play\"><tr>";
@ -139,4 +137,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const
} // namespace VoiceEngines } // namespace VoiceEngines
#endif #endif

View file

@ -1,20 +1,17 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com> /* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once #pragma once
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include "dictionary.hh" #include "dictionary.hh"
#include "config.hh" #include "config.hh"
#include "wstring.hh" #include "text.hh"
#include <QCryptographicHash> #include <QCryptographicHash>
namespace VoiceEngines { namespace VoiceEngines {
using std::vector; using std::vector;
using std::string; using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const & voiceEngines ); vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const & voiceEngines );

View file

@ -2,8 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "website.hh" #include "website.hh"
#include "wstring_qt.hh" #include "text.hh"
#include "utf8.hh"
#include <QUrl> #include <QUrl>
#include <QTextCodec> #include <QTextCodec>
#include <QDir> #include <QDir>
@ -52,12 +51,6 @@ public:
dictionaryDescription = urlTemplate_; dictionaryDescription = urlTemplate_;
} }
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return 0; return 0;
@ -68,10 +61,12 @@ public:
return 0; return 0;
} }
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long ) override; sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long ) override;
sptr< DataRequest > sptr< DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const & context, bool ) override; vector< std::u32string > const & alts,
std::u32string const & context,
bool ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -96,7 +91,7 @@ protected slots:
virtual void requestFinished( QNetworkReply * ) {} virtual void requestFinished( QNetworkReply * ) {}
}; };
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( wstring const & /*word*/, unsigned long ) sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( std::u32string const & /*word*/, unsigned long )
{ {
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >(); sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -314,9 +309,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
finish(); finish();
} }
sptr< DataRequest > WebSiteDictionary::getArticle( wstring const & str, sptr< DataRequest > WebSiteDictionary::getArticle( std::u32string const & str,
vector< wstring > const & /*alts*/, vector< std::u32string > const & /*alts*/,
wstring const & context, std::u32string const & context,
bool /*ignoreDiacritics*/ ) bool /*ignoreDiacritics*/ )
{ {
QString urlString = Utils::WebSite::urlReplaceWord( QString( urlTemplate ), QString::fromStdU32String( str ) ); QString urlString = Utils::WebSite::urlReplaceWord( QString( urlTemplate ), QString::fromStdU32String( str ) );

View file

@ -4,7 +4,7 @@
#include "xdxf.hh" #include "xdxf.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "chunkedstorage.hh" #include "chunkedstorage.hh"
#include "dictzip.hh" #include "dictzip.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
@ -39,7 +39,6 @@ using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using std::string; using std::string;
using gd::wstring;
using std::vector; using std::vector;
using std::list; using std::list;
@ -140,12 +139,6 @@ public:
~XdxfDictionary(); ~XdxfDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -166,8 +159,10 @@ public:
return idxHeader.langTo; return idxHeader.langTo;
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -417,8 +412,8 @@ XdxfDictionary::getSearchResults( QString const & searchString, int searchMode,
class XdxfArticleRequest: public Dictionary::DataRequest class XdxfArticleRequest: public Dictionary::DataRequest
{ {
wstring word; std::u32string word;
vector< wstring > alts; vector< std::u32string > alts;
XdxfDictionary & dict; XdxfDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -427,8 +422,8 @@ class XdxfArticleRequest: public Dictionary::DataRequest
public: public:
XdxfArticleRequest( wstring const & word_, XdxfArticleRequest( std::u32string const & word_,
vector< wstring > const & alts_, vector< std::u32string > const & alts_,
XdxfDictionary & dict_, XdxfDictionary & dict_,
bool ignoreDiacritics_ ): bool ignoreDiacritics_ ):
word( word_ ), word( word_ ),
@ -473,13 +468,13 @@ void XdxfArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -508,12 +503,12 @@ void XdxfArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -533,7 +528,7 @@ void XdxfArticleRequest::run()
string result; string result;
multimap< wstring, pair< string, string > >::const_iterator i; multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner(); string cleaner = Utils::Html::getHtmlCleaner();
@ -560,9 +555,9 @@ void XdxfArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -979,7 +974,7 @@ void XdxfResourceRequest::run()
if ( dict.resourceZip.isOpen() ) { if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex ); QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) { if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive throw; // Make it fail since we couldn't read the archive
} }
} }
@ -1200,7 +1195,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
else if ( stream.name() == u"abbreviations" ) { else if ( stream.name() == u"abbreviations" ) {
QString s; QString s;
string value; string value;
list< wstring > keys; list< std::u32string > keys;
while ( !( stream.isEndElement() && stream.name() == u"abbreviations" ) && !stream.atEnd() ) { while ( !( stream.isEndElement() && stream.name() == u"abbreviations" ) && !stream.atEnd() ) {
if ( !stream.readNextStartElement() ) { if ( !stream.readNextStartElement() ) {
break; break;
@ -1216,7 +1211,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream ); s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString(); value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) { for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value; abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
} }
keys.clear(); keys.clear();
} }
@ -1236,7 +1231,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream ); s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString(); value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) { for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value; abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
} }
keys.clear(); keys.clear();
} }

View file

@ -3,8 +3,7 @@
#include "xdxf2html.hh" #include "xdxf2html.hh"
#include <QtXml> #include <QtXml>
#include "utf8.hh" #include "text.hh"
#include "wstring_qt.hh"
#include "folding.hh" #include "folding.hh"
#include "audiolink.hh" #include "audiolink.hh"
@ -442,7 +441,7 @@ string convert( string const & in,
if ( i != pAbrv->end() ) { if ( i != pAbrv->end() ) {
string title; string title;
if ( Utf8::decode( i->second ).size() < 70 ) { if ( Text::toUtf32( i->second ).size() < 70 ) {
// Replace all spaces with non-breakable ones, since that's how Lingvo shows tooltips // Replace all spaces with non-breakable ones, since that's how Lingvo shows tooltips
title.reserve( i->second.size() ); title.reserve( i->second.size() );
@ -466,7 +465,7 @@ string convert( string const & in,
else { else {
title = i->second; title = i->second;
} }
el.setAttribute( "title", QString::fromStdU32String( Utf8::decode( title ) ) ); el.setAttribute( "title", QString::fromStdU32String( Text::toUtf32( title ) ) );
} }
} }
} }
@ -628,7 +627,7 @@ string convert( string const & in,
// if( type == XDXF && dictPtr != NULL && !el.hasAttribute( "start" ) ) // if( type == XDXF && dictPtr != NULL && !el.hasAttribute( "start" ) )
if ( dictPtr != NULL && !el.hasAttribute( "start" ) ) { if ( dictPtr != NULL && !el.hasAttribute( "start" ) ) {
string filename = Utf8::encode( el.text().toStdU32String() ); string filename = Text::toUtf8( el.text().toStdU32String() );
if ( Filetype::isNameOfPicture( filename ) ) { if ( Filetype::isNameOfPicture( filename ) ) {
QUrl url; QUrl url;

View file

@ -6,7 +6,7 @@
#include "zim.hh" #include "zim.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "langcoder.hh" #include "langcoder.hh"
#include "filetype.hh" #include "filetype.hh"
#include "dictfile.hh" #include "dictfile.hh"
@ -38,12 +38,12 @@
namespace Zim { namespace Zim {
using std::string; using std::string;
using std::u32string;
using std::map; using std::map;
using std::vector; using std::vector;
using std::multimap; using std::multimap;
using std::pair; using std::pair;
using std::set; using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -161,11 +161,6 @@ public:
~ZimDictionary() = default; ~ZimDictionary() = default;
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
return idxHeader.articleCount; return idxHeader.articleCount;
@ -187,7 +182,7 @@ public:
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -524,8 +519,8 @@ ZimDictionary::getSearchResults( QString const & searchString, int searchMode, b
class ZimArticleRequest: public Dictionary::DataRequest class ZimArticleRequest: public Dictionary::DataRequest
{ {
wstring word; u32string word;
vector< wstring > alts; vector< u32string > alts;
ZimDictionary & dict; ZimDictionary & dict;
bool ignoreDiacritics; bool ignoreDiacritics;
@ -534,7 +529,10 @@ class ZimArticleRequest: public Dictionary::DataRequest
public: public:
ZimArticleRequest( wstring word_, vector< wstring > const & alts_, ZimDictionary & dict_, bool ignoreDiacritics_ ): ZimArticleRequest( u32string word_,
vector< u32string > const & alts_,
ZimDictionary & dict_,
bool ignoreDiacritics_ ):
word( std::move( word_ ) ), word( std::move( word_ ) ),
alts( alts_ ), alts( alts_ ),
dict( dict_ ), dict( dict_ ),
@ -576,13 +574,13 @@ void ZimArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, pair< string, string > > mainArticles, alternateArticles; multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonyms make it that the articles set< quint32 > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -619,12 +617,12 @@ void ZimArticleRequest::run()
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword ); u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, pair< string, string > > & mapToUse = multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) ); mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -643,7 +641,7 @@ void ZimArticleRequest::run()
// See Issue #271: A mechanism to clean-up invalid HTML cards. // See Issue #271: A mechanism to clean-up invalid HTML cards.
string cleaner = Utils::Html::getHtmlCleaner(); string cleaner = Utils::Html::getHtmlCleaner();
multimap< wstring, pair< string, string > >::const_iterator i; multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -671,9 +669,9 @@ void ZimArticleRequest::run()
finish(); finish();
} }
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > ZimDictionary::getArticle( u32string const & word,
vector< wstring > const & alts, vector< u32string > const & alts,
wstring const &, u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -771,7 +769,7 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
return std::make_shared< ZimResourceRequest >( *this, noLeadingDot.toStdString() ); return std::make_shared< ZimResourceRequest >( *this, noLeadingDot.toStdString() );
} }
wstring normalizeWord( const std::string & url ); u32string normalizeWord( const std::string & url );
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
string const & indicesDir, string const & indicesDir,
Dictionary::Initializing & initializing, Dictionary::Initializing & initializing,
@ -854,7 +852,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( maxHeadwordsToExpand > 0 && ( articleCount >= maxHeadwordsToExpand ) ) { if ( maxHeadwordsToExpand > 0 && ( articleCount >= maxHeadwordsToExpand ) ) {
if ( !title.empty() ) { if ( !title.empty() ) {
wstring word = Utf8::decode( title ); u32string word = Text::toUtf32( title );
indexedWords.addSingleWord( word, index ); indexedWords.addSingleWord( word, index );
} }
else if ( !url.empty() ) { else if ( !url.empty() ) {
@ -863,7 +861,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
else { else {
if ( !title.empty() ) { if ( !title.empty() ) {
auto word = Utf8::decode( title ); auto word = Text::toUtf32( title );
indexedWords.addWord( word, index ); indexedWords.addWord( word, index );
wordCount++; wordCount++;
} }
@ -908,7 +906,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
} }
return dictionaries; return dictionaries;
} }
wstring normalizeWord( const std::string & url ) u32string normalizeWord( const std::string & url )
{ {
auto formattedUrl = QString::fromStdString( url ).remove( RX::Zim::leadingDotSlash ); auto formattedUrl = QString::fromStdString( url ).remove( RX::Zim::leadingDotSlash );
return formattedUrl.toStdU32String(); return formattedUrl.toStdU32String();

View file

@ -4,7 +4,7 @@
#include "zipsounds.hh" #include "zipsounds.hh"
#include "dictfile.hh" #include "dictfile.hh"
#include "folding.hh" #include "folding.hh"
#include "utf8.hh" #include "text.hh"
#include "btreeidx.hh" #include "btreeidx.hh"
#include "audiolink.hh" #include "audiolink.hh"
@ -24,7 +24,6 @@
namespace ZipSounds { namespace ZipSounds {
using std::string; using std::string;
using gd::wstring;
using std::map; using std::map;
using std::multimap; using std::multimap;
using std::set; using std::set;
@ -64,19 +63,19 @@ bool indexIsOldOrBad( string const & indexFile )
|| header.formatVersion != CurrentFormatVersion; || header.formatVersion != CurrentFormatVersion;
} }
wstring stripExtension( string const & str ) std::u32string stripExtension( string const & str )
{ {
wstring name; std::u32string name;
try { try {
name = Utf8::decode( str ); name = Text::toUtf32( str );
} }
catch ( Utf8::exCantDecode & ) { catch ( Text::exCantDecode & ) {
return name; return name;
} }
if ( Filetype::isNameOfSound( str ) ) { if ( Filetype::isNameOfSound( str ) ) {
wstring::size_type pos = name.rfind( L'.' ); std::u32string::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos ) { if ( pos != std::u32string::npos ) {
name.erase( pos ); name.erase( pos );
} }
@ -107,10 +106,6 @@ public:
string getName() noexcept override; string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override unsigned long getArticleCount() noexcept override
{ {
@ -122,8 +117,10 @@ public:
return getArticleCount(); return getArticleCount();
} }
sptr< Dictionary::DataRequest > sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override; vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override; sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -161,9 +158,9 @@ string ZipSoundsDictionary::getName() noexcept
return result; return result;
} }
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word, sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( std::u32string const & word,
vector< wstring > const & alts, vector< std::u32string > const & alts,
wstring const &, std::u32string const &,
bool ignoreDiacritics ) bool ignoreDiacritics )
{ {
@ -177,13 +174,13 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
chain.insert( chain.end(), altChain.begin(), altChain.end() ); chain.insert( chain.end(), altChain.begin(), altChain.end() );
} }
multimap< wstring, uint32_t > mainArticles, alternateArticles; multimap< std::u32string, uint32_t > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this // appear several times. We combat this
// by only allowing them to appear once. // by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded ); wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
} }
@ -198,12 +195,12 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
// We do the case-folded comparison here. // We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word ); std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) { if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped ); headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
} }
multimap< wstring, uint32_t > & mapToUse = multimap< std::u32string, uint32_t > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles; ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.articleOffset ) ); mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.articleOffset ) );
@ -217,7 +214,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
string result; string result;
multimap< wstring, uint32_t >::const_iterator i; multimap< std::u32string, uint32_t >::const_iterator i;
result += "<table class=\"lsa_play\">"; result += "<table class=\"lsa_play\">";
@ -248,7 +245,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz; nameBlock += sz;
string displayedName = string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) ); mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>"; result += "<tr>";
@ -290,7 +287,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz; nameBlock += sz;
string displayedName = string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) ); mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>"; result += "<tr>";
@ -320,7 +317,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const &
{ {
// Remove extension for sound files (like in sound dirs) // Remove extension for sound files (like in sound dirs)
wstring strippedName = stripExtension( name ); std::u32string strippedName = stripExtension( name );
vector< WordArticleLink > chain = findArticles( strippedName ); vector< WordArticleLink > chain = findArticles( strippedName );
@ -434,7 +431,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Remove extension for sound files (like in sound dirs) // Remove extension for sound files (like in sound dirs)
wstring word = stripExtension( link.word ); std::u32string word = stripExtension( link.word );
if ( !word.empty() ) { if ( !word.empty() ) {
names.addWord( word, offset ); names.addWord( word, offset );
} }

View file

@ -5,7 +5,6 @@
#include <cstdlib> #include <cstdlib>
#include "fulltextsearch.hh" #include "fulltextsearch.hh"
#include "ftshelpers.hh" #include "ftshelpers.hh"
#include "wstring_qt.hh"
#include "dictfile.hh" #include "dictfile.hh"
#include "folding.hh" #include "folding.hh"
#include "utils.hh" #include "utils.hh"

View file

@ -7,7 +7,6 @@
#include "btreeidx.hh" #include "btreeidx.hh"
#include "fulltextsearch.hh" #include "fulltextsearch.hh"
#include "folding.hh" #include "folding.hh"
#include "wstring_qt.hh"
namespace FtsHelpers { namespace FtsHelpers {
@ -44,7 +43,7 @@ public:
{ {
if ( ignoreDiacritics_ ) if ( ignoreDiacritics_ )
searchString = searchString =
QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::removeTrailingZero( searchString_ ) ) ); QString::fromStdU32String( Folding::applyDiacriticsOnly( Text::removeTrailingZero( searchString_ ) ) );
foundHeadwords = new QList< FTS::FtsHeadword >; foundHeadwords = new QList< FTS::FtsHeadword >;
results = 0; results = 0;

View file

@ -1,5 +1,4 @@
#include "headwordsmodel.hh" #include "headwordsmodel.hh"
#include "wstring_qt.hh"
HeadwordListModel::HeadwordListModel( QObject * parent ): HeadwordListModel::HeadwordListModel( QObject * parent ):
QAbstractListModel( parent ), QAbstractListModel( parent ),
@ -67,7 +66,7 @@ void HeadwordListModel::setFilter( const QRegularExpression & reg )
} }
} }
filterWords.clear(); filterWords.clear();
auto sr = _dict->prefixMatch( gd::removeTrailingZero( reg.pattern() ), maxFilterResults ); auto sr = _dict->prefixMatch( Text::removeTrailingZero( reg.pattern() ), maxFilterResults );
connect( sr.get(), &Dictionary::Request::finished, this, &HeadwordListModel::requestFinished, Qt::QueuedConnection ); connect( sr.get(), &Dictionary::Request::finished, this, &HeadwordListModel::requestFinished, Qt::QueuedConnection );
queuedRequests.push_back( sr ); queuedRequests.push_back( sr );
} }

View file

@ -3,7 +3,7 @@
#include "langcoder.hh" #include "langcoder.hh"
#include "language.hh" #include "language.hh"
#include "utf8.hh" #include "text.hh"
#include <QFileInfo> #include <QFileInfo>
#include <QLocale> #include <QLocale>
@ -226,9 +226,9 @@ QString LangCoder::intToCode2( quint32 val )
return QString::fromLatin1( ba ); return QString::fromLatin1( ba );
} }
quint32 LangCoder::findIdForLanguage( gd::wstring const & lang ) quint32 LangCoder::findIdForLanguage( std::u32string const & lang )
{ {
const auto langFolded = QByteArrayView( Utf8::encode( lang ) ); const auto langFolded = QByteArrayView( Text::toUtf8( lang ) );
for ( auto const & lc : LANG_CODE_MAP ) { for ( auto const & lc : LANG_CODE_MAP ) {
if ( langFolded.compare( lc.lang, Qt::CaseInsensitive ) == 0 ) { if ( langFolded.compare( lc.lang, Qt::CaseInsensitive ) == 0 ) {

View file

@ -2,7 +2,7 @@
#include <QString> #include <QString>
#include <QIcon> #include <QIcon>
#include "wstring.hh" #include "text.hh"
struct GDLangCode struct GDLangCode
{ {
@ -34,7 +34,7 @@ public:
/// Finds the id for the given language name, written in english. The search /// Finds the id for the given language name, written in english. The search
/// is case- and punctuation insensitive. /// is case- and punctuation insensitive.
static quint32 findIdForLanguage( gd::wstring const & ); static quint32 findIdForLanguage( std::u32string const & );
static quint32 findIdForLanguageCode3( std::string const & ); static quint32 findIdForLanguageCode3( std::string const & );

View file

@ -465,7 +465,7 @@ BabylonLang getBabylonLangByIndex( int index )
return BabylonDb[ index ]; return BabylonDb[ index ];
} }
quint32 findBlgLangIDByEnglishName( gd::wstring const & lang ) quint32 findBlgLangIDByEnglishName( std::u32string const & lang )
{ {
QString enName = QString::fromStdU32String( lang ); QString enName = QString::fromStdU32String( lang );
for ( const auto & idx : BabylonDb ) { for ( const auto & idx : BabylonDb ) {

View file

@ -4,7 +4,6 @@
#pragma once #pragma once
#include <QString> #include <QString>
#include "wstring_qt.hh"
/// Language-specific stuff - codes, names, ids etc. /// Language-specific stuff - codes, names, ids etc.
namespace Language { namespace Language {
@ -47,5 +46,5 @@ struct BabylonLang
const char * localizedName; const char * localizedName;
}; };
BabylonLang getBabylonLangByIndex( int index ); BabylonLang getBabylonLangByIndex( int index );
quint32 findBlgLangIDByEnglishName( gd::wstring const & lang ); quint32 findBlgLangIDByEnglishName( std::u32string const & lang );
} // namespace Language } // namespace Language

View file

@ -510,7 +510,7 @@ int main( int argc, char ** argv )
if ( gdcl.notts ) { if ( gdcl.notts ) {
cfg.notts = true; cfg.notts = true;
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
cfg.voiceEngines.clear(); cfg.voiceEngines.clear();
#endif #endif
} }

View file

@ -1,4 +1,4 @@
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include "speechclient.hh" #include "speechclient.hh"

View file

@ -1,5 +1,5 @@
#pragma once #pragma once
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include <QObject> #include <QObject>
#include "config.hh" #include "config.hh"

View file

@ -15,7 +15,6 @@
<file>qt-lingvo.css</file> <file>qt-lingvo.css</file>
<file>qt-modern.css</file> <file>qt-modern.css</file>
<file>qt-style-win.css</file> <file>qt-style-win.css</file>
<file>qt-style.css</file>
<file>article-style-darkmode.css</file> <file>article-style-darkmode.css</file>
</qresource> </qresource>
</RCC> </RCC>

View file

@ -1,6 +1,6 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com> /* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include "texttospeechsource.hh" #include "texttospeechsource.hh"
#include <QVariant> #include <QVariant>

View file

@ -3,7 +3,7 @@
#pragma once #pragma once
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
#include "ui_texttospeechsource.h" #include "ui_texttospeechsource.h"
#include "config.hh" #include "config.hh"

View file

@ -10,7 +10,6 @@
#include "utils.hh" #include "utils.hh"
#include "webmultimediadownload.hh" #include "webmultimediadownload.hh"
#include "wildcard.hh" #include "wildcard.hh"
#include "wstring_qt.hh"
#include <QBuffer> #include <QBuffer>
#include <QClipboard> #include <QClipboard>
#include <QCryptographicHash> #include <QCryptographicHash>
@ -1048,7 +1047,7 @@ void ArticleView::openLink( QUrl const & url, QUrl const & ref, QString const &
QMessageBox::critical( this, "GoldenDict", tr( "The referenced audio program doesn't exist." ) ); QMessageBox::critical( this, "GoldenDict", tr( "The referenced audio program doesn't exist." ) );
} }
else if ( url.scheme() == "gdtts" ) { else if ( url.scheme() == "gdtts" ) {
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
// Text to speech // Text to speech
QString md5Id = Utils::Url::queryItemValue( url, "engine" ); QString md5Id = Utils::Url::queryItemValue( url, "engine" );
QString text( url.path().mid( 1 ) ); QString text( url.path().mid( 1 ) );

View file

@ -173,7 +173,7 @@ bool EditDictionaries::isSourcesChanged() const
|| sources.getLingua() != cfg.lingua || sources.getForvo() != cfg.forvo || sources.getMediaWikis() != cfg.mediawikis || sources.getLingua() != cfg.lingua || sources.getForvo() != cfg.forvo || sources.getMediaWikis() != cfg.mediawikis
|| sources.getWebSites() != cfg.webSites || sources.getDictServers() != cfg.dictServers || sources.getWebSites() != cfg.webSites || sources.getDictServers() != cfg.dictServers
|| sources.getPrograms() != cfg.programs || sources.getPrograms() != cfg.programs
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
|| sources.getVoiceEngines() != cfg.voiceEngines || sources.getVoiceEngines() != cfg.voiceEngines
#endif #endif
; ;
@ -197,7 +197,7 @@ void EditDictionaries::acceptChangedSources( bool rebuildGroups )
cfg.webSites = sources.getWebSites(); cfg.webSites = sources.getWebSites();
cfg.dictServers = sources.getDictServers(); cfg.dictServers = sources.getDictServers();
cfg.programs = sources.getPrograms(); cfg.programs = sources.getPrograms();
#ifndef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
cfg.voiceEngines = sources.getVoiceEngines(); cfg.voiceEngines = sources.getVoiceEngines();
#endif #endif
setUpdatesEnabled( false ); setUpdatesEnabled( false );

View file

@ -3,7 +3,7 @@
#include <Qt> #include <Qt>
#include <QScopeGuard> #include <QScopeGuard>
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
#include "dict/epwing_book.hh" #include "dict/epwing_book.hh"
#endif #endif
@ -209,7 +209,7 @@ MainWindow::MainWindow( Config::Class & cfg_ ):
+ " GoldenDict/WebEngine" ); + " GoldenDict/WebEngine" );
} }
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
Epwing::initialize(); Epwing::initialize();
#endif #endif
@ -1173,7 +1173,7 @@ MainWindow::~MainWindow()
scanPopup = nullptr; scanPopup = nullptr;
} }
#ifndef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
Epwing::finalize(); Epwing::finalize();
#endif #endif
} }
@ -1374,10 +1374,8 @@ void MainWindow::updateAppearances( QString const & addonStyle,
} }
#endif #endif
QFile builtInCssFile( ":qt-style.css" );
builtInCssFile.open( QFile::ReadOnly );
QByteArray css = builtInCssFile.readAll();
QByteArray css{};
#if defined( Q_OS_WIN ) #if defined( Q_OS_WIN )
QFile winCssFile( ":qt-style-win.css" ); QFile winCssFile( ":qt-style-win.css" );
winCssFile.open( QFile::ReadOnly ); winCssFile.open( QFile::ReadOnly );

View file

@ -185,6 +185,7 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
ui.doubleClickTranslates->setChecked( p.doubleClickTranslates ); ui.doubleClickTranslates->setChecked( p.doubleClickTranslates );
ui.selectBySingleClick->setChecked( p.selectWordBySingleClick ); ui.selectBySingleClick->setChecked( p.selectWordBySingleClick );
ui.autoScrollToTargetArticle->setChecked( p.autoScrollToTargetArticle ); ui.autoScrollToTargetArticle->setChecked( p.autoScrollToTargetArticle );
ui.targetArticleAtFirst->setChecked( p.targetArticleAtFirst );
ui.escKeyHidesMainWindow->setChecked( p.escKeyHidesMainWindow ); ui.escKeyHidesMainWindow->setChecked( p.escKeyHidesMainWindow );
ui.darkMode->addItem( tr( "On" ), QVariant::fromValue( Config::Dark::On ) ); ui.darkMode->addItem( tr( "On" ), QVariant::fromValue( Config::Dark::On ) );
@ -390,7 +391,7 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
#ifndef MAKE_ZIM_SUPPORT #ifndef MAKE_ZIM_SUPPORT
ui.allowZim->hide(); ui.allowZim->hide();
#endif #endif
#ifdef NO_EPWING_SUPPORT #ifndef EPWING_SUPPORT
ui.allowEpwing->hide(); ui.allowEpwing->hide();
#endif #endif
ui.maxDictionarySize->setValue( p.fts.maxDictionarySize ); ui.maxDictionarySize->setValue( p.fts.maxDictionarySize );
@ -441,6 +442,7 @@ Config::Preferences Preferences::getPreferences()
p.doubleClickTranslates = ui.doubleClickTranslates->isChecked(); p.doubleClickTranslates = ui.doubleClickTranslates->isChecked();
p.selectWordBySingleClick = ui.selectBySingleClick->isChecked(); p.selectWordBySingleClick = ui.selectBySingleClick->isChecked();
p.autoScrollToTargetArticle = ui.autoScrollToTargetArticle->isChecked(); p.autoScrollToTargetArticle = ui.autoScrollToTargetArticle->isChecked();
p.targetArticleAtFirst = ui.targetArticleAtFirst->isChecked();
p.escKeyHidesMainWindow = ui.escKeyHidesMainWindow->isChecked(); p.escKeyHidesMainWindow = ui.escKeyHidesMainWindow->isChecked();
p.darkMode = ui.darkMode->currentData().value< Config::Dark >(); p.darkMode = ui.darkMode->currentData().value< Config::Dark >();

View file

@ -169,6 +169,16 @@ however, the article from the topmost dictionary is shown.</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="1">
<widget class="QCheckBox" name="targetArticleAtFirst">
<property name="text">
<string>Place the target article at the first place.</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0"> <item row="1" column="0">
<widget class="QGroupBox" name="enableTrayIcon"> <widget class="QGroupBox" name="enableTrayIcon">
<property name="toolTip"> <property name="toolTip">

View file

@ -8,8 +8,8 @@ const QLatin1String flags = QLatin1String(
#ifdef MAKE_ZIM_SUPPORT #ifdef MAKE_ZIM_SUPPORT
" MAKE_ZIM_SUPPORT" " MAKE_ZIM_SUPPORT"
#endif #endif
#ifdef NO_EPWING_SUPPORT #ifdef EPWING_SUPPORT
" NO_EPWING_SUPPORT" " EPWING_SUPPORT"
#endif #endif
#ifdef USE_ICONV #ifdef USE_ICONV
" USE_ICONV" " USE_ICONV"
@ -17,8 +17,8 @@ const QLatin1String flags = QLatin1String(
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT #ifdef MAKE_CHINESE_CONVERSION_SUPPORT
" MAKE_CHINESE_CONVERSION_SUPPORT" " MAKE_CHINESE_CONVERSION_SUPPORT"
#endif #endif
#ifdef NO_TTS_SUPPORT #ifdef TTS_SUPPORT
" NO_TTS_SUPPORT" " TTS_SUPPORT"
#endif #endif
#ifndef MAKE_FFMPEG_PLAYER #ifndef MAKE_FFMPEG_PLAYER
" no_ffmpeg_player" " no_ffmpeg_player"

View file

@ -3,14 +3,11 @@
#include "wordfinder.hh" #include "wordfinder.hh"
#include "folding.hh" #include "folding.hh"
#include "wstring_qt.hh"
#include <map> #include <map>
using std::vector; using std::vector;
using std::list; using std::list;
using gd::wstring;
using gd::wchar;
using std::map; using std::map;
using std::pair; using std::pair;
@ -134,7 +131,7 @@ void WordFinder::startSearch()
allWordWritings[ 0 ] = inputWord.toStdU32String(); allWordWritings[ 0 ] = inputWord.toStdU32String();
for ( const auto & inputDict : *inputDicts ) { for ( const auto & inputDict : *inputDicts ) {
vector< wstring > writings = inputDict->getAlternateWritings( allWordWritings[ 0 ] ); vector< std::u32string > writings = inputDict->getAlternateWritings( allWordWritings[ 0 ] );
allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() ); allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() );
} }
@ -255,7 +252,9 @@ unsigned saturated( unsigned x )
/// both sides by either whitespace, punctuation or begin/end of string. /// both sides by either whitespace, punctuation or begin/end of string.
/// If true is returned, pos holds the offset in the haystack. If the offset /// If true is returned, pos holds the offset in the haystack. If the offset
/// is larger than 255, it is set to 255. /// is larger than 255, it is set to 255.
bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle, wstring::size_type & pos ) bool hasSurroundedWithWs( std::u32string const & haystack,
std::u32string const & needle,
std::u32string::size_type & pos )
{ {
if ( haystack.size() < needle.size() ) { if ( haystack.size() < needle.size() ) {
return false; // Needle won't even fit into a haystack return false; // Needle won't even fit into a haystack
@ -264,7 +263,7 @@ bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle, wstr
for ( pos = 0;; ++pos ) { for ( pos = 0;; ++pos ) {
pos = haystack.find( needle, pos ); pos = haystack.find( needle, pos );
if ( pos == wstring::npos ) { if ( pos == std::u32string::npos ) {
return false; // Not found return false; // Not found
} }
@ -290,13 +289,13 @@ void WordFinder::updateResults()
updateResultsTimer.stop(); // Can happen when we were done before it'd expire updateResultsTimer.stop(); // Can happen when we were done before it'd expire
} }
wstring original = Folding::applySimpleCaseOnly( allWordWritings[ 0 ] ); std::u32string original = Folding::applySimpleCaseOnly( allWordWritings[ 0 ] );
for ( auto i = finishedRequests.begin(); i != finishedRequests.end(); ) { for ( auto i = finishedRequests.begin(); i != finishedRequests.end(); ) {
for ( size_t count = ( *i )->matchesCount(), x = 0; x < count; ++x ) { for ( size_t count = ( *i )->matchesCount(), x = 0; x < count; ++x ) {
wstring match = ( **i )[ x ].word; std::u32string match = ( **i )[ x ].word;
int weight = ( **i )[ x ].weight; int weight = ( **i )[ x ].weight;
wstring lowerCased = Folding::applySimpleCaseOnly( match ); std::u32string lowerCased = Folding::applySimpleCaseOnly( match );
if ( searchType == ExpressionMatch ) { if ( searchType == ExpressionMatch ) {
unsigned ws; unsigned ws;
@ -320,7 +319,7 @@ void WordFinder::updateResults()
weight = ws; weight = ws;
} }
auto insertResult = auto insertResult =
resultsIndex.insert( pair< wstring, ResultsArray::iterator >( lowerCased, resultsArray.end() ) ); resultsIndex.insert( pair< std::u32string, ResultsArray::iterator >( lowerCased, resultsArray.end() ) );
if ( !insertResult.second ) { if ( !insertResult.second ) {
// Wasn't inserted since there was already an item -- check the case // Wasn't inserted since there was already an item -- check the case
@ -369,16 +368,16 @@ void WordFinder::updateResults()
}; };
for ( const auto & allWordWriting : allWordWritings ) { for ( const auto & allWordWriting : allWordWritings ) {
wstring target = Folding::applySimpleCaseOnly( allWordWriting ); std::u32string target = Folding::applySimpleCaseOnly( allWordWriting );
wstring targetNoFullCase = Folding::applyFullCaseOnly( target ); std::u32string targetNoFullCase = Folding::applyFullCaseOnly( target );
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase ); std::u32string targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia ); std::u32string targetNoPunct = Folding::applyPunctOnly( targetNoDia );
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct ); std::u32string targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
wstring::size_type matchPos = 0; std::u32string::size_type matchPos = 0;
for ( const auto & i : resultsIndex ) { for ( const auto & i : resultsIndex ) {
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs; std::u32string resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
int rank; int rank;
@ -441,14 +440,14 @@ void WordFinder::updateResults()
// only the first one, storing it in rank. Then we sort the results using // only the first one, storing it in rank. Then we sort the results using
// SortByRankAndLength. // SortByRankAndLength.
for ( const auto & allWordWriting : allWordWritings ) { for ( const auto & allWordWriting : allWordWritings ) {
wstring target = Folding::apply( allWordWriting ); std::u32string target = Folding::apply( allWordWriting );
for ( const auto & i : resultsIndex ) { for ( const auto & i : resultsIndex ) {
wstring resultFolded = Folding::apply( i.first ); std::u32string resultFolded = Folding::apply( i.first );
int charsInCommon = 0; int charsInCommon = 0;
for ( wchar const *t = target.c_str(), *r = resultFolded.c_str(); *t && *t == *r; for ( char32_t const *t = target.c_str(), *r = resultFolded.c_str(); *t && *t == *r;
++t, ++r, ++charsInCommon ) { ++t, ++r, ++charsInCommon ) {
; ;
} }

View file

@ -48,11 +48,11 @@ private:
std::vector< sptr< Dictionary::Class > > const * inputDicts; std::vector< sptr< Dictionary::Class > > const * inputDicts;
std::vector< gd::wstring > allWordWritings; // All writings of the inputWord std::vector< std::u32string > allWordWritings; // All writings of the inputWord
struct OneResult struct OneResult
{ {
gd::wstring word; std::u32string word;
int rank; int rank;
bool wasSuggested; bool wasSuggested;
}; };
@ -60,7 +60,7 @@ private:
// Maps lowercased string to the original one. This catches all duplicates // Maps lowercased string to the original one. This catches all duplicates
// without case sensitivity. Made as an array and a map indexing that array. // without case sensitivity. Made as an array and a map indexing that array.
using ResultsArray = std::list< OneResult >; using ResultsArray = std::list< OneResult >;
using ResultsIndex = std::map< gd::wstring, ResultsArray::iterator >; using ResultsIndex = std::map< std::u32string, ResultsArray::iterator >;
ResultsArray resultsArray; ResultsArray resultsArray;
ResultsIndex resultsIndex; ResultsIndex resultsIndex;