Merge pull request #2007 from xiaoyifang/staged
Some checks failed
Release All / build_macOS (macos-13, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-13, 6.7.2) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.7.2) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.6.3) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.7.2) (push) Has been cancelled
Release All / generate_other_staffs (push) Has been cancelled
Release All / publish (push) Has been cancelled

daily merge,staged->dev
This commit is contained in:
xiaoyifang 2024-12-03 13:34:28 +08:00 committed by GitHub
commit bed76f366b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
154 changed files with 2863 additions and 3653 deletions

View file

@ -26,6 +26,7 @@ Checks: >
-google-readability-casting,
-hicpp-deprecated-headers,
-hicpp-no-array-decay,
-misc-confusable-identifiers,
-misc-const-correctness,
-misc-include-cleaner,
-misc-non-private-member-variables-in-classes,
@ -33,6 +34,7 @@ Checks: >
-modernize-deprecated-headers,
-modernize-use-nodiscard,
-modernize-use-trailing-return-type,
-performance-enum-size,
-readability-function-cognitive-complexity,
-readability-identifier-length,
-readability-magic-numbers,

View file

@ -17,4 +17,10 @@ a11c9e3aeca4329e1982d8fe26bacbb21ab50ddf
d15081e723756eef053550dc9e06e31d7828dec3
# remove gd::toWString
c8af0450f1f7f8188004db96e3f53e7e33e2ccad
c8af0450f1f7f8188004db96e3f53e7e33e2ccad
# remove gddebug.hh and associated functions
76aaed116bdc3aeb53fd61553aedb877baf9b510
# wstring & wchar -> std::u32string & char32_t
f1e158578f62c96059bef1a616b75495adb6e2c6

View file

@ -51,6 +51,9 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: false
- name: Update brew
run: |
brew update
- name: Install dependencies
run: |
brew install \
@ -66,11 +69,11 @@ jobs:
xapian \
libzim \
qt
wget ftp://ftp.sra.co.jp/pub/misc/eb/eb-4.4.3.tar.bz2
- name: Install eb
run: |
wget https://github.com/mistydemeo/eb/releases/download/v4.4.3/eb-4.4.3.tar.bz2
tar xvjf eb-4.4.3.tar.bz2
cd eb-4.4.3 && ./configure && make -j 8 && sudo make install && cd ..
- name: Run build
run: |
mkdir build_dir

View file

@ -26,6 +26,9 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: true
- name: Update brew
run: |
brew update
- name: Install dependencies
run: |
brew install \
@ -40,7 +43,8 @@ jobs:
ninja \
opencc \
xapian
- name: Install eb
run: |
git clone https://github.com/xiaoyifang/eb.git
cd eb && ./configure && make -j 8 && sudo make install && cd ..
- uses: jurplel/install-qt-action@v4
@ -61,6 +65,9 @@ jobs:
- name: Package
run: |
cmake --install build_dir/
- name: Print package content
run: |
ls -Rl ./build_dir/redist
- uses: actions/upload-artifact@v4
with:
name: macOS-${{ matrix.os }}-Qt${{ matrix.qt_ver }}

View file

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.25) # ubuntu 23.04 Fedora 36
cmake_minimum_required(VERSION 3.25) # Debian 11 Ubuntu 24.04 Fedora 36
option(WITH_FFMPEG_PLAYER "Enable support for FFMPEG player" ON)
option(WITH_EPWING_SUPPORT "Enable epwing support" ON)
@ -9,20 +9,12 @@ option(WITH_TTS "enable QTexttoSpeech support" OFF)
option(USE_SYSTEM_FMT "use system fmt instead of bundled one" OFF)
option(USE_SYSTEM_TOML "use system toml++ instead of bundled one" OFF)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
## Change binary & resources folder to parallel install with original GD.
## This flag should be avoided because it leads to small regressions:
## 1. There are personal scripts assuming the binary name to be "goldendict" -> require everyone to change the name in their script
## 2. There are icon themes that assuming the icon name to be "goldendict" -> invalidate the GD icon when using a icon theme
## 3. There are dictionary packages that install files to "/usr/share/goldendict/content" -> nullify the auto dict discovery
option(USE_ALTERNATIVE_NAME "Force the name goldendict-ng " OFF)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") # to put staff in the ./cmake folder
## This should be avoided because it causes small regressions: some scripts and icon themes assume the binary name and resources folder to be `goldendict`
option(USE_ALTERNATIVE_NAME "For Linux, change the binary name and resource folder to goldendict-ng to parallel install with the original GD" OFF)
# vcpkg handling code, must be placed before project()
if (WIN32)
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
if (DEFINED CMAKE_TOOLCHAIN_FILE)
message(STATUS "Using toolchain file: ${CMAKE_TOOLCHAIN_FILE}")
else ()
@ -37,11 +29,9 @@ if (WIN32)
set(VCPKG_MANIFEST_MODE OFF CACHE BOOL "disable existing manifest mode caused by the existrance of vcpkg.json" FORCE)
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/_deps/vcpkg-export-src/scripts/buildsystems/vcpkg.cmake")
endif ()
endif ()
if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
if (WITH_VCPKG_BREAKPAD)
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
endif ()
endif ()
include(FeatureSummary)
@ -49,7 +39,7 @@ include(FeatureSummary)
project(goldendict-ng
VERSION 24.11.0
LANGUAGES CXX C)
if (APPLE)
enable_language(OBJCXX)
set(CMAKE_OBJCXX_STANDARD 17)
@ -60,13 +50,12 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(GOLDENDICT "goldendict") # binary/executable name
if (USE_ALTERNATIVE_NAME )
if (USE_ALTERNATIVE_NAME)
set(GOLDENDICT "goldendict-ng")
endif ()
if (APPLE)
set(GOLDENDICT "GoldenDict-ng")
endif()
endif ()
#### Qt
@ -78,11 +67,10 @@ endif ()
find_package(Qt6 REQUIRED COMPONENTS ${GD_QT_COMPONENTS})
qt_standard_project_setup() # availiable after find_package(Qt6 .... Core
qt_standard_project_setup()
set(CMAKE_AUTORCC ON) # not included in the qt_standard_project_setup
#### Things required during configuration
block() # generate version.txt
string(TIMESTAMP build_time UTC)
find_package(Git)
@ -163,11 +151,8 @@ target_link_libraries(${GOLDENDICT} PRIVATE
Qt6::WebEngineWidgets
Qt6::Widgets
Qt6::Svg
)
if (WITH_TTS)
target_link_libraries(${GOLDENDICT} PRIVATE Qt6::TextToSpeech)
endif ()
$<$<BOOL:${WITH_TTS}>:Qt6::TextToSpeech>
)
target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/thirdparty/qtsingleapplication/src
@ -176,11 +161,7 @@ target_include_directories(${GOLDENDICT} PRIVATE
${PROJECT_SOURCE_DIR}/src/dict
${PROJECT_SOURCE_DIR}/src/dict/utils
${PROJECT_SOURCE_DIR}/src/ui
)
if (WIN32)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/src/windows)
endif ()
)
if (NOT USE_SYSTEM_TOML)
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty/tomlplusplus)
@ -199,45 +180,22 @@ target_compile_definitions(${GOLDENDICT} PRIVATE
)
target_compile_definitions(${GOLDENDICT} PUBLIC
CMAKE_USED_HACK # temporal hack to avoid breaking qmake build
MAKE_QTMULTIMEDIA_PLAYER
MAKE_CHINESE_CONVERSION_SUPPORT
)
if (WIN32)
target_compile_definitions(${GOLDENDICT} PUBLIC
__WIN32
INCLUDE_LIBRARY_PATH
)
endif ()
if (WITH_FFMPEG_PLAYER)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_FFMPEG_PLAYER)
endif ()
if(NOT WITH_TTS)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_TTS_SUPPORT)
endif()
if (NOT WITH_EPWING_SUPPORT)
target_compile_definitions(${GOLDENDICT} PUBLIC NO_EPWING_SUPPORT)
endif ()
if (WITH_ZIM)
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_ZIM_SUPPORT)
endif ()
if (WITH_VCPKG_BREAKPAD)
target_compile_definitions(${GOLDENDICT} PUBLIC USE_BREAKPAD)
endif ()
$<$<BOOL:${WIN32}>:__WIN32>
$<$<BOOL:${WITH_FFMPEG_PLAYER}>:MAKE_FFMPEG_PLAYER>
$<$<BOOL:${WITH_TTS}>:TTS_SUPPORT>
$<$<BOOL:${WITH_EPWING_SUPPORT}>:EPWING_SUPPORT>
$<$<BOOL:${WITH_ZIM}>:MAKE_ZIM_SUPPORT>
$<$<BOOL:${WITH_VCPKG_BREAKPAD}>:USE_BREAKPAD>
)
#### libraries linking && includes for Win or Unix
if (WIN32)
include(Deps_Vcpkg)
include(cmake/Deps_Vcpkg.cmake)
else ()
include(Deps_Unix)
include(cmake/Deps_Unix.cmake)
endif ()
#### add translations
@ -261,155 +219,11 @@ add_dependencies(${GOLDENDICT} "release_translations")
#### installation or assemble redistribution
if (APPLE)
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)
set_target_properties(${GOLDENDICT} PROPERTIES
MACOSX_BUNDLE TRUE
MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)
set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")
# if anything wrong, delete this and affect lines, and see what's Qt will generate by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "
set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
qt_deploy_runtime_dependencies(
EXECUTABLE \"${Redistributable_APP}\"
GENERATE_QT_CONF
NO_APP_STORE_COMPLIANCE)
qt_deploy_translations()
qt_deploy_qt_conf(\"${QtConfPath}\"
PLUGINS_DIR PlugIns
TRANSLATIONS_DIR Resources/translations)
"
)
install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")
if (IS_READABLE "/opt/homebrew/share/opencc/")
set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_READABLE "/usr/local/share/opencc/")
set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")
install(SCRIPT ${deploy_script})
install(CODE "execute_process(COMMAND codesign --force --deep -s - ${Redistributable_APP})")
find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
install(CODE "
execute_process(COMMAND ${CREATE-DMG} \
--skip-jenkins \
--format \"ULMO\"
--volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
--volicon ${CMAKE_SOURCE_DIR}/icons/macicon.icns \
--icon \"${App_Name}\" 100 100
--app-drop-link 300 100 \
\"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
\"${Assembling_Dir}\")"
)
else ()
message(WARNING "create-dmg not found. No .dmg will be created")
endif ()
endif ()
if (LINUX OR BSD)
install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)
if (NOT USE_ALTERNATIVE_NAME)
# see: config.cc -> getProgramDataDir
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
RENAME goldendict-ng.png)
install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)
block() # patch the desktop file to adapt the binary & icon file's name change
file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
file(WRITE "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" "${DESKTOP_FILE_CONTENT}")
endblock()
endif ()
endif ()
if (WIN32)
set_target_properties(${GOLDENDICT}
PROPERTIES
WIN32_EXECUTABLE TRUE
RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)
qt_generate_deploy_script(
TARGET ${GOLDENDICT}
OUTPUT_SCRIPT deploy_script
CONTENT "qt_deploy_runtime_dependencies(
EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/goldendict.exe\"
BIN_DIR .
LIB_DIR .
)"
)
install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)
# TODO: do we really need to carry a copy of openSSL?
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)
# trick CPack to make the output folder as NSIS installer
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
DESTINATION .
FILES_MATCHING
PATTERN "*"
PATTERN "*.pdb" EXCLUDE
PATTERN "*.ilk" EXCLUDE)
set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")
# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")
# NSIS specificS
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_URL_INFO_ABOUT [=[https://xiaoyifang.github.io/goldendict-ng/]=])
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")
include(CPack)
include(cmake/Package_macOS.cmake)
elseif (LINUX OR BSD)
include(cmake/Package_Linux.cmake)
elseif (WIN32)
include(cmake/Package_Windows.cmake)
endif ()
feature_summary(WHAT ALL DESCRIPTION "Build configuration:")

View file

@ -1,13 +1,7 @@
#### Various workarounds
if (APPLE)
# old & new homebrew's include paths
target_include_directories(${GOLDENDICT} PRIVATE /usr/local/include /opt/homebrew/include)
# libzim depends on ICU, but the ICU from homebrew is "key-only", we need to manually prioritize it
# See `brew info icu4c` if this no longer works
# Note: Remove icu4c@75 if it fails again
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/opt/icu4c@75/lib/pkgconfig:/opt/homebrew/opt/icu4c@75/lib/pkgconfig:/usr/local/opt/icu4c/lib/pkgconfig:/opt/homebrew/opt/icu4c/lib/pkgconfig")
endif ()
target_include_directories(${GOLDENDICT} PRIVATE
@ -34,25 +28,20 @@ endif ()
##### Finding packages from package manager
find_package(PkgConfig REQUIRED)
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED)
# Consider all PkgConfig dependencies as one
pkg_check_modules(PKGCONFIG_DEPS IMPORTED_TARGET
# Import all PkgConfig dependencies as one
pkg_check_modules(DEPS REQUIRED IMPORTED_TARGET
hunspell
liblzma
lzo2
opencc
vorbis # .ogg
vorbisfile
liblzma
xapian-core
zlib
)
target_link_libraries(${GOLDENDICT} PRIVATE
PkgConfig::PKGCONFIG_DEPS
BZip2::BZip2
ZLIB::ZLIB
)
target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::DEPS BZip2::BZip2)
# On FreeBSD, there are two iconv, libc iconv & GNU libiconv.
# The system one is good enough, the following is a workaround to use libc iconv on freeBSD.
@ -82,14 +71,27 @@ if (WITH_EPWING_SUPPORT)
endif ()
if (WITH_ZIM)
if (APPLE)
# ICU from homebrew is "keg-only", so we need to manually prioritize it -> see `brew info icu4c`
# We also need to find the correct one if multiple versions coexist.
set(ENV{PATH} "$ENV{PATH}:/usr/local/bin/:/opt/homebrew/bin") # add brew command into PATH
execute_process(
COMMAND sh -c [=[brew --prefix $(brew deps libzim | grep icu4c)]=]
OUTPUT_VARIABLE ICU_REQUIRED_BY_ZIM_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE
COMMAND_ERROR_IS_FATAL ANY)
message(STATUS "Found correct homebrew icu path -> ${ICU_REQUIRED_BY_ZIM_PREFIX}")
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig")
message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}")
# icu4c, as a transitive dependency of libzim, may not be automatically copied into the app bundle,
# so we manually discover the icu4c from homebrew, then find the relevant dylibs
set(BREW_ICU_ADDITIONAL_DYLIBS "${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicudata.dylib ${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicui18n.dylib ${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicuuc.dylib")
message(STATUS "Additional ICU `.dylib`s -> ${BREW_ICU_ADDITIONAL_DYLIBS}")
endif ()
pkg_check_modules(ZIM REQUIRED IMPORTED_TARGET libzim)
target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::ZIM)
if (APPLE)
# For some reason, icu4c as transitive dependency of libzim may not be copied into app bundle,
# so we directly depends on it to help macdeployqt or whatever
pkg_check_modules(BREW_ICU_FOR_LIBZIM_FORCE_LINK REQUIRED IMPORTED_TARGET icu-i18n icu-uc)
target_link_libraries(${GOLDENDICT} PUBLIC PkgConfig::BREW_ICU_FOR_LIBZIM_FORCE_LINK)
endif ()
endif ()
if (USE_SYSTEM_FMT)

22
cmake/Package_Linux.cmake Normal file
View file

@ -0,0 +1,22 @@
# Installation rules for Linux / BSD.
#
# Installs the executable, the AppStream metainfo, the desktop entry, the icon
# and the translation files. The data directory compiled into the binary
# (PROGRAM_DATA_DIR) and the installed icon/desktop entry names depend on
# USE_ALTERNATIVE_NAME.
install(TARGETS ${GOLDENDICT})
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)

if (NOT USE_ALTERNATIVE_NAME)
    # see: config.cc -> getProgramDataDir
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
    install(FILES ${qm_files} DESTINATION share/goldendict/locale)
else ()
    add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")

    install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
            RENAME goldendict-ng.png)
    install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)

    # Patch the desktop file to follow the binary & icon rename.
    # The patched copy is written to the build tree, never back into the source
    # tree: rewriting the tracked file would dirty the checkout and would leave
    # the patch in place after USE_ALTERNATIVE_NAME is turned off again.
    set(PATCHED_DESKTOP_FILE "${CMAKE_BINARY_DIR}/io.github.xiaoyifang.goldendict_ng.desktop")
    block()
        file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
        string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
        string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
        file(WRITE "${PATCHED_DESKTOP_FILE}" "${DESKTOP_FILE_CONTENT}")
    endblock()
    install(FILES "${PATCHED_DESKTOP_FILE}" DESTINATION share/applications)
endif ()

View file

@ -0,0 +1,55 @@
# Packaging rules for Windows: assembles a redistributable folder in
# GD_WIN_OUTPUT_DIR, then lets CPack turn that folder into 7z / NSIS packages.
set_target_properties(${GOLDENDICT}
        PROPERTIES
        WIN32_EXECUTABLE TRUE
        RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
        LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
)

# TODO: this breaks "Multi-Config" build systems like Visual Studio.
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)

# Deploy the Qt runtime next to the executable.
# The executable name follows ${GOLDENDICT} so that it stays in sync with the
# target name and with the NSIS shortcut created below.
qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "qt_deploy_runtime_dependencies(
        EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/${GOLDENDICT}.exe\"
        BIN_DIR .
        LIB_DIR .
)"
)

install(SCRIPT ${deploy_script})
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)

# Note: these are runtime dependencies that aren't copied automatically.
# See Qt's network -> SSL documentation https://doc.qt.io/qt-6/ssl.html#considerations-while-packaging-your-application
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)

# Trick CPack into packaging the output folder as the NSIS installer payload;
# debugger / incremental-linker artifacts are left out.
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
        DESTINATION .
        FILES_MATCHING
        PATTERN "*"
        PATTERN "*.pdb" EXCLUDE
        PATTERN "*.ilk" EXCLUDE)

set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
set(CPACK_GENERATOR "7Z;NSIS64")

# Override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")

# NSIS specifics
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")

# Start-menu shortcut handling.
# Copied from https://crascit.com/2015/08/07/cmake_cpack_nsis_shortcuts_with_parameters/
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")

include(CPack)

69
cmake/Package_macOS.cmake Normal file
View file

@ -0,0 +1,69 @@
# Packaging rules for macOS: builds an .app bundle under <build>/redist,
# deploys the Qt runtime into it, ad-hoc signs it and, when `create-dmg`
# is available, wraps it into a .dmg image.
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)

set_target_properties(${GOLDENDICT} PROPERTIES
        MACOSX_BUNDLE TRUE
        MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
)

set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
set(App_Name "${GOLDENDICT}.app")
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")

# If anything goes wrong, delete this and the affected lines, and see what Qt generates by default.
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")

qt_generate_deploy_script(
        TARGET ${GOLDENDICT}
        OUTPUT_SCRIPT deploy_script
        CONTENT "
        set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
        set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
        qt_deploy_runtime_dependencies(
                EXECUTABLE \"${Redistributable_APP}\"
                ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
                GENERATE_QT_CONF
                NO_APP_STORE_COMPLIANCE)
        qt_deploy_translations()
        qt_deploy_qt_conf(\"${QtConfPath}\"
                PLUGINS_DIR PlugIns
                TRANSLATIONS_DIR Resources/translations)
"
)

install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")

# Locate opencc's data folder in the two homebrew prefixes checked below.
if (IS_READABLE "/opt/homebrew/share/opencc/")
    set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
elseif (IS_READABLE "/usr/local/share/opencc/")
    set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
else ()
    message(FATAL_ERROR "Cannot find opencc's data folder!")
endif ()

file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")

install(SCRIPT ${deploy_script})

# Ad-hoc sign the assembled bundle. The path is quoted inside the generated
# install code so that a build directory containing spaces stays one argument.
install(CODE "execute_process(COMMAND codesign --force --deep -s - \"${Redistributable_APP}\")")

find_program(CREATE-DMG "create-dmg")
if (CREATE-DMG)
    # Paths are quoted in the generated code for the same reason as above.
    install(CODE "
    execute_process(COMMAND ${CREATE-DMG} \
        --skip-jenkins \
        --format \"ULMO\"
        --volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
        --volicon \"${CMAKE_SOURCE_DIR}/icons/macicon.icns\" \
        --icon \"${App_Name}\" 100 100
        --app-drop-link 300 100 \
        \"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
        \"${Assembling_Dir}\")"
    )
else ()
    message(WARNING "create-dmg not found. No .dmg will be created")
endif ()

View file

@ -4,13 +4,11 @@
#include "article_maker.hh"
#include "config.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "globalbroadcaster.hh"
#include "globalregex.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include "utils.hh"
#include "wstring_qt.hh"
#include <QDir>
#include <QFile>
#include <QTextDocumentFragment>
@ -22,7 +20,6 @@
using std::vector;
using std::string;
using gd::wstring;
using std::set;
using std::list;
@ -136,19 +133,6 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
R"(<link rel="icon" type="image/png" href="qrc:///flags/)" + Html::escape( icon.toUtf8().data() ) + "\" >\n";
}
result += QString::fromUtf8( R"(
<script>
function tr(key) {
var tr_map = {
"Expand article": "%1", "Collapse article": "%2"
};
return tr_map[key] || '';
}
</script>
)" )
.arg( tr( "Expand article" ), tr( "Collapse article" ) )
.toStdString();
result += R"(<script src="qrc:///scripts/gd-builtin.js"></script>)";
result += R"(<script src="qrc:///scripts/mark.min.js"></script>)";
@ -162,7 +146,10 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
#if QT_VERSION >= QT_VERSION_CHECK( 6, 5, 0 )
if ( GlobalBroadcaster::instance()->getPreference()->darkReaderMode == Config::Dark::Auto
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark ) {
#if !defined( Q_OS_WINDOWS ) // not properly works on Windows.
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark
#endif
&& GlobalBroadcaster::instance()->getPreference()->darkMode == Config::Dark::On ) {
darkReaderModeEnabled = true;
}
#endif
@ -482,7 +469,7 @@ ArticleRequest::ArticleRequest( QString const & word,
// Accumulate main forms
for ( const auto & activeDict : activeDicts ) {
auto const s = activeDict->findHeadwordsForSynonym( gd::removeTrailingZero( word ) );
auto const s = activeDict->findHeadwordsForSynonym( Text::removeTrailingZero( word ) );
connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );
@ -519,9 +506,9 @@ void ArticleRequest::altSearchFinished()
altsDone = true; // So any pending signals in queued mode won't mess us up
vector< wstring > altsVector( alts.begin(), alts.end() );
vector< std::u32string > altsVector( alts.begin(), alts.end() );
wstring wordStd = word.toStdU32String();
std::u32string wordStd = word.toStdU32String();
if ( activeDicts.size() <= 1 ) {
articleSizeLimit = -1; // Don't collapse article if only one dictionary presented
@ -532,7 +519,7 @@ void ArticleRequest::altSearchFinished()
sptr< Dictionary::DataRequest > r = activeDict->getArticle(
wordStd,
altsVector,
gd::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
Text::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
ignoreDiacritics );
connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );
@ -540,7 +527,7 @@ void ArticleRequest::altSearchFinished()
bodyRequests.push_back( r );
}
catch ( std::exception & e ) {
gdWarning( "getArticle request error (%s) in \"%s\"\n", e.what(), activeDict->getName().c_str() );
qWarning( "getArticle request error (%s) in \"%s\"", e.what(), activeDict->getName().c_str() );
}
}
@ -618,7 +605,7 @@ void ArticleRequest::bodyFinished()
return;
}
GD_DPRINTF( "some body finished" );
qDebug( "some body finished" );
bool wasUpdated = false;
@ -628,7 +615,7 @@ void ArticleRequest::bodyFinished()
if ( bodyRequests.front()->isFinished() ) {
// Good
GD_DPRINTF( "one finished." );
qDebug( "one finished." );
Dictionary::DataRequest & req = *bodyRequests.front();
@ -676,11 +663,11 @@ void ArticleRequest::bodyFinished()
</div>)" ),
dictId,
collapse ? R"(style="cursor:pointer;")" : "",
collapse ? tr( "Expand article" ).toStdString() : "",
"",
Html::escape( tr( "From " ).toStdString() ),
Html::escape( activeDict->getName() ),
collapse ? "gdexpandicon" : "gdcollapseicon",
collapse ? "" : tr( "Collapse article" ).toStdString() );
"" );
head += R"(<div class="gddictnamebodyseparator"></div>)";
@ -718,7 +705,7 @@ void ArticleRequest::bodyFinished()
}
}
catch ( std::exception & e ) {
gdWarning( "getDataSlice error: %s\n", e.what() );
qWarning( "getDataSlice error: %s", e.what() );
}
wasUpdated = true;
@ -728,12 +715,12 @@ void ArticleRequest::bodyFinished()
//signal finished dictionary for pronounciation
GlobalBroadcaster::instance()->pronounce_engine.finishDictionary( dictId );
}
GD_DPRINTF( "erasing.." );
qDebug( "erasing.." );
bodyRequests.pop_front();
GD_DPRINTF( "erase done.." );
qDebug( "erase done.." );
}
else {
GD_DPRINTF( "one not finished." );
qDebug( "one not finished." );
break;
}
}
@ -976,7 +963,7 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
// Look it up
// GD_DPRINTF( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );
// qDebug( "Looking up %s", qPrintable( currentSplittedWordCompound ) );
stemmedWordFinder->expressionMatch( currentSplittedWordCompound,
activeDicts,
@ -1006,7 +993,7 @@ void ArticleRequest::individualWordFinished()
WordFinder::SearchResults const & results = stemmedWordFinder->getResults();
if ( results.size() ) {
wstring source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
std::u32string source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
bool hadSomething = false;
@ -1020,7 +1007,7 @@ void ArticleRequest::individualWordFinished()
// Prefix match found. Check if the aliases are acceptable.
wstring result( Folding::applySimpleCaseOnly( results[ x ].first ) );
std::u32string result( Folding::applySimpleCaseOnly( results[ x ].first ) );
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) {
// The resulting string begins with the source one

View file

@ -88,7 +88,7 @@ class ArticleRequest: public Dictionary::DataRequest
QMap< QString, QString > contexts;
std::vector< sptr< Dictionary::Class > > activeDicts;
std::set< gd::wstring, std::less<> > alts; // Accumulated main forms
std::set< std::u32string, std::less<> > alts; // Accumulated main forms
std::list< sptr< Dictionary::WordSearchRequest > > altSearches;
std::list< sptr< Dictionary::DataRequest > > bodyRequests;
bool altsDone{ false };

View file

@ -4,7 +4,6 @@
#include <stdint.h>
#include <QUrl>
#include "article_netmgr.hh"
#include "gddebug.hh"
#include "utils.hh"
#include <QNetworkAccessManager>
#include "globalbroadcaster.hh"
@ -93,7 +92,7 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
//if not external url,can be blocked from here. no need to continue execute the following code.
//such as bres://upload.wikimedia.... etc .
if ( !Utils::isExternalLink( url ) ) {
gdWarning( R"(Blocking element "%s" as built-in link )", req.url().toEncoded().data() );
qWarning( R"(Blocking element "%s" as built-in link )", req.url().toEncoded().data() );
return new BlockedNetworkReply( this );
}
@ -108,7 +107,7 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
if ( !url.host().endsWith( refererUrl.host() )
&& Utils::Url::getHostBaseFromUrl( url ) != Utils::Url::getHostBaseFromUrl( refererUrl )
&& !url.scheme().startsWith( "data" ) ) {
gdWarning( R"(Blocking element "%s" due to not same domain)", url.toEncoded().data() );
qWarning( R"(Blocking element "%s" due to not same domain)", url.toEncoded().data() );
return new BlockedNetworkReply( this );
}
@ -238,7 +237,7 @@ sptr< Dictionary::DataRequest > ArticleNetworkAccessManager::getResource( QUrl c
return dictionary->getResource( Utils::Url::path( url ).mid( 1 ).toUtf8().data() );
}
catch ( std::exception & e ) {
gdWarning( "getResource request error (%s) in \"%s\"\n", e.what(), dictionary->getName().c_str() );
qWarning( "getResource request error (%s) in \"%s\"", e.what(), dictionary->getName().c_str() );
return {};
}
}
@ -284,7 +283,7 @@ ArticleResourceReply::ArticleResourceReply( QObject * parent,
if ( req->isFinished() ) {
emit finishedSignal();
GD_DPRINTF( "In-place finish.\n" );
qDebug( "In-place finish." );
}
}
}
@ -353,11 +352,11 @@ qint64 ArticleResourceReply::readData( char * out, qint64 maxSize )
return 0;
}
GD_DPRINTF( "====reading %lld of (%lld) bytes, %lld bytes readed . Finish status: %d",
toRead,
avail,
alreadyRead,
finished );
qDebug( "====reading %lld of (%lld) bytes, %lld bytes readed . Finish status: %d",
toRead,
avail,
alreadyRead,
finished );
try {
req->getDataSlice( alreadyRead, toRead, out );

View file

@ -1,3 +1 @@
Code to support GD's internal/external audio players.
Only `audioplayerinterface.hh` is supposed to be used outside this folder.

View file

@ -1,7 +1,6 @@
#include "audiooutput.hh"
#include <QAudioFormat>
#include <QtConcurrent/qtconcurrentrun.h>
#include <QtConcurrentRun>
#include <QFuture>
#include <QWaitCondition>
#include <QCoreApplication>

View file

@ -3,40 +3,42 @@
#include <QScopedPointer>
#include <QObject>
#include <utility>
#include "audioplayerfactory.hh"
#include "ffmpegaudioplayer.hh"
#include "multimediaaudioplayer.hh"
#include "externalaudioplayer.hh"
#include "gddebug.hh"
AudioPlayerFactory::AudioPlayerFactory( Config::Preferences const & p ):
useInternalPlayer( p.useInternalPlayer ),
internalPlayerBackend( p.internalPlayerBackend ),
audioPlaybackProgram( p.audioPlaybackProgram )
AudioPlayerFactory::AudioPlayerFactory( bool useInternalPlayer,
InternalPlayerBackend internalPlayerBackend,
QString audioPlaybackProgram ):
useInternalPlayer( useInternalPlayer ),
internalPlayerBackend( std::move( internalPlayerBackend ) ),
audioPlaybackProgram( std::move( audioPlaybackProgram ) )
{
reset();
}
void AudioPlayerFactory::setPreferences( Config::Preferences const & p )
void AudioPlayerFactory::setPreferences( bool new_useInternalPlayer,
const InternalPlayerBackend & new_internalPlayerBackend,
const QString & new_audioPlaybackProgram )
{
if ( p.useInternalPlayer != useInternalPlayer ) {
useInternalPlayer = p.useInternalPlayer;
internalPlayerBackend = p.internalPlayerBackend;
audioPlaybackProgram = p.audioPlaybackProgram;
if ( useInternalPlayer != new_useInternalPlayer ) {
useInternalPlayer = new_useInternalPlayer;
internalPlayerBackend = new_internalPlayerBackend;
audioPlaybackProgram = new_audioPlaybackProgram;
reset();
}
else if ( useInternalPlayer && p.internalPlayerBackend != internalPlayerBackend ) {
internalPlayerBackend = p.internalPlayerBackend;
else if ( useInternalPlayer && internalPlayerBackend != new_internalPlayerBackend ) {
internalPlayerBackend = new_internalPlayerBackend;
reset();
}
else if ( !useInternalPlayer && p.audioPlaybackProgram != audioPlaybackProgram ) {
audioPlaybackProgram = p.audioPlaybackProgram;
else if ( !useInternalPlayer && new_audioPlaybackProgram != audioPlaybackProgram ) {
audioPlaybackProgram = new_audioPlaybackProgram;
ExternalAudioPlayer * const externalPlayer = qobject_cast< ExternalAudioPlayer * >( playerPtr.data() );
if ( externalPlayer ) {
setAudioPlaybackProgram( *externalPlayer );
}
else {
gdWarning( "External player was expected, but it does not exist.\n" );
qWarning( "External player was expected, but it does not exist." );
}
}
}
@ -44,29 +46,8 @@ void AudioPlayerFactory::setPreferences( Config::Preferences const & p )
void AudioPlayerFactory::reset()
{
if ( useInternalPlayer ) {
// qobject_cast checks below account for the case when an unsupported backend
// is stored in config. After this backend is replaced with the default one
// upon preferences saving, the code below does not reset playerPtr with
// another object of the same type.
#ifdef MAKE_FFMPEG_PLAYER
Q_ASSERT( Config::InternalPlayerBackend::defaultBackend().isFfmpeg()
&& "Adjust the code below after changing the default backend." );
if ( !internalPlayerBackend.isQtmultimedia() ) {
if ( !playerPtr || !qobject_cast< Ffmpeg::AudioPlayer * >( playerPtr.data() ) ) {
playerPtr.reset( new Ffmpeg::AudioPlayer );
}
return;
}
#endif
#ifdef MAKE_QTMULTIMEDIA_PLAYER
if ( !playerPtr || !qobject_cast< MultimediaAudioPlayer * >( playerPtr.data() ) ) {
playerPtr.reset( new MultimediaAudioPlayer );
}
playerPtr.reset( internalPlayerBackend.getActualPlayer() );
return;
#endif
}
std::unique_ptr< ExternalAudioPlayer > externalPlayer( new ExternalAudioPlayer );

View file

@ -4,7 +4,7 @@
#pragma once
#include "audioplayerinterface.hh"
#include "config.hh"
#include "internalplayerbackend.hh"
class ExternalAudioPlayer;
@ -13,8 +13,12 @@ class AudioPlayerFactory
Q_DISABLE_COPY( AudioPlayerFactory )
public:
explicit AudioPlayerFactory( Config::Preferences const & );
void setPreferences( Config::Preferences const & );
explicit AudioPlayerFactory( bool useInternalPlayer,
InternalPlayerBackend internalPlayerBackend,
QString audioPlaybackProgram );
void setPreferences( bool new_useInternalPlayer,
const InternalPlayerBackend & new_internalPlayerBackend,
const QString & new_audioPlaybackProgram );
/// The returned reference to a smart pointer is valid as long as this object
/// exists. The pointer to the owned AudioPlayerInterface may change after the
/// call to setPreferences(), but it is guaranteed to never be null.
@ -28,7 +32,7 @@ private:
void setAudioPlaybackProgram( ExternalAudioPlayer & externalPlayer );
bool useInternalPlayer;
Config::InternalPlayerBackend internalPlayerBackend;
InternalPlayerBackend internalPlayerBackend;
QString audioPlaybackProgram;
AudioPlayerPtr playerPtr;
};

View file

@ -2,7 +2,6 @@
#include "audiooutput.hh"
#include "ffmpegaudio.hh"
#include "gddebug.hh"
#include "utils.hh"
#include <QAudioDevice>
#include <QDataStream>
@ -88,7 +87,7 @@ static int readAudioData( void * opaque, unsigned char * buffer, int bufferSize
// QDataStream::readRawData() returns 0 at EOF => return AVERROR_EOF in this case.
// An error is unlikely here, so just print a warning and return AVERROR_EOF too.
if ( bytesRead < 0 ) {
gdWarning( "readAudioData: error while reading raw data." );
qWarning( "readAudioData: error while reading raw data." );
}
return bytesRead > 0 ? bytesRead : AVERROR_EOF;
}
@ -170,11 +169,11 @@ bool DecoderContext::openCodec( QString & errorString )
// 61 = FFmpeg 7.0 -> https://github.com/FFmpeg/FFmpeg/blob/release/7.0/libavcodec/version_major.h
#if LIBAVCODEC_VERSION_MAJOR >= 61
gdDebug( "Codec open: %s: channels: %d, rate: %d, format: %s\n",
codec_->long_name,
codecContext_->ch_layout.nb_channels,
codecContext_->sample_rate,
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
qDebug( "Codec open: %s: channels: %d, rate: %d, format: %s",
codec_->long_name,
codecContext_->ch_layout.nb_channels,
codecContext_->sample_rate,
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
if ( !av_channel_layout_check( &codecContext_->ch_layout ) ) {
av_channel_layout_default( &codecContext_->ch_layout, codecContext_->ch_layout.nb_channels );
@ -193,11 +192,11 @@ bool DecoderContext::openCodec( QString & errorString )
qDebug() << "swr_alloc_set_opts2 failed.";
}
#else
gdDebug( "Codec open: %s: channels: %d, rate: %d, format: %s\n",
codec_->long_name,
codecContext_->channels,
codecContext_->sample_rate,
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
qDebug( "Codec open: %s: channels: %d, rate: %d, format: %s",
codec_->long_name,
codecContext_->channels,
codecContext_->sample_rate,
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
auto layout = codecContext_->channel_layout;
if ( !layout ) {
@ -367,7 +366,7 @@ bool DecoderContext::normalizeAudio( AVFrame * frame, vector< uint8_t > & sample
return false;
}
else {
// qDebug( "out_count:%d, out_nb_samples:%d, frame->nb_samples:%d \n", out_count, out_nb_samples, frame->nb_samples );
// qDebug( "out_count:%d, out_nb_samples:%d, frame->nb_samples:%d ", out_count, out_nb_samples, frame->nb_samples );
}
int actual_size = av_samples_get_buffer_size( nullptr, dst_channels, out_nb_samples, AV_SAMPLE_FMT_S16, 1 );

View file

@ -0,0 +1,54 @@
#include "internalplayerbackend.hh"
#include "ffmpegaudioplayer.hh"
#include "multimediaaudioplayer.hh"
#ifdef MAKE_FFMPEG_PLAYER
constexpr auto ffmpeg = "FFmpeg";
#endif
#ifdef MAKE_QTMULTIMEDIA_PLAYER
constexpr auto qtmultimedia = "Qt Multimedia";
#endif
/// Reports whether at least one internal audio backend was compiled in.
/// Returns true when MAKE_FFMPEG_PLAYER or MAKE_QTMULTIMEDIA_PLAYER is defined.
/// When neither is defined, the static_assert in the #else branch is there to
/// fail the build rather than let the function return false at run time.
bool InternalPlayerBackend::anyAvailable()
{
#if defined( MAKE_FFMPEG_PLAYER ) || defined( MAKE_QTMULTIMEDIA_PLAYER )
return true;
#else
// No backend macro defined: stop the build with an explanatory message.
static_assert( false, "No audio player backend. Please enable one." );
return false;
#endif
}
/// Lists the display names of the backends that were compiled in.
/// Qt Multimedia (if enabled) comes before FFmpeg (if enabled); getActualPlayer()
/// treats the first entry as the fallback.
QStringList InternalPlayerBackend::availableBackends()
{
  QStringList backends;
#ifdef MAKE_QTMULTIMEDIA_PLAYER
  backends.append( qtmultimedia );
#endif
#ifdef MAKE_FFMPEG_PLAYER
  backends.append( ffmpeg );
#endif
  return backends;
}
/// Creates a new player object for the backend stored in `name`.
/// If the stored name is empty or not among availableBackends(), `name` is first
/// replaced by the first available backend. The returned object is allocated
/// with `new`; nullptr is returned only if no compiled-in backend matches.
AudioPlayerInterface * InternalPlayerBackend::getActualPlayer()
{
  auto const backends = availableBackends();

  // The backend from the user's config is not available: fall back to the default one.
  if ( name.isEmpty() || !backends.contains( name ) ) {
    name = backends.constFirst();
  }

#ifdef MAKE_FFMPEG_PLAYER
  if ( name == ffmpeg ) {
    return new Ffmpeg::AudioPlayer();
  }
#endif

#ifdef MAKE_QTMULTIMEDIA_PLAYER
  if ( name == qtmultimedia ) {
    return new MultimediaAudioPlayer();
  }
#endif

  // After the fallback above, one of the branches should always have matched.
  qCritical( "Impossible situation. If ever reached, fix elsewhere. " );
  return nullptr;
}

View file

@ -0,0 +1,41 @@
#pragma once
#include "audioplayerinterface.hh"
#include "ffmpegaudioplayer.hh"
#include "multimediaaudioplayer.hh"
#include <QScopedPointer>
#include <QStringList>
/// Helper that manages the internal audio backends. It is not a backend itself:
/// it only stores the name of the selected backend and creates the matching player.
class InternalPlayerBackend
{
public:
  /// Returns true if at least one backend is available.
  static bool anyAvailable();

  /// Returns the list of names of the supported backends.
  /// The first one will be the default one.
  static QStringList availableBackends();

  /// Creates the player that corresponds to the stored backend name.
  AudioPlayerInterface * getActualPlayer();

  /// Name of the currently selected backend.
  QString const & getName() const
  {
    return name;
  }

  /// Selects a backend by name.
  void setName( QString const & name_ )
  {
    name = name_;
  }

  /// Two objects compare equal when they hold the same backend name.
  bool operator==( InternalPlayerBackend const & other ) const
  {
    return name == other.name;
  }

  bool operator!=( InternalPlayerBackend const & other ) const
  {
    return !( *this == other );
  }

private:
  QString name;
};

View file

@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "filetype.hh"
#include "utf8.hh"
#include "text.hh"
#include <ctype.h>
namespace Filetype {
@ -26,13 +26,13 @@ string simplifyString( string const & str, bool lowercase )
size_t beginPos = 0;
while ( beginPos < str.size() && Utf8::isspace( str[ beginPos ] ) ) {
while ( beginPos < str.size() && Text::isspace( str[ beginPos ] ) ) {
++beginPos;
}
size_t endPos = str.size();
while ( endPos && Utf8::isspace( str[ endPos - 1 ] ) ) {
while ( endPos && Text::isspace( str[ endPos - 1 ] ) ) {
--endPos;
}

View file

@ -3,7 +3,7 @@
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "globalregex.hh"
#include "inc_case_folding.hh"
@ -13,12 +13,12 @@ namespace Folding {
/// caught by the diacritics folding table, but they are only handled there
/// when they come with their main characters, not by themselves. The rest
/// are caught here.
bool isCombiningMark( wchar ch )
bool isCombiningMark( char32_t ch )
{
return QChar::isMark( ch );
}
wstring apply( wstring const & in, bool preserveWildcards )
std::u32string apply( std::u32string const & in, bool preserveWildcards )
{
// remove diacritics (normalization), white space, punt,
auto temp = QString::fromStdU32String( in )
@ -32,7 +32,7 @@ wstring apply( wstring const & in, bool preserveWildcards )
// case folding
std::u32string caseFolded;
caseFolded.reserve( temp.size() );
wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];
for ( const char32_t ch : temp ) {
auto n = foldCase( ch, buf );
caseFolded.append( buf, n );
@ -40,11 +40,11 @@ wstring apply( wstring const & in, bool preserveWildcards )
return caseFolded;
}
wstring applySimpleCaseOnly( wstring const & in )
std::u32string applySimpleCaseOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -55,27 +55,27 @@ wstring applySimpleCaseOnly( wstring const & in )
return out;
}
wstring applySimpleCaseOnly( QString const & in )
std::u32string applySimpleCaseOnly( QString const & in )
{
//qt only support simple case folding.
return in.toCaseFolded().toStdU32String();
}
wstring applySimpleCaseOnly( std::string const & in )
std::u32string applySimpleCaseOnly( std::string const & in )
{
return applySimpleCaseOnly( Utf8::decode( in ) );
return applySimpleCaseOnly( Text::toUtf32( in ) );
// return QString::fromStdString( in ).toCaseFolded().toStdU32String();
}
wstring applyFullCaseOnly( wstring const & in )
std::u32string applyFullCaseOnly( std::u32string const & in )
{
wstring caseFolded;
std::u32string caseFolded;
caseFolded.reserve( in.size() * foldCaseMaxOut );
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];
for ( size_t left = in.size(); left--; ) {
caseFolded.append( buf, foldCase( *nextChar++, buf ) );
@ -84,17 +84,17 @@ wstring applyFullCaseOnly( wstring const & in )
return caseFolded;
}
wstring applyDiacriticsOnly( wstring const & in )
std::u32string applyDiacriticsOnly( std::u32string const & in )
{
auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark );
return noAccent.toStdU32String();
}
wstring applyPunctOnly( wstring const & in )
std::u32string applyPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -119,11 +119,11 @@ QString applyPunctOnly( QString const & in )
return out;
}
wstring applyWhitespaceOnly( wstring const & in )
std::u32string applyWhitespaceOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -136,11 +136,11 @@ wstring applyWhitespaceOnly( wstring const & in )
return out;
}
wstring applyWhitespaceAndPunctOnly( wstring const & in )
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();
wstring out;
std::u32string out;
out.reserve( in.size() );
@ -153,26 +153,26 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
return out;
}
bool isWhitespace( wchar ch )
bool isWhitespace( char32_t ch )
{
//invisible character should be treated as whitespace as well.
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
}
bool isWhitespaceOrPunct( wchar ch )
bool isWhitespaceOrPunct( char32_t ch )
{
return isWhitespace( ch ) || QChar::isPunct( ch );
}
bool isPunct( wchar ch )
bool isPunct( char32_t ch )
{
return QChar::isPunct( ch );
}
wstring trimWhitespaceOrPunct( wstring const & in )
std::u32string trimWhitespaceOrPunct( std::u32string const & in )
{
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
@ -185,7 +185,7 @@ wstring trimWhitespaceOrPunct( wstring const & in )
--wordSize;
}
return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}
QString trimWhitespaceOrPunct( QString const & in )
@ -209,13 +209,13 @@ QString trimWhitespaceOrPunct( QString const & in )
return in.mid( wordBegin, wordSize );
}
wstring trimWhitespace( wstring const & in )
std::u32string trimWhitespace( std::u32string const & in )
{
if ( in.empty() ) {
return in;
}
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -228,7 +228,7 @@ wstring trimWhitespace( wstring const & in )
--wordSize;
}
return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}
QString trimWhitespace( QString const & in )

View file

@ -3,7 +3,7 @@
#pragma once
#include "wstring.hh"
#include "text.hh"
#include <QString>
/// Folding provides means to translate several possible ways to write a
@ -17,8 +17,6 @@
namespace Folding {
using gd::wstring;
using gd::wchar;
/// The algorithm's version.
enum {
@ -27,48 +25,48 @@ enum {
/// Applies the folding algorithm to each character in the given string,
/// making another one as a result.
wstring apply( wstring const &, bool preserveWildcards = false );
std::u32string apply( std::u32string const &, bool preserveWildcards = false );
/// Applies only simple case folding algorithm. Since many dictionaries have
/// different case style, we interpret words differing only by case as synonyms.
wstring applySimpleCaseOnly( wstring const & );
wstring applySimpleCaseOnly( QString const & in );
wstring applySimpleCaseOnly( std::string const & in );
std::u32string applySimpleCaseOnly( std::u32string const & );
std::u32string applySimpleCaseOnly( QString const & in );
std::u32string applySimpleCaseOnly( std::string const & in );
/// Applies only full case folding algorithm. This includes simple case, but also
/// decomposing ligatures and complex letters.
wstring applyFullCaseOnly( wstring const & );
std::u32string applyFullCaseOnly( std::u32string const & );
/// Applies only diacritics folding algorithm.
wstring applyDiacriticsOnly( wstring const & );
std::u32string applyDiacriticsOnly( std::u32string const & );
/// Applies only punctuation folding algorithm.
wstring applyPunctOnly( wstring const & );
std::u32string applyPunctOnly( std::u32string const & );
QString applyPunctOnly( QString const & in );
/// Applies only whitespace folding algorithm.
wstring applyWhitespaceOnly( wstring const & );
std::u32string applyWhitespaceOnly( std::u32string const & );
/// Applies only whitespace&punctuation folding algorithm.
wstring applyWhitespaceAndPunctOnly( wstring const & );
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & );
/// Returns true if the given character is any form of whitespace, false
/// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also
/// includes \n, \r and \t.
bool isWhitespace( wchar ch );
bool isWhitespaceOrPunct( wchar ch );
bool isWhitespace( char32_t ch );
bool isWhitespaceOrPunct( char32_t ch );
/// Returns true if the given character is any form of punctuation, false
/// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes.
bool isPunct( wchar ch );
bool isPunct( char32_t ch );
/// Removes any whitespace or punctuation from the beginning and the end of
/// the word.
wstring trimWhitespaceOrPunct( wstring const & );
std::u32string trimWhitespaceOrPunct( std::u32string const & );
QString trimWhitespaceOrPunct( QString const & in );
/// Removes any whitespace from the beginning and the end of
/// the word.
wstring trimWhitespace( wstring const & );
std::u32string trimWhitespace( std::u32string const & );
QString trimWhitespace( QString const & in );
/// Same as apply( wstring ), but without any heap operations, therefore
@ -86,6 +84,6 @@ QString unescapeWildcardSymbols( QString const & );
QString escapeWildcardSymbols( QString const & );
/// Tests if the given char is one of the Unicode combining marks.
bool isCombiningMark( wchar ch );
bool isCombiningMark( char32_t ch );
} // namespace Folding

View file

@ -1,29 +0,0 @@
/* This file is (c) 2013 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "gddebug.hh"
#include <QDebug>
#include <QString>
#include <QtCore5Compat/QTextCodec>
QFile * logFilePtr;
void gdWarning( const char * msg, ... )
{
va_list ap;
va_start( ap, msg );
qWarning() << QString().vasprintf( msg, ap );
va_end( ap );
}
void gdDebug( const char * msg, ... )
{
va_list ap;
va_start( ap, msg );
qDebug().noquote() << QString().vasprintf( msg, ap );
va_end( ap );
}

View file

@ -1,29 +0,0 @@
#pragma once
#include <QFile>
#ifdef NO_CONSOLE
#define GD_DPRINTF( ... ) \
do { \
} while ( 0 )
#define GD_FDPRINTF( ... ) \
do { \
} while ( 0 )
#else
#define GD_DPRINTF( ... ) gdDebug( __VA_ARGS__ )
#define GD_FDPRINTF( ... ) fprintf( __VA_ARGS__ )
#endif
void gdWarning( const char *, ... ) /* print warning message */
#if defined( Q_CC_GNU ) && !defined( __INSURE__ )
__attribute__( ( format( printf, 1, 2 ) ) )
#endif
;
void gdDebug( const char *, ... )
#if defined( Q_CC_GNU ) && !defined( __INSURE__ )
__attribute__( ( format( printf, 1, 2 ) ) )
#endif
;
extern QFile * logFilePtr;

View file

@ -5,14 +5,9 @@
#include <vector>
#include <errno.h>
#include <string.h>
#include "wstring_qt.hh"
char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE";
char const * const Iconv::Utf8 = "UTF-8";
Iconv::Iconv( char const * from ):
state( iconv_open( Utf8, from ) )
state( iconv_open( Text::utf8, from ) )
{
if ( state == (iconv_t)-1 ) {
throw exCantInit( strerror( errno ) );
@ -80,7 +75,7 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
return QString::fromUtf8( &outBuf.front(), datasize );
}
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
std::u32string Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
{
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific
@ -111,6 +106,12 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz
return outStr.toStdString();
}
/// Convenience overload: converts a UTF-32 view by forwarding to the raw-buffer overload.
std::string Iconv::toUtf8( char const * fromEncoding, std::u32string_view str )
{
  // The raw-buffer overload expects a length in bytes, whereas size() counts char32_t units.
  auto const byteCount = str.size() * sizeof( char32_t );
  return toUtf8( fromEncoding, str.data(), byteCount );
}
QString Iconv::toQString( char const * fromEncoding, void const * fromData, size_t dataSize )
{
if ( dataSize == 0 ) {

View file

@ -3,14 +3,11 @@
#pragma once
#include <QString>
#include "wstring.hh"
#include "ex.hh"
#include "text.hh"
#include <QString>
#include <iconv.h>
/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv()
/// Only supports converting from a known "from" to UTF8
class Iconv
@ -22,12 +19,6 @@ public:
DEF_EX( Ex, "Iconv exception", std::exception )
DEF_EX_STR( exCantInit, "Can't initialize iconv conversion:", Ex )
// Some predefined character sets' names
static char const * const GdWchar;
static char const * const Utf16Le;
static char const * const Utf8;
explicit Iconv( char const * from );
~Iconv();
@ -35,11 +26,12 @@ public:
QString convert( void const *& inBuf, size_t & inBytesLeft );
// Converts a given block of data from the given encoding to a wide string.
static gd::wstring toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
static std::u32string toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
// Converts a given block of data from the given encoding to an utf8-encoded
// string.
static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize );
static std::string toUtf8( char const * fromEncoding, std::u32string_view str );
static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize );

2
src/common/logfileptr.cc Normal file
View file

@ -0,0 +1,2 @@
#include "logfileptr.hh"
QFile * logFilePtr;

3
src/common/logfileptr.hh Normal file
View file

@ -0,0 +1,3 @@
#pragma once
#include <QFile>
extern QFile * logFilePtr;

321
src/common/text.cc Normal file
View file

@ -0,0 +1,321 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "text.hh"
#include <vector>
#include <algorithm>
#include <QByteArray>
#include <QString>
#include <QList>
namespace Text {
/// Maps an Encoding value to its encoding-name string.
/// Encoding::Utf8 and any value not listed below yield the UTF-8 name.
const char * getEncodingNameFor( Encoding e )
{
  if ( e == Encoding::Utf32LE ) {
    return utf32_le;
  }
  if ( e == Encoding::Utf32BE ) {
    return utf32_be;
  }
  if ( e == Encoding::Utf32 ) {
    return utf32;
  }
  if ( e == Encoding::Utf16LE ) {
    return utf16_le;
  }
  if ( e == Encoding::Utf16BE ) {
    return utf16_be;
  }
  if ( e == Encoding::Windows1252 ) {
    return windows_1252;
  }
  if ( e == Encoding::Windows1251 ) {
    return windows_1251;
  }
  if ( e == Encoding::Windows1250 ) {
    return windows_1250;
  }
  return utf8;
}
/// Maps an encoding name to its Encoding value. The name is upper-cased before
/// comparison, so the lookup is case-insensitive. Names that match none of the
/// entries below (including the UTF-8 name itself) yield Encoding::Utf8.
Encoding getEncodingForName( const QByteArray & name )
{
  struct KnownName
  {
    const char * label;
    Encoding value;
  };
  const KnownName knownNames[] = {
    { utf32_le, Encoding::Utf32LE },
    { utf32_be, Encoding::Utf32BE },
    { utf32, Encoding::Utf32 },
    { utf16_le, Encoding::Utf16LE },
    { utf16_be, Encoding::Utf16BE },
    { windows_1252, Encoding::Windows1252 },
    { windows_1251, Encoding::Windows1251 },
    { windows_1250, Encoding::Windows1250 },
  };

  auto const upper = name.toUpper();
  for ( auto const & candidate : knownNames ) {
    if ( upper == candidate.label ) {
      return candidate.value;
    }
  }
  return Encoding::Utf8;
}
/// Converts UTF-32 code points to UTF-8 bytes.
/// @param in     first of `inSize` code points to convert
/// @param inSize number of code points (not bytes) readable through `in`
/// @param out_   destination buffer; it must have room for inSize * 4 bytes
/// @return number of bytes written to `out_`. No terminating 0 is appended.
size_t encode( char32_t const * in, size_t inSize, char * out_ )
{
  auto * const first = reinterpret_cast< unsigned char * >( out_ );
  auto * dst         = first;

  for ( char32_t const * const end = in + inSize; in != end; ++in ) {
    char32_t const cp = *in;

    if ( cp < 0x80 ) {
      // 0xxxxxxx
      *dst++ = static_cast< unsigned char >( cp );
    }
    else if ( cp < 0x800 ) {
      // 110xxxxx 10xxxxxx
      *dst++ = static_cast< unsigned char >( 0xC0 | ( cp >> 6 ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else if ( cp < 0x10000 ) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      *dst++ = static_cast< unsigned char >( 0xE0 | ( cp >> 12 ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *dst++ = static_cast< unsigned char >( 0xF0 | ( cp >> 18 ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 12 ) & 0x3F ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *dst++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
  }

  return dst - first;
}
/// Converts UTF-8 bytes to UTF-32 code points.
/// @param in_    first of `inSize` bytes to convert
/// @param inSize number of bytes readable through `in_`
/// @param out_   destination buffer; it must have room for inSize code points
/// @return number of code points written to `out_`, or -1 if the input is not
///         well-formed (bad lead byte, truncated sequence, or a byte that is
///         not a continuation byte where one is required). The result is not
///         0-terminated. Code points decoded before an error was detected have
///         already been written to `out_`.
long decode( char const * in_, size_t inSize, char32_t * out_ )
{
  auto const * src       = reinterpret_cast< unsigned char const * >( in_ );
  auto const * const end = src + inSize;
  char32_t * dst         = out_;

  while ( src != end ) {
    unsigned char const lead = *src++;

    char32_t cp;
    size_t trailing; // continuation bytes that must follow the lead byte

    if ( ( lead & 0x80 ) == 0 ) {
      // One-byte encoding
      cp       = lead;
      trailing = 0;
    }
    else if ( ( lead & 0xE0 ) == 0xC0 ) {
      // Two-byte sequence
      cp       = lead & 0x1F;
      trailing = 1;
    }
    else if ( ( lead & 0xF0 ) == 0xE0 ) {
      // Three-byte sequence
      cp       = lead & 0x0F;
      trailing = 2;
    }
    else if ( ( lead & 0xF8 ) == 0xF0 ) {
      // Four-byte sequence
      cp       = lead & 0x07;
      trailing = 3;
    }
    else {
      // Either a continuation byte in lead position or a 11111xxx byte
      return -1;
    }

    if ( static_cast< size_t >( end - src ) < trailing ) {
      // The sequence is cut off by the end of the input
      return -1;
    }

    for ( ; trailing != 0; --trailing ) {
      unsigned char const next = *src++;
      if ( ( next & 0xC0 ) != 0x80 ) {
        return -1;
      }
      cp = ( cp << 6 ) | ( next & 0x3F );
    }

    *dst++ = cp;
  }

  return dst - out_;
}
std::string toUtf8( std::u32string const & in ) noexcept
{
if ( in.empty() ) {
return {};
}
std::vector< char > buffer( in.size() * 4 );
return { &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) };
}
std::u32string toUtf32( std::string const & in )
{
if ( in.empty() ) {
return {};
}
std::vector< char32_t > buffer( in.size() );
long result = decode( in.data(), in.size(), &buffer.front() );
if ( result < 0 ) {
throw exCantDecode( in );
}
return std::u32string( &buffer.front(), result );
}
/// Locale-independent whitespace test: true only for space, form feed,
/// line feed, carriage return, horizontal tab and vertical tab.
bool isspace( int c )
{
  return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}
/// Returns the byte length of the first line in s1, where s2 is the line-feed
/// byte sequence: the offset of the first occurrence of s2 plus s2length, so the
/// line feed itself is included. If s2 does not occur in s1, s1length is returned,
/// i.e. the whole buffer counts as one line.
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length )
{
  char * const bufferEnd = s1 + s1length;
  char * const hit       = std::search( s1, bufferEnd, s2, s2 + s2length );
  auto const offset      = hit - s1;

  // No line feed: offset already equals s1length. Otherwise include the line feed bytes.
  return hit == bufferEnd ? offset : offset + s2length;
}
/// Builds the byte pattern of a line feed (0x0A) for the given encoding.
/// UTF-32LE/BE use 4 bytes and UTF-16LE/BE use 2 bytes, with 0x0A in the first
/// (LE) or last (BE) byte; every other encoding uses the single byte 0x0A.
/// The pattern is allocated with new[] and is not released here.
/// NOTE(review): Encoding::Utf32 (no byte order) takes the single-byte path — confirm
/// callers resolve the byte order before calling.
LineFeed initLineFeed( const Encoding e )
{
  int unitBytes = 1; // size of one code unit in bytes
  int lfOffset  = 0; // index of the 0x0A byte within the unit

  switch ( e ) {
    case Encoding::Utf32LE:
      unitBytes = 4;
      break;
    case Encoding::Utf32BE:
      unitBytes = 4;
      lfOffset  = 3;
      break;
    case Encoding::Utf16LE:
      unitBytes = 2;
      break;
    case Encoding::Utf16BE:
      unitBytes = 2;
      lfOffset  = 1;
      break;
    default:
      break;
  }

  LineFeed lf{};
  lf.length               = unitBytes;
  lf.lineFeed             = new char[ unitBytes ](); // zero-filled
  lf.lineFeed[ lfOffset ] = 0x0A;
  return lf;
}
/// Returns a copy of `v` without its trailing U+0000 code units.
/// Converting non-BMP characters can leave \0 at the end of the string, and \0 is
/// significant to the index, so this is meant for index store/query operations.
/// Zeros that are followed by a non-zero code unit are kept; a string made up only
/// of zeros (or an empty one) yields an empty string.
std::u32string removeTrailingZero( std::u32string const & v )
{
  // find_last_not_of works on size_type, avoiding the size_t -> int narrowing of a manual counter.
  auto const lastKept = v.find_last_not_of( U'\0' );
  if ( lastKept == std::u32string::npos ) {
    return {};
  }
  return v.substr( 0, lastKept + 1 );
}
/// QString overload: converts `in` to UCS-4 and returns it without trailing
/// U+0000 code units.
std::u32string removeTrailingZero( QString const & in )
{
  auto const ucs4 = in.toUcs4();

  // Count how many leading elements remain once the trailing zeros are dropped.
  auto keep = ucs4.size();
  while ( keep > 0 && ucs4.at( keep - 1 ) == 0 ) {
    --keep;
  }

  return std::u32string( reinterpret_cast< const char32_t * >( ucs4.constData() ), keep );
}
/// Returns `str` in Unicode normalization form C, using QString for the normalization.
std::u32string normalize( const std::u32string & str )
{
  auto const composed = QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C );
  return composed.toStdU32String();
}
} // namespace Text

64
src/common/text.hh Normal file
View file

@ -0,0 +1,64 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include "ex.hh"
#include <QByteArray>
#include <string>
/// Facilities to process Text, focusing on Unicode
namespace Text {
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
/// Text encodings known to this module. The matching name strings follow IANA's
/// character-set names: https://www.iana.org/assignments/character-sets/character-sets.xhtml
/// Notice: the order (and therefore the numeric values, which start at 0) of the
/// enumerators that precede Utf32LE must not be changed. The current .dsl format
/// index file depends on it.
enum class Encoding {
Utf16LE = 0,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8,
Utf32BE,
Utf32LE,
Utf32,
};
inline constexpr auto utf16_be = "UTF-16BE";
inline constexpr auto utf16_le = "UTF-16LE";
inline constexpr auto utf32 = "UTF-32";
inline constexpr auto utf32_be = "UTF-32BE";
inline constexpr auto utf32_le = "UTF-32LE";
inline constexpr auto utf8 = "UTF-8";
inline constexpr auto windows_1250 = "WINDOWS-1250";
inline constexpr auto windows_1251 = "WINDOWS-1251";
inline constexpr auto windows_1252 = "WINDOWS-1252";
const char * getEncodingNameFor( Encoding e );
Encoding getEncodingForName( const QByteArray & name );
/// utf32 -> utf8
std::string toUtf8( std::u32string const & ) noexcept;
/// utf8 -> utf32
std::u32string toUtf32( std::string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
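/// Returns the byte length of the first line in s1, including the line feed s2.
/// If s2 is not found, s1length is returned (the whole buffer is one line).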
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
/// Byte pattern of a line feed in a particular encoding, as produced by initLineFeed().
struct LineFeed
{
/// Number of bytes in the lineFeed array.
int length;
/// The line-feed bytes. initLineFeed() allocates this array with new[]; this struct
/// has no destructor, so nothing here frees it.
char * lineFeed;
};
LineFeed initLineFeed( Encoding e );
std::u32string removeTrailingZero( std::u32string const & v );
std::u32string removeTrailingZero( QString const & in );
std::u32string normalize( std::u32string const & );
} // namespace Text

View file

@ -1,280 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "utf8.hh"
#include <vector>
#include <algorithm>
#include <QByteArray>
#include <QString>
namespace Utf8 {
/// Serialises inSize UTF-32 code points from 'in' as UTF-8 bytes into out_.
/// The caller provides room for up to four bytes per code point. Returns the
/// number of bytes written; no terminator is appended.
size_t encode( wchar const * in, size_t inSize, char * out_ )
{
  unsigned char * const first = (unsigned char *)out_;
  unsigned char * cursor      = first;

  for ( size_t idx = 0; idx < inSize; ++idx ) {
    wchar const cp = in[ idx ];

    if ( cp < 0x80 ) {
      // Plain ASCII: one byte
      *cursor++ = cp;
      continue;
    }

    if ( cp < 0x800 ) {
      // Two bytes: 110xxxxx 10xxxxxx
      *cursor++ = 0xC0 | ( cp >> 6 );
    }
    else if ( cp < 0x10000 ) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      *cursor++ = 0xE0 | ( cp >> 12 );
      *cursor++ = 0x80 | ( ( cp >> 6 ) & 0x3F );
    }
    else {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *cursor++ = 0xF0 | ( cp >> 18 );
      *cursor++ = 0x80 | ( ( cp >> 12 ) & 0x3F );
      *cursor++ = 0x80 | ( ( cp >> 6 ) & 0x3F );
    }

    // Every multi-byte form ends with the low six bits
    *cursor++ = 0x80 | ( cp & 0x3F );
  }

  return cursor - first;
}
long decode( char const * in_, size_t inSize, wchar * out_ )
{
unsigned char const * in = (unsigned char const *)in_;
wchar * out = out_;
while ( inSize-- ) {
wchar result;
if ( *in & 0x80 ) {
if ( *in & 0x40 ) {
if ( *in & 0x20 ) {
if ( *in & 0x10 ) {
// Four-byte sequence
if ( *in & 8 ) {
// This can't be
return -1;
}
if ( inSize < 3 ) {
return -1;
}
inSize -= 3;
result = ( (wchar)*in++ & 7 ) << 18;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
}
else {
// Three-byte sequence
if ( inSize < 2 ) {
return -1;
}
inSize -= 2;
result = ( (wchar)*in++ & 0xF ) << 12;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= ( (wchar)*in++ & 0x3F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
}
}
else {
// Two-byte sequence
if ( !inSize ) {
return -1;
}
--inSize;
result = ( (wchar)*in++ & 0x1F ) << 6;
if ( ( *in & 0xC0 ) != 0x80 ) {
return -1;
}
result |= (wchar)*in++ & 0x3F;
}
}
else {
// This char is from the middle of encoding, it can't be leading
return -1;
}
}
else {
// One-byte encoding
result = *in++;
}
*out++ = result;
}
return out - out_;
}
/// Convenience overload: converts a whole UTF-32 string to UTF-8.
string encode( wstring const & in ) noexcept
{
  string utf8Bytes;

  if ( !in.empty() ) {
    // Worst case is four bytes per code point; shrink to what was actually produced
    utf8Bytes.resize( in.size() * 4 );
    utf8Bytes.resize( encode( in.data(), in.size(), &utf8Bytes.front() ) );
  }

  return utf8Bytes;
}
/// Convenience overload: converts a whole UTF-8 string to UTF-32.
/// Throws exCantDecode (carrying the input) when the bytes are not valid UTF-8.
wstring decode( string const & in )
{
  wstring decoded;
  if ( in.empty() ) {
    return decoded;
  }

  // A code point never needs more slots than it had input bytes
  decoded.resize( in.size() );

  long const count = decode( in.data(), in.size(), &decoded.front() );
  if ( count < 0 ) {
    throw exCantDecode( in );
  }

  decoded.resize( count );
  return decoded;
}
/// Locale-independent whitespace test: true only for space, form feed,
/// line feed, carriage return, horizontal tab and vertical tab.
bool isspace( int c )
{
  return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}
// Looks for the first occurrence of the byte sequence s2 (s2length bytes) inside
// s1 (s1length bytes). Returns the offset just past that occurrence, i.e. the size
// of the first line including its terminator. When s2 does not occur, the result
// is s1length (the whole buffer counts as one line).
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length )
{
  char * const bufferEnd = s1 + s1length;
  char * const match     = std::search( s1, bufferEnd, s2, s2 + s2length );
  const auto offset      = match - s1;

  return match == bufferEnd ? offset : offset + s2length;
}
/// Maps an Encoding value to its upper-case name string.
/// Any value without a dedicated name is reported as "UTF-8".
char const * getEncodingNameFor( Encoding e )
{
  if ( e == Utf32LE ) {
    return "UTF-32LE";
  }
  if ( e == Utf32BE ) {
    return "UTF-32BE";
  }
  if ( e == Utf16LE ) {
    return "UTF-16LE";
  }
  if ( e == Utf16BE ) {
    return "UTF-16BE";
  }
  if ( e == Windows1252 ) {
    return "WINDOWS-1252";
  }
  if ( e == Windows1251 ) {
    return "WINDOWS-1251";
  }
  if ( e == Windows1250 ) {
    return "WINDOWS-1250";
  }
  // Utf8 itself and every unknown value
  return "UTF-8";
}
/// Maps an encoding name to its Encoding value. The comparison ignores case
/// (the name is upper-cased first); unrecognised names yield Utf8.
Encoding getEncodingForName( const QByteArray & _name )
{
  static const struct
  {
    char const * label;
    Encoding value;
  } knownEncodings[] = {
    { "UTF-32LE", Utf32LE },
    { "UTF-32BE", Utf32BE },
    { "UTF-16LE", Utf16LE },
    { "UTF-16BE", Utf16BE },
    { "WINDOWS-1252", Windows1252 },
    { "WINDOWS-1251", Windows1251 },
    { "UTF-8", Utf8 },
    { "WINDOWS-1250", Windows1250 },
  };

  const auto name = _name.toUpper();
  for ( auto const & candidate : knownEncodings ) {
    if ( name == candidate.label ) {
      return candidate.value;
    }
  }

  return Utf8;
}
/// Produces the byte representation of a line feed (0x0A) in the given encoding.
/// The returned buffer is allocated with new[] and holds `length` bytes; this
/// function does not release it.
LineFeed initLineFeed( const Encoding e )
{
  // Size in bytes of one code unit, and whether the 0x0A byte is stored last
  int unitSize   = 1;
  bool bigEndian = false;

  switch ( e ) {
    case Utf8::Utf32LE:
      unitSize = 4;
      break;
    case Utf8::Utf32BE:
      unitSize  = 4;
      bigEndian = true;
      break;
    case Utf8::Utf16LE:
      unitSize = 2;
      break;
    case Utf8::Utf16BE:
      unitSize  = 2;
      bigEndian = true;
      break;
    default:
      // All single-byte and UTF-8 encodings use a lone 0x0A
      break;
  }

  LineFeed lf{};
  lf.length   = unitSize;
  lf.lineFeed = new char[ unitSize ]();

  lf.lineFeed[ bigEndian ? unitSize - 1 : 0 ] = 0x0A;
  return lf;
}
} // namespace Utf8

View file

@ -1,68 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <cstdio>
#include <QByteArray>
#include <string>
#include "ex.hh"
#include "wstring.hh"
/// A simple UTF-8 encoder/decoder. Some dictionary backends only require
/// utf8, so we have this separately, removing the iconv dependency for them.
/// Besides, utf8 is quite ubiquitous now, and its use is spread over many
/// places.
namespace Utf8 {
// Those are possible encodings for .dsl files
enum Encoding {
Utf16LE,
Utf16BE,
Windows1252,
Windows1251,
Windows1250,
Utf8, // This is an extension. Detected solely by the UTF8 BOM.
Utf32BE,
Utf32LE,
};
using std::string;
using gd::wstring;
using gd::wchar;
/// Thrown by decode( string const & ) when the input is not valid UTF-8.
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
/// Encodes the given UCS-4 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( wchar const * in, size_t inSize, char * out );
/// Decodes the given UTF-8 into UCS-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in, size_t inSize, wchar * out );
/// Versions for non time-critical code.
/// encode() returns an empty string for empty input; decode() throws
/// exCantDecode when the raw decoder reports invalid UTF-8.
string encode( wstring const & ) noexcept;
wstring decode( string const & );
/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );
// Searches s1 for the first occurrence of s2 and returns the offset just past it
// (the size of the first line including the terminator). When s2 is not found the
// result is s1length; -1 is never returned.
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
/// Returns the upper-case name of the encoding; values without a dedicated
/// name map to "UTF-8".
char const * getEncodingNameFor( Encoding e );
/// Inverse of getEncodingNameFor. The name is compared case-insensitively and
/// unrecognised names map to Utf8.
Encoding getEncodingForName( const QByteArray & name );
/// Byte representation of a line feed in some encoding.
struct LineFeed
{
/// Number of bytes in lineFeed (1, 2 or 4).
int length;
/// Buffer allocated with new[] by initLineFeed; it is not freed by this module.
char * lineFeed;
};
/// Builds the line-feed byte sequence (0x0A in the encoding's unit size and byte order).
LineFeed initLineFeed( Encoding e );
} // namespace Utf8

View file

@ -4,9 +4,6 @@
#include <QStyle>
#include <QMessageBox>
#include <string>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QBuffer>
#include <QTextCodec>
@ -31,10 +28,10 @@ std::string c_string( const QString & str )
return std::string( str.toUtf8().constData() );
}
bool endsWithIgnoreCase( const string & str1, string str2 )
bool endsWithIgnoreCase( QByteArrayView str, QByteArrayView extension )
{
return ( str1.size() >= (unsigned)str2.size() )
&& ( strcasecmp( str1.c_str() + ( str1.size() - str2.size() ), str2.data() ) == 0 );
return ( str.size() >= extension.size() )
&& ( str.last( extension.size() ).compare( extension, Qt::CaseInsensitive ) == 0 );
}
QString escapeAmps( QString const & str )

View file

@ -40,7 +40,7 @@ inline QString rstrip( const QString & str )
}
std::string c_string( const QString & str );
bool endsWithIgnoreCase( const string & str1, string str2 );
bool endsWithIgnoreCase( QByteArrayView str, QByteArrayView extension );
/**
* remove punctuation , space, symbol
*
@ -257,9 +257,14 @@ inline bool isAudioUrl( QUrl const & url )
{
if ( !url.isValid() )
return false;
// Note: we check for forvo sound links explicitly, as they don't have extensions
return ( url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "gdau" )
// gdau links are known to be audios, (sometimes they may not have file extension).
if ( url.scheme() == "gdau" ) {
return true;
}
// Note: we check for forvo sound links explicitly, as they don't have extensions
return ( url.scheme() == "http" || url.scheme() == "https" )
&& ( Filetype::isNameOfSound( url.path().toUtf8().data() ) || url.host() == "apifree.forvo.com" );
}

View file

@ -1,17 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#include <string>
///
/// Aliases for legacy reasons.
///
/// For new code, just use the standardized std::u32string for UTF-32 strings instead.
///
namespace gd {
/// Legacy name for a single UTF-32 code unit.
using wchar = char32_t;
/// Legacy name for a UTF-32 string.
using wstring = std::u32string;
} // namespace gd

View file

@ -1,38 +0,0 @@
#include "wstring_qt.hh"
#include <QList>
namespace gd {
// When converting non-BMP characters to wstring, the result may end with \0 code
// points. This function returns a copy of v with all trailing \0 removed, because \0
// is significant in the index. It is intended only for index-related operations
// such as store/query.
wstring removeTrailingZero( wstring const & v )
{
  // find_last_not_of() gives npos when v is empty or consists solely of zeros;
  // npos + 1 wraps around to 0, which yields the empty string in that case.
  // Working in size_t also avoids narrowing the length to int.
  return v.substr( 0, v.find_last_not_of( U'\0' ) + 1 );
}
/// Converts a QString to UTF-32 and drops every trailing zero code point.
wstring removeTrailingZero( QString const & in )
{
  const QList< unsigned int > codePoints = in.toUcs4();

  // Walk back from the end over the zero code points
  auto keep = codePoints.size();
  while ( keep > 0 && codePoints[ keep - 1 ] == 0 ) {
    --keep;
  }

  return wstring( (const wchar *)codePoints.constData(), keep );
}
/// Returns str converted to Unicode normalization form C.
wstring normalize( const wstring & str )
{
  const QString composed = QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C );
  return composed.toStdU32String();
}
} // namespace gd

View file

@ -1,16 +0,0 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
/// This file adds conversions between gd::wstring and QString. See wstring.hh
/// for more details on gd::wstring.
#include "wstring.hh"
#include <QString>
namespace gd {
/// Returns a copy of v without its trailing zero code points.
wstring removeTrailingZero( wstring const & v );
/// Converts the QString to UCS-4 and removes trailing zero code points.
wstring removeTrailingZero( QString const & in );
/// Returns the string in Unicode normalization form C.
wstring normalize( wstring const & );
} // namespace gd

View file

@ -8,7 +8,6 @@
#include <QtXml>
#include <QApplication>
#include <QStyle>
#include "gddebug.hh"
#ifdef Q_OS_WIN32
//this is a windows header file.
@ -119,57 +118,6 @@ QKeySequence HotKey::toKeySequence() const
;
}
/// Compile-time answer: true when the build includes the FFmpeg or the
/// Qt Multimedia player.
bool InternalPlayerBackend::anyAvailable()
{
#if !defined( MAKE_FFMPEG_PLAYER ) && !defined( MAKE_QTMULTIMEDIA_PLAYER )
  return false;
#else
  return true;
#endif
}
/// Picks the preferred backend for this build: FFmpeg first, then
/// Qt Multimedia, otherwise a backend with an empty name.
InternalPlayerBackend InternalPlayerBackend::defaultBackend()
{
#ifdef MAKE_FFMPEG_PLAYER
  return ffmpeg();
#else
  #ifdef MAKE_QTMULTIMEDIA_PLAYER
  return qtmultimedia();
  #else
  return InternalPlayerBackend( QString() );
  #endif
#endif
}
/// Lists the UI names of the backends compiled into this build
/// (FFmpeg before Qt Multimedia).
QStringList InternalPlayerBackend::nameList()
{
  QStringList backends;
#ifdef MAKE_FFMPEG_PLAYER
  backends << ffmpeg().uiName();
#endif
#ifdef MAKE_QTMULTIMEDIA_PLAYER
  backends << qtmultimedia().uiName();
#endif
  return backends;
}
/// True when FFmpeg support is compiled in and this object names that backend.
bool InternalPlayerBackend::isFfmpeg() const
{
#ifndef MAKE_FFMPEG_PLAYER
  return false;
#else
  return name == ffmpeg().name;
#endif
}
/// True when Qt Multimedia support is compiled in and this object names that backend.
bool InternalPlayerBackend::isQtmultimedia() const
{
#ifndef MAKE_QTMULTIMEDIA_PLAYER
  return false;
#else
  return name == qtmultimedia().name;
#endif
}
QString Preferences::sanitizeInputPhrase( QString const & inputWord ) const
{
QString result = inputWord;
@ -181,9 +129,9 @@ QString Preferences::sanitizeInputPhrase( QString const & inputWord ) const
}
if ( limitInputPhraseLength && result.size() > inputPhraseLengthLimit ) {
gdDebug( "Ignoring an input phrase %lld symbols long. The configured maximum input phrase length is %d symbols.",
result.size(),
inputPhraseLengthLimit );
qDebug( "Ignoring an input phrase %lld symbols long. The configured maximum input phrase length is %d symbols.",
result.size(),
inputPhraseLengthLimit );
return {};
}
@ -232,7 +180,6 @@ Preferences::Preferences():
pronounceOnLoadMain( false ),
pronounceOnLoadPopup( false ),
useInternalPlayer( InternalPlayerBackend::anyAvailable() ),
internalPlayerBackend( InternalPlayerBackend::defaultBackend() ),
checkForNewReleases( true ),
disallowContentFromOtherSites( false ),
hideGoldenDictHeader( false ),
@ -549,10 +496,6 @@ Class load()
c.paths.push_back( Path( getPortableVersionDictionaryDir(), true ) );
}
#ifndef Q_OS_WIN32
c.preferences.audioPlaybackProgram = "mplayer";
#endif
QString possibleMorphologyPath = getProgramDataDir() + "/content/morphology";
if ( QDir( possibleMorphologyPath ).exists() ) {
@ -592,7 +535,7 @@ Class load()
if ( !loadFromTemplate ) {
// Load the config as usual
if ( !dd.setContent( &configFile, false, &errorStr, &errorLine, &errorColumn ) ) {
GD_DPRINTF( "Error: %s at %d,%d\n", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
qDebug( "Error: %s at %d,%d", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
throw exMalformedConfigFile();
}
}
@ -605,7 +548,7 @@ Class load()
QBuffer bufferedData( &data );
if ( !dd.setContent( &bufferedData, false, &errorStr, &errorLine, &errorColumn ) ) {
GD_DPRINTF( "Error: %s at %d,%d\n", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
qDebug( "Error: %s at %d,%d", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
throw exMalformedConfigFile();
}
}
@ -852,7 +795,7 @@ Class load()
// Upgrading
c.dictServers = makeDefaultDictServers();
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
QDomNode ves = root.namedItem( "voiceEngines" );
if ( !ves.isNull() ) {
@ -1003,7 +946,7 @@ Class load()
}
if ( !preferences.namedItem( "internalPlayerBackend" ).isNull() ) {
c.preferences.internalPlayerBackend.setUiName(
c.preferences.internalPlayerBackend.setName(
preferences.namedItem( "internalPlayerBackend" ).toElement().text() );
}
@ -1011,7 +954,7 @@ Class load()
c.preferences.audioPlaybackProgram = preferences.namedItem( "audioPlaybackProgram" ).toElement().text();
}
else {
c.preferences.audioPlaybackProgram = "mplayer";
c.preferences.audioPlaybackProgram = "vlc --intf dummy --play-and-exit";
}
QDomNode proxy = preferences.namedItem( "proxyserver" );
@ -1736,7 +1679,7 @@ void save( Class const & c )
p.setAttributeNode( icon );
}
}
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
{
QDomNode ves = dd.createElement( "voiceEngines" );
root.appendChild( ves );
@ -1970,7 +1913,7 @@ void save( Class const & c )
preferences.appendChild( opt );
opt = dd.createElement( "internalPlayerBackend" );
opt.appendChild( dd.createTextNode( c.preferences.internalPlayerBackend.uiName() ) );
opt.appendChild( dd.createTextNode( c.preferences.internalPlayerBackend.getName() ) );
preferences.appendChild( opt );
opt = dd.createElement( "audioPlaybackProgram" );

View file

@ -3,19 +3,20 @@
#pragma once
#include <QObject>
#include <QList>
#include <QString>
#include <QSize>
#include <QDateTime>
#include <QKeySequence>
#include <QSet>
#include <QMetaType>
#include "audio/internalplayerbackend.hh"
#include "ex.hh"
#include <QDateTime>
#include <QDomDocument>
#include <QKeySequence>
#include <QList>
#include <QLocale>
#include <optional>
#include <QMetaType>
#include <QObject>
#include <QSet>
#include <QSize>
#include <QString>
#include <QThread>
#include <optional>
/// Special group IDs
enum GroupId : unsigned {
@ -269,66 +270,6 @@ struct CustomFonts
}
};
/// Value type naming one internal audio player backend. It keeps the
/// preprocessor checks for the supported backends in one place, so that
/// backend names are not repeated (and mistyped) throughout the code.
class InternalPlayerBackend
{
public:
  /// True if the build contains at least one backend.
  static bool anyAvailable();
  /// The default backend, or a null (empty-name) backend if there is none.
  static InternalPlayerBackend defaultBackend();
  /// Names of all backends contained in the build.
  static QStringList nameList();

  /// True if built with FFmpeg player support and the name matches.
  bool isFfmpeg() const;
  /// True if built with Qt Multimedia player support and the name matches.
  bool isQtmultimedia() const;

  QString const & uiName() const { return name; }
  void setUiName( QString const & name_ ) { name = name_; }

  /// Two backends are equal when their names are equal.
  bool operator==( InternalPlayerBackend const & other ) const { return name == other.name; }
  bool operator!=( InternalPlayerBackend const & other ) const { return !( *this == other ); }

private:
  explicit InternalPlayerBackend( QString const & name_ ):
    name( name_ )
  {
  }

#ifdef MAKE_FFMPEG_PLAYER
  static InternalPlayerBackend ffmpeg() { return InternalPlayerBackend( "FFmpeg" ); }
#endif

#ifdef MAKE_QTMULTIMEDIA_PLAYER
  static InternalPlayerBackend qtmultimedia() { return InternalPlayerBackend( "Qt Multimedia" ); }
#endif

  QString name;
};
/// Various user preferences
struct Preferences
{
@ -388,7 +329,7 @@ struct Preferences
// Whether the word should be pronounced on page load, in main window/popup
bool pronounceOnLoadMain, pronounceOnLoadPopup;
bool useInternalPlayer;
InternalPlayerBackend internalPlayerBackend;
InternalPlayerBackend internalPlayerBackend{};
QString audioPlaybackProgram;
ProxyServer proxyServer;
@ -784,7 +725,7 @@ struct Program
using Programs = QList< Program >;
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
struct VoiceEngine
{
bool enabled;
@ -877,7 +818,7 @@ struct Class
Lingua lingua;
Forvo forvo;
Programs programs;
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
VoiceEngines voiceEngines;
#endif

View file

@ -4,11 +4,10 @@
#include "aard.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "decompress.hh"
#include "gddebug.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
@ -16,19 +15,11 @@
#include <set>
#include <string>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QDir>
#include <QString>
#include <QSemaphore>
#include <QThreadPool>
#include <QAtomicInt>
#include <QDomDocument>
#include <QtEndian>
#include <QRegularExpression>
#include "ufile.hh"
#include "wstring_qt.hh"
#include "utils.hh"
namespace Aard {
@ -38,7 +29,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -103,7 +93,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -225,11 +215,6 @@ public:
~AardDictionary();
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -250,8 +235,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -287,19 +274,15 @@ private:
AardDictionary::AardDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
df( dictionaryFiles[ 0 ], "rb" )
df( dictionaryFiles[ 0 ], QIODevice::ReadOnly )
{
// Read dictionary name
idx.seek( sizeof( idxHeader ) );
vector< char > dName( idx.read< quint32 >() );
if ( dName.size() ) {
idx.read( &dName.front(), dName.size() );
dictionaryName = string( &dName.front(), dName.size() );
}
idx.readU32SizeAndData<>( dictionaryName );
// Initialize the index
@ -418,7 +401,7 @@ void AardDictionary::loadArticle( quint32 address, string & articleText, bool ra
df.read( &articleBody.front(), articleSize );
}
catch ( std::exception & ex ) {
gdWarning( "AARD: Failed loading article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "AARD: Failed loading article from \"%s\", reason: %s", getName().c_str(), ex.what() );
break;
}
catch ( ... ) {
@ -578,14 +561,14 @@ void AardDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Aard: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Aard: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Aard: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Aard: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( QString::fromStdString( ftsIdxName ) );
}
}
@ -601,7 +584,7 @@ void AardDictionary::getArticleText( uint32_t articleAddress, QString & headword
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
}
catch ( std::exception & ex ) {
gdWarning( "Aard: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Aard: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -619,8 +602,8 @@ AardDictionary::getSearchResults( QString const & searchString, int searchMode,
class AardArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
AardDictionary & dict;
bool ignoreDiacritics;
@ -629,8 +612,8 @@ class AardArticleRequest: public Dictionary::DataRequest
public:
AardArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
AardArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
AardDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -674,13 +657,13 @@ void AardArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -711,12 +694,12 @@ void AardArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -732,7 +715,7 @@ void AardArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>";
@ -755,9 +738,9 @@ void AardArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > AardDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -792,17 +775,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
try {
gdDebug( "Aard: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Aard: Building the index for dictionary: %s", fileName.c_str() );
{
QFileInfo info( QString::fromUtf8( fileName.c_str() ) );
if ( static_cast< quint64 >( info.size() ) > ULONG_MAX ) {
gdWarning( "File %s is too large\n", fileName.c_str() );
qWarning( "File %s is too large", fileName.c_str() );
continue;
}
}
File::Index df( fileName, "rb" );
File::Index df( fileName, QIODevice::ReadOnly );
AAR_header dictHeader;
@ -811,7 +794,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( strncmp( dictHeader.signature, "aard", 4 )
|| ( !has64bitIndex && strncmp( dictHeader.indexItemFormat, ">LL", 4 ) )
|| strncmp( dictHeader.keyLengthFormat, ">H", 2 ) || strncmp( dictHeader.articleLengthFormat, ">L", 2 ) ) {
gdWarning( "File %s is not in supported aard format\n", fileName.c_str() );
qWarning( "File %s is not in supported aard format", fileName.c_str() );
continue;
}
@ -819,7 +802,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
quint32 size = qFromBigEndian( dictHeader.metaLength );
if ( size == 0 ) {
gdWarning( "File %s has invalid metadata", fileName.c_str() );
qWarning( "File %s has invalid metadata", fileName.c_str() );
continue;
}
@ -833,7 +816,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
map< string, string > meta = parseMetaData( metaStr );
if ( meta.empty() ) {
gdWarning( "File %s has invalid metadata", fileName.c_str() );
qWarning( "File %s has invalid metadata", fileName.c_str() );
continue;
}
@ -871,7 +854,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
initializing.indexingDictionary( dictName );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
@ -933,7 +916,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// Insert new entry
wstring word = Utf8::decode( string( data.data(), wordSize ) );
std::u32string word = Text::toUtf32( string( data.data(), wordSize ) );
if ( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand ) {
indexedWords.addSingleWord( word, articleOffset );
}
@ -987,11 +970,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( &idxHeader, sizeof( idxHeader ) );
}
catch ( std::exception & e ) {
gdWarning( "Aard dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Aard dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
continue;
}
catch ( ... ) {
gdWarning( "Aard dictionary indexing failed\n" );
qWarning( "Aard dictionary indexing failed" );
continue;
}
} // if need to rebuild
@ -999,7 +982,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< AardDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Aard dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Aard dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
continue;
}
}

View file

@ -8,37 +8,28 @@
#include "dictfile.hh"
#include "folding.hh"
#include "ftshelpers.hh"
#include "gddebug.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include "language.hh"
#include "utf8.hh"
#include "text.hh"
#include "utils.hh"
#include <ctype.h>
#include <list>
#include <map>
#include <set>
#include <string.h>
#include <zlib.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QAtomicInt>
#include <QCryptographicHash>
#include <QDir>
#include <QPainter>
#include <QRegularExpression>
#include <QSemaphore>
#include <QThreadPool>
namespace Bgl {
using std::map;
using std::multimap;
using std::set;
using gd::wstring;
using gd::wchar;
using std::list;
using std::pair;
using std::string;
@ -85,7 +76,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -118,7 +109,7 @@ void trimWs( string & word )
if ( word.size() ) {
unsigned begin = 0;
while ( begin < word.size() && Utf8::isspace( word[ begin ] ) ) {
while ( begin < word.size() && Text::isspace( word[ begin ] ) ) {
++begin;
}
@ -130,7 +121,7 @@ void trimWs( string & word )
// Doesn't consist of ws entirely, so must end with just isspace()
// condition.
while ( Utf8::isspace( word[ end - 1 ] ) ) {
while ( Text::isspace( word[ end - 1 ] ) ) {
--end;
}
@ -144,7 +135,7 @@ void trimWs( string & word )
void addEntryToIndex( string & word,
uint32_t articleOffset,
IndexedWords & indexedWords,
vector< wchar > & wcharBuffer )
vector< char32_t > & wcharBuffer )
{
// Strip any leading or trailing whitespaces
trimWs( word );
@ -166,7 +157,7 @@ void addEntryToIndex( string & word,
}
// Convert the word from utf8 to wide chars
indexedWords.addWord( Utf8::decode( word ), articleOffset );
indexedWords.addWord( Text::toUtf32( word ), articleOffset );
}
class BglDictionary: public BtreeIndexing::BtreeDictionary
@ -180,11 +171,6 @@ public:
BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile );
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -205,10 +191,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -250,7 +238,7 @@ private:
BglDictionary::BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile ):
BtreeDictionary( id, vector< string >( 1, dictionaryFile ) ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset )
{
@ -258,15 +246,7 @@ BglDictionary::BglDictionary( string const & id, string const & indexFile, strin
// Read the dictionary's name
size_t len = idx.read< uint32_t >();
if ( len ) {
vector< char > nameBuf( len );
idx.read( &nameBuf.front(), len );
dictionaryName = string( &nameBuf.front(), len );
}
idx.readU32SizeAndData<>( dictionaryName );
// Initialize the index
@ -407,7 +387,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() );
wstring wstr = Utf8::decode( articleStr );
std::u32string wstr = Text::toUtf32( articleStr );
if ( getLangTo() == LangCoder::code2toInt( "he" ) ) {
for ( char32_t & i : wstr ) {
@ -424,7 +404,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
text = Html::unescape( QString::fromStdU32String( wstr ) );
}
catch ( std::exception & ex ) {
gdWarning( "BGL: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "BGL: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -440,14 +420,14 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Bgl: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Bgl: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Bgl: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Bgl: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( QString::fromStdString( ftsIdxName ) );
}
}
@ -456,7 +436,7 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
class BglHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring str;
std::u32string str;
BglDictionary & dict;
QAtomicInt isCancelled;
@ -464,7 +444,7 @@ class BglHeadwordsRequest: public Dictionary::WordSearchRequest
public:
BglHeadwordsRequest( wstring const & word_, BglDictionary & dict_ ):
BglHeadwordsRequest( std::u32string const & word_, BglDictionary & dict_ ):
str( word_ ),
dict( dict_ )
{
@ -496,7 +476,7 @@ void BglHeadwordsRequest::run()
vector< WordArticleLink > chain = dict.findArticles( str );
wstring caseFolded = Folding::applySimpleCaseOnly( str );
std::u32string caseFolded = Folding::applySimpleCaseOnly( str );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -508,11 +488,11 @@ void BglHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, displayedHeadword, articleText );
wstring headwordDecoded;
std::u32string headwordDecoded;
try {
headwordDecoded = Utf8::decode( removePostfix( headword ) );
headwordDecoded = Text::toUtf32( removePostfix( headword ) );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
}
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() ) {
@ -527,7 +507,7 @@ void BglHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< BglHeadwordsRequest >( word, *this ) :
@ -567,8 +547,8 @@ string postfixToSuperscript( string const & in )
class BglArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
BglDictionary & dict;
QAtomicInt isCancelled;
@ -577,8 +557,8 @@ class BglArticleRequest: public Dictionary::DataRequest
public:
BglArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
BglArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
BglDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -610,11 +590,11 @@ public:
void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - convert non-unicode to unicode
{
wstring hebWStr;
std::u32string hebWStr;
try {
hebWStr = Utf8::decode( hebStr );
hebWStr = Text::toUtf32( hebStr );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
hebStr = "Utf-8 decoding error";
return;
}
@ -628,7 +608,7 @@ void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - conv
i += 1488 - 224; // Convert to Hebrew unicode
}
}
hebStr = Utf8::encode( hebWStr );
hebStr = Text::toUtf8( hebWStr );
}
void BglArticleRequest::fixHebArticle( string & hebArticle ) // Hebrew support - remove extra chars at the end
@ -664,7 +644,7 @@ void BglArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
@ -673,7 +653,7 @@ void BglArticleRequest::run()
// the bodies to account for this.
set< QByteArray > articleBodiesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -701,7 +681,7 @@ void BglArticleRequest::run()
// We do the case-folded and postfix-less comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
@ -724,7 +704,7 @@ void BglArticleRequest::run()
continue; // Already had this body
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( targetHeadword, articleText ) ) );
@ -733,7 +713,7 @@ void BglArticleRequest::run()
} // try
catch ( std::exception & ex ) {
gdWarning( "BGL: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
qWarning( "BGL: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
}
}
@ -745,7 +725,7 @@ void BglArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -822,9 +802,9 @@ void BglArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > BglDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > BglDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -899,8 +879,8 @@ void BglResourceRequest::run()
break;
}
vector< char > nameData( idx.read< uint32_t >() );
idx.read( &nameData.front(), nameData.size() );
vector< char > nameData;
idx.readU32SizeAndData<>( nameData );
for ( size_t x = nameData.size(); x--; ) {
nameData[ x ] = tolower( nameData[ x ] );
@ -917,9 +897,9 @@ void BglResourceRequest::run()
data.resize( idx.read< uint32_t >() );
vector< unsigned char > compressedData( idx.read< uint32_t >() );
vector< unsigned char > compressedData;
idx.read( &compressedData.front(), compressedData.size() );
idx.readU32SizeAndData<>( compressedData );
unsigned long decompressedLength = data.size();
@ -929,7 +909,7 @@ void BglResourceRequest::run()
compressedData.size() )
!= Z_OK
|| decompressedLength != data.size() ) {
gdWarning( "Failed to decompress resource \"%s\", ignoring it.\n", name.c_str() );
qWarning( "Failed to decompress resource \"%s\", ignoring it.", name.c_str() );
}
else {
hasAnyData = true;
@ -1007,14 +987,14 @@ protected:
void ResourceHandler::handleBabylonResource( string const & filename, char const * data, size_t size )
{
//GD_DPRINTF( "Handling resource file %s (%u bytes)\n", filename.c_str(), size );
//qDebug( "Handling resource file %s (%u bytes)", filename.c_str(), size );
vector< unsigned char > compressedData( compressBound( size ) );
unsigned long compressedSize = compressedData.size();
if ( compress( &compressedData.front(), &compressedSize, (unsigned char const *)data, size ) != Z_OK ) {
gdWarning( "Failed to compress the body of resource \"%s\", dropping it.\n", filename.c_str() );
qWarning( "Failed to compress the body of resource \"%s\", dropping it.", filename.c_str() );
return;
}
@ -1065,7 +1045,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
// Building the index
gdDebug( "Bgl: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Bgl: Building the index for dictionary: %s", fileName.c_str() );
try {
Babylon b( fileName );
@ -1077,13 +1057,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
std::string sourceCharset, targetCharset;
if ( !b.read( sourceCharset, targetCharset ) ) {
gdWarning( "Failed to start reading from %s, skipping it\n", fileName.c_str() );
qWarning( "Failed to start reading from %s, skipping it", fileName.c_str() );
continue;
}
initializing.indexingDictionary( b.title() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -1105,7 +1085,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
IndexedWords indexedWords;
// We use this buffer to decode utf8 into it.
vector< wchar > wcharBuffer;
vector< char32_t > wcharBuffer;
ChunkedStorage::Writer chunks( idx );
@ -1169,7 +1149,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idxHeader.chunksOffset = chunks.finish();
GD_DPRINTF( "Writing index...\n" );
qDebug( "Writing index..." );
// Good. Now build the index
@ -1205,7 +1185,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( &idxHeader, sizeof( idxHeader ) );
}
catch ( std::exception & e ) {
gdWarning( "BGL dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "BGL dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
}
}
@ -1213,7 +1193,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< BglDictionary >( dictId, indexFile, fileName ) );
}
catch ( std::exception & e ) {
gdWarning( "BGL dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "BGL dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
}
}

View file

@ -23,7 +23,6 @@
#include "bgl_babylon.hh"
#include "dictionary.hh"
#include "gddebug.hh"
#include "globalregex.hh"
#include "htmlescape.hh"
#include "iconv.hh"
@ -333,10 +332,10 @@ bool Babylon::read( const std::string & source_charset, const std::string & targ
convertToUtf8( m_email, BGL_TARGET_CHARSET );
convertToUtf8( m_copyright, BGL_TARGET_CHARSET );
convertToUtf8( m_description, BGL_TARGET_CHARSET );
GD_DPRINTF( "Default charset: %s\nSource Charset: %s\nTargetCharset: %s\n",
m_defaultCharset.c_str(),
m_sourceCharset.c_str(),
m_targetCharset.c_str() );
qDebug( "Default charset: %s\nSource Charset: %s\nTargetCharset: %s",
m_defaultCharset.c_str(),
m_sourceCharset.c_str(),
m_targetCharset.c_str() );
return true;
}
@ -498,7 +497,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = (unsigned char)block.data[ pos ] - 0x3F;
if ( length > len - a - 2 ) {
GD_FDPRINTF( stderr, "Hidden displayed headword is too large %s\n", headword.c_str() );
qWarning( "Hidden displayed headword is too large %s", headword.c_str() );
pos += len - a;
break;
}
@ -511,7 +510,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = (unsigned char)block.data[ pos + 1 ];
if ( length > len - a - 2 ) {
GD_FDPRINTF( stderr, "Displayed headword's length is too large for headword %s\n", headword.c_str() );
qWarning( "Displayed headword's length is too large for headword %s", headword.c_str() );
pos += len - a;
break;
}
@ -525,7 +524,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = qFromBigEndian( *reinterpret_cast< quint16 * >( block.data + pos + 1 ) );
if ( length > len - a - 3 ) {
GD_FDPRINTF( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
qWarning( "2-byte sized displayed headword for %s is too large", headword.c_str() );
pos += len - a;
break;
}
@ -541,9 +540,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = (unsigned char)block.data[ pos + 2 ];
if ( length > len - a - 3 ) {
GD_FDPRINTF( stderr,
"1-byte-sized transcription's length is too large for headword %s\n",
headword.c_str() );
qWarning( "1-byte-sized transcription's length is too large for headword %s\n", headword.c_str() );
pos += len - a;
break;
}
@ -553,7 +550,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 3, length );
}
catch ( Iconv::Ex & e ) {
qWarning( "Bgl: charset conversion error, no trancription processing's done: %s\n", e.what() );
qWarning( "Bgl: charset conversion error, no trancription processing's done: %s", e.what() );
transcription = std::string( block.data + pos + 3, length );
}
}
@ -570,9 +567,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = qFromBigEndian( *reinterpret_cast< quint16 * >( block.data + pos + 2 ) );
if ( length > len - a - 4 ) {
GD_FDPRINTF( stderr,
"2-byte-sized transcription's length is too large for headword %s\n",
headword.c_str() );
qWarning( "2-byte-sized transcription's length is too large for headword %s\n", headword.c_str() );
pos += len - a;
break;
}
@ -582,7 +577,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 4, length );
}
catch ( Iconv::Ex & e ) {
qWarning( "Bgl: charset conversion error, no transcription processing's done: %s\n", e.what() );
qWarning( "Bgl: charset conversion error, no transcription processing's done: %s", e.what() );
transcription = std::string( block.data + pos + 4, length );
}
}
@ -600,7 +595,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
unsigned length = (unsigned char)block.data[ pos ] - 0x3F;
if ( length > len - a - 2 ) {
GD_FDPRINTF( stderr, "Hidden transcription is too large %s\n", headword.c_str() );
qWarning( "Hidden transcription is too large %s", headword.c_str() );
pos += len - a;
break;
}

View file

@ -3,28 +3,21 @@
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include <QRunnable>
#include <QThreadPool>
#include <QSemaphore>
#include "text.hh"
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include "gddebug.hh"
#include "wstring_qt.hh"
#include "utils.hh"
#include <QRegularExpression>
#include "wildcard.hh"
#include "globalbroadcaster.hh"
#include <QtConcurrent>
#include <QtConcurrentRun>
#include <zlib.h>
namespace BtreeIndexing {
using gd::wstring;
using gd::wchar;
using std::pair;
enum {
@ -63,14 +56,14 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, File::Index & file, QMu
}
vector< WordArticleLink >
BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
BtreeIndex::findArticles( std::u32string const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
{
//First trim ending zero
wstring word = gd::removeTrailingZero( search_word );
std::u32string word = Text::removeTrailingZero( search_word );
vector< WordArticleLink > result;
try {
wstring folded = Folding::apply( word );
std::u32string folded = Folding::apply( word );
if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word );
}
@ -91,11 +84,11 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
}
}
catch ( std::exception & e ) {
gdWarning( "Articles searching failed, error: %s\n", e.what() );
qWarning( "Articles searching failed, error: %s", e.what() );
result.clear();
}
catch ( ... ) {
qWarning( "Articles searching failed\n" );
qWarning( "Articles searching failed" );
result.clear();
}
@ -104,7 +97,7 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_,
std::u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -141,11 +134,11 @@ void BtreeWordSearchRequest::findMatches()
bool useWildcards = false;
if ( allowMiddleMatches ) {
useWildcards = ( str.find( '*' ) != wstring::npos || str.find( '?' ) != wstring::npos
|| str.find( '[' ) != wstring::npos || str.find( ']' ) != wstring::npos );
useWildcards = ( str.find( '*' ) != std::u32string::npos || str.find( '?' ) != std::u32string::npos
|| str.find( '[' ) != std::u32string::npos || str.find( ']' ) != std::u32string::npos );
}
wstring folded = Folding::apply( str );
std::u32string folded = Folding::apply( str );
int minMatchLength = 0;
@ -158,7 +151,7 @@ void BtreeWordSearchRequest::findMatches()
regexp.setPatternOptions( QRegularExpression::CaseInsensitiveOption );
bool bNoLetters = folded.empty();
wstring foldedWithWildcards;
std::u32string foldedWithWildcards;
if ( bNoLetters ) {
foldedWithWildcards = Folding::applyWhitespaceOnly( str );
@ -268,13 +261,13 @@ void BtreeWordSearchRequest::findMatches()
break;
}
//GD_DPRINTF( "offset = %u, size = %u\n", chainOffset - &leaf.front(), leaf.size() );
//qDebug( "offset = %u, size = %u", chainOffset - &leaf.front(), leaf.size() );
vector< WordArticleLink > chain = dict.readChain( chainOffset );
wstring chainHead = Utf8::decode( chain[ 0 ].word );
std::u32string chainHead = Text::toUtf32( chain[ 0 ].word );
wstring resultFolded = Folding::apply( chainHead );
std::u32string resultFolded = Folding::apply( chainHead );
if ( resultFolded.empty() ) {
resultFolded = Folding::applyWhitespaceOnly( chainHead );
}
@ -290,9 +283,9 @@ void BtreeWordSearchRequest::findMatches()
break;
}
if ( useWildcards ) {
wstring word = Utf8::decode( x.prefix + x.word );
wstring result = Folding::applyDiacriticsOnly( word );
if ( result.size() >= (wstring::size_type)minMatchLength ) {
std::u32string word = Text::toUtf32( x.prefix + x.word );
std::u32string result = Folding::applyDiacriticsOnly( word );
if ( result.size() >= (std::u32string::size_type)minMatchLength ) {
QRegularExpressionMatch match = regexp.match( QString::fromStdU32String( result ) );
if ( match.hasMatch() && match.capturedStart() == 0 ) {
addMatch( word );
@ -302,10 +295,10 @@ void BtreeWordSearchRequest::findMatches()
else {
// Skip middle matches, if requested. If suffix variation is specified,
// make sure the string isn't larger than requested.
if ( ( allowMiddleMatches || Folding::apply( Utf8::decode( x.prefix ) ).empty() )
if ( ( allowMiddleMatches || Folding::apply( Text::toUtf32( x.prefix ) ).empty() )
&& ( maxSuffixVariation < 0
|| (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) ) {
addMatch( Utf8::decode( x.prefix + x.word ) );
addMatch( Text::toUtf32( x.prefix + x.word ) );
}
}
if ( matches.size() >= maxResults ) {
@ -331,7 +324,7 @@ void BtreeWordSearchRequest::findMatches()
if ( chainOffset >= leafEnd ) {
// We're past the current leaf, fetch the next one
//GD_DPRINTF( "advancing\n" );
//qDebug( "advancing" );
if ( nextLeaf ) {
QMutexLocker _( dict.idxFileMutex );
@ -345,7 +338,7 @@ void BtreeWordSearchRequest::findMatches()
uint32_t leafEntries = *(uint32_t *)&leaf.front();
if ( leafEntries == 0xffffFFFF ) {
//GD_DPRINTF( "bah!\n" );
//qDebug( "bah!" );
exit( 1 );
}
}
@ -366,10 +359,10 @@ void BtreeWordSearchRequest::findMatches()
}
}
catch ( std::exception & e ) {
qWarning( "Index searching failed: \"%s\", error: %s\n", dict.getName().c_str(), e.what() );
qWarning( "Index searching failed: \"%s\", error: %s", dict.getName().c_str(), e.what() );
}
catch ( ... ) {
gdWarning( "Index searching failed: \"%s\"\n", dict.getName().c_str() );
qWarning( "Index searching failed: \"%s\"", dict.getName().c_str() );
}
}
@ -397,13 +390,14 @@ BtreeWordSearchRequest::~BtreeWordSearchRequest()
f.waitForFinished();
}
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( std::u32string const & str,
unsigned long maxResults )
{
return std::make_shared< BtreeWordSearchRequest >( *this, str, 0, -1, true, maxResults );
}
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( wstring const & str,
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( std::u32string const & str,
unsigned minLength,
unsigned maxSuffixVariation,
unsigned long maxResults )
@ -424,7 +418,7 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
uint32_t uncompressedSize = idxFile->read< uint32_t >();
uint32_t compressedSize = idxFile->read< uint32_t >();
//GD_DPRINTF( "%x,%x\n", uncompressedSize, compressedSize );
//qDebug( "%x,%x", uncompressedSize, compressedSize );
out.resize( uncompressedSize );
@ -441,8 +435,11 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
}
}
char const * BtreeIndex::findChainOffsetExactOrPrefix(
wstring const & target, bool & exactMatch, vector< char > & extLeaf, uint32_t & nextLeaf, char const *& leafEnd )
char const * BtreeIndex::findChainOffsetExactOrPrefix( std::u32string const & target,
bool & exactMatch,
vector< char > & extLeaf,
uint32_t & nextLeaf,
char const *& leafEnd )
{
if ( !idxFile ) {
throw exIndexWasNotOpened();
@ -453,7 +450,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
// Lookup the index by traversing the index btree
// vector< wchar > wcharBuffer;
wstring w_word;
std::u32string w_word;
exactMatch = false;
// Read a node
@ -505,7 +502,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
if ( leafEntries == 0xffffFFFF ) {
// A node
//GD_DPRINTF( "=>a node\n" );
//qDebug( "=>a node" );
uint32_t const * offsets = (uint32_t *)leaf + 1;
@ -534,7 +531,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( closestString );
w_word = Utf8::decode( string( closestString, wordSize ) );
w_word = Text::toUtf32( string( closestString, wordSize ) );
compareResult = target.compare( w_word );
@ -591,13 +588,13 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
currentNodeOffset = offsets[ entry + 1 ];
}
//GD_DPRINTF( "reading node at %x\n", currentNodeOffset );
//qDebug( "reading node at %x", currentNodeOffset );
readNode( currentNodeOffset, extLeaf );
leaf = &extLeaf.front();
leafEnd = leaf + extLeaf.size();
}
else {
//GD_DPRINTF( "=>a leaf\n" );
//qDebug( "=>a leaf" );
// A leaf
// If this leaf is the root, there's no next leaf, it just can't be.
@ -630,7 +627,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
memcpy( &chainSize, ptr, sizeof( uint32_t ) );
//GD_DPRINTF( "%s + %s\n", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );
//qDebug( "%s + %s", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );
ptr += sizeof( uint32_t ) + chainSize;
}
@ -643,7 +640,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
unsigned windowSize = chainOffsets.size();
for ( ;; ) {
//GD_DPRINTF( "window = %u, ws = %u\n", window - &chainOffsets.front(), windowSize );
//qDebug( "window = %u, ws = %u", window - &chainOffsets.front(), windowSize );
char const ** chainToCheck = window + windowSize / 2;
ptr = *chainToCheck;
@ -653,9 +650,9 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
size_t wordSize = strlen( ptr );
w_word = Utf8::decode( string( ptr, wordSize ) );
w_word = Text::toUtf32( string( ptr, wordSize ) );
wstring foldedWord = Folding::apply( w_word );
std::u32string foldedWord = Folding::apply( w_word );
if ( foldedWord.empty() ) {
foldedWord = Folding::applyWhitespaceOnly( w_word );
}
@ -754,9 +751,9 @@ vector< WordArticleLink > BtreeIndex::readChain( char const *& ptr, uint32_t max
return result;
}
void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
void BtreeIndex::antialias( std::u32string const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
{
wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) );
std::u32string caseFolded = Folding::applySimpleCaseOnly( Text::normalize( str ) );
if ( ignoreDiacritics ) {
caseFolded = Folding::applyDiacriticsOnly( caseFolded );
}
@ -768,8 +765,8 @@ void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & cha
for ( unsigned x = chain.size(); x--; ) {
// If after applying case folding to each word they wouldn't match, we
// drop the entry.
wstring entry =
Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) );
std::u32string entry =
Folding::applySimpleCaseOnly( Text::normalize( Text::toUtf32( chain[ x ].prefix + chain[ x ].word ) ) );
if ( ignoreDiacritics ) {
entry = Folding::applyDiacriticsOnly( entry );
}
@ -927,9 +924,9 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
return offset;
}
void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
void IndexedWords::addWord( std::u32string const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
{
wstring word = gd::removeTrailingZero( index_word );
std::u32string word = Text::removeTrailingZero( index_word );
string::size_type wordSize = word.size();
// Safeguard us against various bugs here. Don't attempt adding words
@ -949,7 +946,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
wordSize = word.size();
}
wchar const * wordBegin = word.c_str();
char32_t const * wordBegin = word.c_str();
// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
@ -962,7 +959,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
--wordSize;
}
wchar const * nextChar = wordBegin;
char32_t const * nextChar = wordBegin;
vector< char > utfBuffer( wordSize * 4 );
@ -974,11 +971,11 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
if ( !*nextChar ) // End of string ends everything
{
if ( wordsAdded == 0 ) {
wstring folded = Folding::applyWhitespaceOnly( wstring( wordBegin, wordSize ) );
std::u32string folded = Folding::applyWhitespaceOnly( std::u32string( wordBegin, wordSize ) );
if ( !folded.empty() ) {
auto i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first;
auto i = insert( { Text::toUtf8( folded ), vector< WordArticleLink >() } ).first;
string utfWord = Utf8::encode( wstring( wordBegin, wordSize ) );
string utfWord = Text::toUtf8( std::u32string( wordBegin, wordSize ) );
string utfPrefix;
i->second.emplace_back( utfWord, articleOffset, utfPrefix );
}
@ -992,15 +989,15 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
}
// Insert this word
wstring folded = Folding::apply( nextChar );
auto name = Utf8::encode( folded );
std::u32string folded = Folding::apply( nextChar );
auto name = Text::toUtf8( folded );
auto i = insert( { std::move( name ), vector< WordArticleLink >() } ).first;
if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
{
string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) );
string utfWord = Text::toUtf8( std::u32string( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Text::toUtf8( std::u32string( wordBegin, nextChar - wordBegin ) );
i->second.emplace_back( std::move( utfWord ), articleOffset, std::move( utfPrefix ) );
// reduce the vector reallocation.
@ -1024,14 +1021,14 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
}
}
void IndexedWords::addSingleWord( wstring const & index_word, uint32_t articleOffset )
void IndexedWords::addSingleWord( std::u32string const & index_word, uint32_t articleOffset )
{
wstring const & word = gd::removeTrailingZero( index_word );
wstring folded = Folding::apply( word );
std::u32string const & word = Text::removeTrailingZero( index_word );
std::u32string folded = Folding::apply( word );
if ( folded.empty() ) {
folded = Folding::applyWhitespaceOnly( word );
}
operator[]( Utf8::encode( folded ) ).emplace_back( Utf8::encode( word ), articleOffset );
operator[]( Text::toUtf8( folded ) ).emplace_back( Text::toUtf8( word ), articleOffset );
}
IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
@ -1059,7 +1056,7 @@ IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
btreeMaxElements = BtreeMaxElements;
}
GD_DPRINTF( "Building a tree of %u elements\n", (unsigned)btreeMaxElements );
qDebug( "Building a tree of %u elements", (unsigned)btreeMaxElements );
uint32_t lastLeafOffset = 0;
@ -1419,7 +1416,7 @@ bool BtreeDictionary::getHeadwords( QStringList & headwords )
}
}
catch ( std::exception & ex ) {
gdWarning( "Failed headwords retrieving for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Failed headwords retrieving for \"%s\", reason: %s", getName().c_str(), ex.what() );
}
return headwords.size() > 0;

View file

@ -5,17 +5,12 @@
#include "dict/dictionary.hh"
#include "dictfile.hh"
#include <algorithm>
#include <map>
#include <stdint.h>
#include <string>
#include <vector>
#include <QFuture>
#include <QList>
#include <QSet>
#include <QList>
/// A base for the dictionary which creates a btree index to look up
@ -23,7 +18,6 @@
namespace BtreeIndexing {
using std::string;
using gd::wstring;
using std::vector;
using std::map;
@ -85,7 +79,8 @@ public:
/// Finds articles that match the given string. A case-insensitive search
/// is performed.
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
vector< WordArticleLink >
findArticles( std::u32string const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
/// Find all unique article links in the index
void findAllArticleLinks( QList< WordArticleLink > & articleLinks );
@ -121,8 +116,11 @@ protected:
/// case, the returned pointer wouldn't belong to 'leaf' at all. To that end,
/// the leafEnd pointer always holds the pointer to the first byte outside
/// the node data.
char const * findChainOffsetExactOrPrefix(
wstring const & target, bool & exactMatch, vector< char > & leaf, uint32_t & nextLeaf, char const *& leafEnd );
char const * findChainOffsetExactOrPrefix( std::u32string const & target,
bool & exactMatch,
vector< char > & leaf,
uint32_t & nextLeaf,
char const *& leafEnd );
/// Reads a node or leaf at the given offset. Just uncompresses its data
/// to the given vector and does nothing more.
@ -134,7 +132,7 @@ protected:
/// Drops any aliases which arose due to folding. Only case-folded aliases
/// are left.
void antialias( wstring const &, vector< WordArticleLink > &, bool ignoreDiactitics );
void antialias( std::u32string const &, vector< WordArticleLink > &, bool ignoreDiactitics );
protected:
@ -166,10 +164,10 @@ public:
/// This function does the search using the btree index. Derivatives usually
/// need not to implement this function.
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
virtual bool isLocalDictionary()
{
@ -215,7 +213,7 @@ class BtreeWordSearchRequest: public Dictionary::WordSearchRequest
{
protected:
BtreeDictionary & dict;
wstring str;
std::u32string str;
unsigned long maxResults;
unsigned minLength;
int maxSuffixVariation;
@ -226,7 +224,7 @@ protected:
public:
BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_,
std::u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -256,11 +254,11 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
/// Instead of adding to the map directly, use this function. It does folding
/// itself, and for phrases/sentences it adds additional entries beginning with
/// each new word.
void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
void addWord( std::u32string const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
/// Differs from addWord() in that it only adds a single entry. We use this
/// for zip's file names.
void addSingleWord( wstring const & word, uint32_t articleOffset );
void addSingleWord( std::u32string const & word, uint32_t articleOffset );
};
/// Builds the index, as a compressed btree. Returns IndexInfo.

View file

@ -4,10 +4,9 @@
#include "dictdfiles.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include <map>
#include <set>
@ -16,17 +15,13 @@
#include <list>
#include <wctype.h>
#include <stdlib.h>
#include "gddebug.hh"
#include "ftshelpers.hh"
#include <QDir>
#include <QUrl>
#include <QRegularExpression>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
namespace DictdFiles {
using std::map;
@ -34,7 +29,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using std::vector;
using std::list;
@ -74,7 +68,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -96,16 +90,6 @@ public:
~DictdDictionary();
/// Returns the current value of dictionaryName.
string getName() noexcept override
{
return dictionaryName;
}
/// Returns an empty property map: this override supplies no
/// Dictionary::Property entries.
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -128,8 +112,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -155,19 +141,15 @@ DictdDictionary::DictdDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
indexFile( dictionaryFiles[ 0 ], "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
indexFile( dictionaryFiles[ 0 ], QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() )
{
// Read the dictionary name
idx.seek( sizeof( idxHeader ) );
vector< char > dName( idx.read< uint32_t >() );
if ( dName.size() > 0 ) {
idx.read( &dName.front(), dName.size() );
dictionaryName = string( &dName.front(), dName.size() );
}
idx.readU32SizeAndData<>( dictionaryName );
// Open the .dict file
@ -253,9 +235,9 @@ uint32_t decodeBase64( string const & str )
return number;
}
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -270,13 +252,13 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, string > mainArticles, alternateArticles;
multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -396,12 +378,12 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, string > & mapToUse =
multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( x.word ), articleText ) );
@ -415,7 +397,7 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
string result;
multimap< wstring, string >::const_iterator i;
multimap< std::u32string, string >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second;
@ -441,7 +423,8 @@ QString const & DictdDictionary::getDescription()
return dictionaryDescription;
}
sptr< Dictionary::DataRequest > req = getArticle( U"00databaseinfo", vector< wstring >(), wstring(), false );
sptr< Dictionary::DataRequest > req =
getArticle( U"00databaseinfo", vector< std::u32string >(), std::u32string(), false );
if ( req->dataSize() > 0 ) {
dictionaryDescription = QString::fromUtf8( req->getFullData().data(), req->getFullData().size() );
@ -469,14 +452,14 @@ void DictdDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "DictD: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "DictD: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "DictD: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "DictD: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( QString::fromStdString( ftsIdxName ) );
}
}
@ -550,7 +533,7 @@ void DictdDictionary::getArticleText( uint32_t articleAddress, QString & headwor
}
}
catch ( std::exception & ex ) {
gdWarning( "DictD: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "DictD: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -602,11 +585,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Building the index
string dictionaryName = nameFromFileName( dictFiles[ 0 ] );
gdDebug( "DictD: Building the index for dictionary: %s\n", dictionaryName.c_str() );
qDebug( "DictD: Building the index for dictionary: %s", dictionaryName.c_str() );
initializing.indexingDictionary( dictionaryName );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -619,7 +602,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
IndexedWords indexedWords;
File::Index indexFile( dictFiles[ 0 ], "rb" );
File::Index indexFile( dictFiles[ 0 ], QIODevice::ReadOnly );
// Read words from index until none's left.
@ -641,17 +624,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( tab3 ) {
char * tab4 = strchr( tab3 + 1, '\t' );
if ( tab4 ) {
GD_DPRINTF( "Warning: too many tabs present, skipping: %s\n", buf );
qDebug( "Warning: too many tabs present, skipping: %s", buf );
continue;
}
// Handle the fourth entry, if it exists. From dictfmt man:
// When --index-keep-orig option is used fourth column is created
// (if necessary) in .index file.
indexedWords.addWord( Utf8::decode( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
indexedWords.addWord( Text::toUtf32( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
++idxHeader.wordCount;
}
indexedWords.addWord( Utf8::decode( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
indexedWords.addWord( Text::toUtf32( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
++idxHeader.wordCount;
++idxHeader.articleCount;
@ -676,7 +659,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
eol = articleBody; // No headword itself
}
if ( eol ) {
while ( *eol && Utf8::isspace( *eol ) ) {
while ( *eol && Text::isspace( *eol ) ) {
++eol; // skip spaces
}
@ -686,7 +669,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
*endEol = 0;
}
GD_DPRINTF( "DICT NAME: '%s'\n", eol );
qDebug( "DICT NAME: '%s'", eol );
dictionaryName = eol;
}
}
@ -698,12 +681,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
}
else {
GD_DPRINTF( "Warning: only a single tab present, skipping: %s\n", buf );
qDebug( "Warning: only a single tab present, skipping: %s", buf );
continue;
}
}
else {
GD_DPRINTF( "Warning: no tabs present, skipping: %s\n", buf );
qDebug( "Warning: no tabs present, skipping: %s", buf );
continue;
}
@ -747,7 +730,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< DictdDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Dictd dictionary \"%s\" reading failed, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Dictd dictionary \"%s\" reading failed, error: %s", fileName.c_str(), e.what() );
}
}

View file

@ -177,7 +177,7 @@ void Class::deferredInit()
//base method.
}
sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
sptr< WordSearchRequest > Class::stemmedMatch( std::u32string const & /*str*/,
unsigned /*minLength*/,
unsigned /*maxSuffixVariation*/,
unsigned long /*maxResults*/ )
@ -185,12 +185,12 @@ sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
return std::make_shared< WordSearchRequestInstant >();
}
/// Base implementation: the word argument is unnamed and unused, and a newly
/// created WordSearchRequestInstant is returned without further setup.
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & )
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( std::u32string const & )
{
return std::make_shared< WordSearchRequestInstant >();
}
/// Base implementation: the word argument is unnamed and unused, and an
/// empty vector is returned.
vector< wstring > Class::getAlternateWritings( wstring const & ) noexcept
vector< std::u32string > Class::getAlternateWritings( std::u32string const & ) noexcept
{
return {};
}

View file

@ -19,7 +19,7 @@
#include "langcoder.hh"
#include "sptr.hh"
#include "utils.hh"
#include "wstring.hh"
#include "text.hh"
#include <QtGlobal>
/// Abstract dictionary-related stuff
@ -27,16 +27,8 @@ namespace Dictionary {
using std::vector;
using std::string;
using gd::wstring;
using std::map;
/// Keys of the map returned by getProperties(); each key identifies one
/// piece of dictionary metadata.
enum Property {
Author,
Copyright,
Description,
Email
};
DEF_EX( Ex, "Dictionary error", std::exception )
DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex )
DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex )
@ -131,19 +123,19 @@ private:
/// algorithms. Positive values are used by morphology matches.
struct WordMatch
{
wstring word;
std::u32string word;
int weight;
/// Creates a match with a default-constructed (empty) word and weight 0.
WordMatch():
weight( 0 )
{
}
/// Creates a match for word_ with weight 0. The constructor is not marked
/// explicit, so a string converts implicitly to a WordMatch.
WordMatch( wstring const & word_ ):
WordMatch( std::u32string const & word_ ):
word( word_ ),
weight( 0 )
{
}
WordMatch( wstring const & word_, int weight_ ):
WordMatch( std::u32string const & word_, int weight_ ):
word( word_ ),
weight( weight_ )
{
@ -380,10 +372,6 @@ public:
metadata_enable_fts = _enable_FTS;
}
/// Returns all the available properties, like the author's name, copyright,
/// description etc. All strings are in utf8.
virtual map< Property, string > getProperties() noexcept = 0;
/// Returns the features the dictionary possesses. See the Feature enum for
/// their list.
virtual Features getFeatures() const noexcept
@ -442,7 +430,7 @@ public:
/// prefix results should be added. Not more than maxResults results should
/// be stored. The whole operation is supposed to be fast, though some
/// dictionaries, the network ones particularly, may of course be slow.
virtual sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) = 0;
virtual sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) = 0;
/// Looks up a given word in the dictionary, aiming to find different forms
/// of the given word by allowing suffix variations. This means allowing words
@ -453,20 +441,20 @@ public:
/// in the middle of a phrase got matched should be returned.
/// The default implementation does nothing, returning an empty result.
virtual sptr< WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
/// Finds known headwords for the given word, that is, the words for which
/// the given word is a synonym. If a dictionary can't perform this operation,
/// it should leave the default implementation which always returns an empty
/// result.
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & );
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
/// For a given word, provides alternate writings of it which are to be looked
/// up alongside with it. Transliteration dictionaries implement this. The
/// default implementation returns an empty list. Note that this function is
/// supposed to be very fast and simple, and the results are thus returned
/// synchronously.
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
/// Returns a definition for the given word. The definition should
/// be an html fragment (without html/head/body tags) in an utf8 encoding.
@ -475,10 +463,10 @@ public:
/// synonyms for the main word.
/// context is a dictionary-specific data, currently only used for the
/// 'Websites' feature.
virtual sptr< DataRequest > getArticle( wstring const &,
vector< wstring > const & alts,
wstring const & context = wstring(),
bool ignoreDiacritics = false ) = 0;
virtual sptr< DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const & context = std::u32string(),
bool ignoreDiacritics = false ) = 0;
/// Loads contents of a resource named 'name' into the 'data' vector. This is
/// usually a picture file referenced in the article or something like that.

View file

@ -2,16 +2,16 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "dictserver.hh"
#include "wstring_qt.hh"
#include <QTimer>
#include <QUrl>
#include <QTcpSocket>
#include <QString>
#include <list>
#include "htmlescape.hh"
#include <QCryptographicHash>
#include <QDir>
#include <QFileInfo>
#include <QRegularExpression>
#include <QtConcurrent>
#include <QtConcurrentRun>
namespace DictServer {
@ -176,7 +176,6 @@ class DictServerDictionary: public Dictionary::Class
{
Q_OBJECT
string name;
QString url, icon;
quint32 langId;
QString errorString;
@ -196,11 +195,13 @@ public:
QString const & strategies_,
QString const & icon_ ):
Dictionary::Class( id, vector< string >() ),
name( name_ ),
url( url_ ),
icon( icon_ ),
langId( 0 )
{
dictionaryName = name_;
int pos = url.indexOf( "://" );
if ( pos < 0 ) {
url = "dict://" + url;
@ -301,15 +302,6 @@ public:
disconnectFromServer( socket );
}
/// Returns the current value of the name member.
string getName() noexcept override
{
return name;
}
/// Returns an empty property map: this override supplies no Property
/// entries.
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
@ -321,9 +313,10 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override
{
@ -394,7 +387,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
{
Q_OBJECT
QAtomicInt isCancelled;
wstring word;
std::u32string word;
QString errorString;
DictServerDictionary & dict;
@ -409,7 +402,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
public:
DictServerWordSearchRequest( wstring word_, DictServerDictionary & dict_ ):
DictServerWordSearchRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ),
dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-w" ) )
@ -486,7 +479,7 @@ void DictServerWordSearchRequest::run()
qDebug() << "receive match:" << reply;
auto code = reply.left( 3 );
if ( reply.left( 3 ) != "152" ) {
if ( code != "152" ) {
matchNext();
}
@ -569,32 +562,42 @@ void DictServer::DictServerWordSearchRequest::addMatchedWord( const QString & st
class DictServerArticleRequest: public Dictionary::DataRequest
{
QAtomicInt isCancelled;
wstring word;
std::u32string word;
QString errorString;
DictServerDictionary & dict;
string articleData;
QString articleText;
int currentDatabase = 0;
DictServerState state;
QTimer * timer;
bool contentInHtml = false;
public:
DictServerImpl * dictImpl;
DictServerArticleRequest( wstring word_, DictServerDictionary & dict_ ):
DictServerArticleRequest( std::u32string word_, DictServerDictionary & dict_ ):
word( std::move( word_ ) ),
dict( dict_ ),
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-t" ) )
{
timer = new QTimer( this );
timer->setInterval( 5000 );
timer->setSingleShot( true );
qDebug() << "receive data:" << QDateTime::currentDateTime();
connect( timer, &QTimer::timeout, this, [ this ]() {
qDebug() << "Server takes too much time to response" << QDateTime::currentDateTime();
cancel();
} );
connect( this, &DictServerArticleRequest::finishedArticle, this, [ this ]( QString articleText ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
cancel();
return;
}
qDebug() << articleText;
static QRegularExpression phonetic( R"(\\([^\\]+)\\)",
QRegularExpression::CaseInsensitiveOption ); // phonetics: \stuff\ ...
static QRegularExpression divs_inside_phonetic( "</div([^>]*)><div([^>]*)>",
@ -687,10 +690,7 @@ public:
defineNext();
} );
QTimer::singleShot( 5000, this, [ this ]() {
qDebug() << "Server takes too much time to response";
cancel();
} );
timer->start();
}
void run();
@ -723,9 +723,9 @@ void DictServerArticleRequest::run()
return;
}
connect( &dictImpl->socket, &QTcpSocket::readyRead, this, [ this ]() {
QMutexLocker const _( &dictImpl->mutex );
timer->start();
if ( state == DictServerState::DEFINE ) {
QByteArray reply = dictImpl->socket.readLine();
qDebug() << "receive define:" << reply;
@ -748,34 +748,19 @@ void DictServerArticleRequest::run()
if ( reply.left( 3 ) == "150" ) {
// Articles found
int countPos = reply.indexOf( ' ', 4 );
QString articleText;
// Get articles count
// Get articles count.
// TODO: decide how this count should be used.
int count = reply.mid( 4, countPos > 4 ? countPos - 4 : -1 ).toInt();
// Read articles
for ( int x = 0; x < count; x++ ) {
reply = dictImpl->socket.readLine();
if ( reply.isEmpty() ) {
state = DictServerState::DEFINE_DATA;
return;
}
readData( reply );
}
readData( reply );
state = DictServerState::DEFINE_DATA;
}
}
else if ( state == DictServerState::DEFINE_DATA ) {
QByteArray reply = dictImpl->socket.readLine();
qDebug() << "receive define data:" << reply;
while ( true ) {
if ( reply.isEmpty() ) {
return;
}
readData( reply );
reply = dictImpl->socket.readLine();
}
qDebug() << "receive define data:" << reply << QDateTime::currentDateTime();
readData( reply );
}
} );
@ -814,7 +799,8 @@ void DictServerArticleRequest::readData( QByteArray reply )
pos = endPos + 1;
QString dbID, dbName;
QString dbID;
QString dbName;
// Retrieve database ID
endPos = reply.indexOf( ' ', pos );
@ -827,8 +813,7 @@ void DictServerArticleRequest::readData( QByteArray reply )
dbID = reply.mid( pos, endPos - pos );
// Retrieve database name
pos = endPos + 1;
endPos = reply.indexOf( ' ', pos );
pos = endPos + 1;
if ( reply[ pos ] == '\"' ) {
endPos = reply.indexOf( '\"', pos + 1 ) + 1;
}
@ -852,47 +837,30 @@ void DictServerArticleRequest::readData( QByteArray reply )
articleData += string( "<div class=\"dictserver_from\">" ) + dbName.toUtf8().data() + "[" + dbID.toUtf8().data()
+ "]" + "</div>";
// Retrieve MIME headers if any
reply = dictImpl->socket.readAll();
static QRegularExpression contentTypeExpr( "Content-Type\\s*:\\s*text/html",
QRegularExpression::CaseInsensitiveOption );
for ( ;; ) {
reply = dictImpl->socket.readLine();
if ( reply.isEmpty() ) {
return;
}
if ( reply == "\r\n" ) {
break;
}
QRegularExpressionMatch match = contentTypeExpr.match( reply );
if ( match.hasMatch() ) {
contentInHtml = true;
}
}
QString articleText;
// Retrieve article text
articleText.clear();
for ( ;; ) {
reply = dictImpl->socket.readLine();
if ( reply.isEmpty() ) {
return;
}
qDebug() << "reply data:" << reply;
if ( reply == ".\r\n" ) {
//discard all left message.
while ( !dictImpl->socket.readLine().isEmpty() ) {}
emit finishedArticle( articleText );
return;
}
articleText += reply;
articleText += reply;
qDebug() << "reply data:" << reply << QDateTime::currentDateTime();
if ( articleText.contains( "\r\n.\r\n" ) ) {
//discard all left message.
emit finishedArticle( articleText );
return;
}
}
else {
articleText += reply;
reply = dictImpl->socket.readAll();
qDebug() << "reply data:" << reply << QDateTime::currentDateTime();
articleText += reply;
if ( reply.contains( "\r\n.\r\n" ) ) {
//discard all left message. maybe delete all the remaining data after `.\r\n`
emit finishedArticle( articleText );
return;
}
}
//restart.
timer->start();
}
void DictServerArticleRequest::cancel()
@ -902,7 +870,7 @@ void DictServerArticleRequest::cancel()
finish();
}
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{
(void)maxResults;
if ( word.size() > 80 ) {
@ -915,8 +883,10 @@ sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & wor
}
}
sptr< DataRequest >
DictServerDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< DataRequest > DictServerDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
if ( word.size() > 80 ) {
@ -950,4 +920,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::DictServers const
return result;
}
#include "dictserver.moc"
} // namespace DictServer
} // namespace DictServer

View file

@ -5,54 +5,33 @@
#include "dsl_details.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
#include "iconv.hh"
#include "filetype.hh"
#include "audiolink.hh"
#include "langcoder.hh"
#include "wstring_qt.hh"
#include "indexedzip.hh"
#include "gddebug.hh"
#include "tiff.hh"
#include "ftshelpers.hh"
#include <map>
#include <set>
#include <string>
#include <vector>
#include <list>
#include <wctype.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QSemaphore>
#include <QThreadPool>
#include <QAtomicInt>
#include <QUrl>
#include <QDir>
#include <QFileInfo>
#include <QPainter>
#include <QStringList>
#include <QRegularExpression>
// For TIFF conversion
#include <QImage>
#include <QByteArray>
#include <QBuffer>
// For SVG handling
#include <QtSvg/QSvgRenderer>
#include <QtConcurrent>
#include <QSvgRenderer>
#include <QtConcurrentRun>
#include "utils.hh"
namespace Dsl {
@ -64,11 +43,9 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using gd::wchar;
using std::vector;
using std::list;
using Utf8::Encoding;
using Text::Encoding;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -120,8 +97,8 @@ struct InsidedCard
{
uint32_t offset;
uint32_t size;
QList< wstring > headwords;
InsidedCard( uint32_t _offset, uint32_t _size, QList< wstring > const & words ):
QList< std::u32string > headwords;
InsidedCard( uint32_t _offset, uint32_t _size, QList< std::u32string > const & words ):
offset( _offset ),
size( _size ),
headwords( words )
@ -132,7 +109,7 @@ struct InsidedCard
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -158,14 +135,13 @@ class DslDictionary: public BtreeIndexing::BtreeDictionary
QAtomicInt deferredInitDone;
QMutex deferredInitMutex;
bool deferredInitRunnableStarted;
QSemaphore deferredInitRunnableExited;
string initError;
int optionalPartNom;
quint8 articleNom;
wstring currentHeadword;
std::u32string currentHeadword;
string resourceDir1, resourceDir2;
public:
@ -176,15 +152,6 @@ public:
~DslDictionary();
/// Returns the current value of dictionaryName.
string getName() noexcept override
{
return dictionaryName;
}
/// Returns an empty property map: this override supplies no
/// Dictionary::Property entries.
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -217,8 +184,10 @@ public:
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -262,15 +231,15 @@ private:
/// Loads the article. Does not process the DSL language.
void loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded,
std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics,
wstring & tildeValue,
wstring & displayedHeadword,
std::u32string & tildeValue,
std::u32string & displayedHeadword,
unsigned & headwordIndex,
wstring & articleText );
std::u32string & articleText );
/// Converts DSL language to an Html.
string dslToHtml( wstring const &, wstring const & headword = wstring() );
string dslToHtml( std::u32string const &, std::u32string const & headword = std::u32string() );
// Parts of dslToHtml()
string nodeToHtml( ArticleDom::Node const & );
@ -289,7 +258,7 @@ private:
DslDictionary::DslDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
dz( 0 ),
deferredInitRunnableStarted( false ),
@ -303,17 +272,9 @@ DslDictionary::DslDictionary( string const & id, string const & indexFile, vecto
idx.seek( sizeof( idxHeader ) );
vector< char > dName( idx.read< uint32_t >() );
if ( dName.size() > 0 ) {
idx.read( &dName.front(), dName.size() );
dictionaryName = string( &dName.front(), dName.size() );
}
idx.readU32SizeAndData<>( dictionaryName );
idx.readU32SizeAndData<>( preferredSoundDictionary );
vector< char > sName( idx.read< uint32_t >() );
if ( sName.size() > 0 ) {
idx.read( &sName.front(), sName.size() );
preferredSoundDictionary = string( &sName.front(), sName.size() );
}
resourceDir1 = getDictionaryFilenames()[ 0 ] + ".files" + Utils::Fs::separator();
QString s = QString::fromStdString( getDictionaryFilenames()[ 0 ] );
@ -407,7 +368,7 @@ void DslDictionary::doDeferredInit()
memcpy( &total, abrvBlock, sizeof( uint32_t ) );
abrvBlock += sizeof( uint32_t );
GD_DPRINTF( "Loading %u abbrv\n", total );
qDebug( "Loading %u abbrv", total );
while ( total-- ) {
uint32_t keySz;
@ -490,7 +451,7 @@ void DslDictionary::loadIcon() noexcept
/// so nbsp is not a whitespace character for Dsl compiler.
/// For now we have only space and tab, since those are most likely the only
/// ones recognized as spaces by that compiler.
bool isDslWs( wchar ch )
bool isDslWs( char32_t ch )
{
switch ( ch ) {
case ' ':
@ -502,14 +463,14 @@ bool isDslWs( wchar ch )
}
void DslDictionary::loadArticle( uint32_t address,
wstring const & requestedHeadwordFolded,
std::u32string const & requestedHeadwordFolded,
bool ignoreDiacritics,
wstring & tildeValue,
wstring & displayedHeadword,
std::u32string & tildeValue,
std::u32string & displayedHeadword,
unsigned & headwordIndex,
wstring & articleText )
std::u32string & articleText )
{
wstring articleData;
std::u32string articleData;
{
vector< char > chunk;
@ -527,7 +488,7 @@ void DslDictionary::loadArticle( uint32_t address,
memcpy( &articleOffset, articleProps, sizeof( articleOffset ) );
memcpy( &articleSize, articleProps + sizeof( articleOffset ), sizeof( articleSize ) );
GD_DPRINTF( "offset = %x\n", articleOffset );
qDebug( "offset = %x", articleOffset );
char * articleBody;
@ -545,7 +506,7 @@ void DslDictionary::loadArticle( uint32_t address,
else {
try {
articleData =
Iconv::toWstring( Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
Iconv::toWstring( Text::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
free( articleBody );
// Strip DSL comments
@ -566,27 +527,27 @@ void DslDictionary::loadArticle( uint32_t address,
// Check whether we are retrieving an insided card
bool insidedCard = isDslWs( articleData.at( 0 ) );
wstring tildeValueWithUnsorted; // This one has unsorted parts left
std::u32string tildeValueWithUnsorted; // This one has unsorted parts left
for ( headwordIndex = 0;; ) {
size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) {
if ( pos == std::u32string::npos ) {
pos = articleData.size();
}
if ( !foundDisplayedHeadword ) {
// Process the headword
wstring rawHeadword = wstring( articleData, begin, pos - begin );
std::u32string rawHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !rawHeadword.empty() && isDslWs( rawHeadword[ 0 ] ) ) {
// Headword of the insided card
wstring::size_type hpos = rawHeadword.find( L'@' );
std::u32string::size_type hpos = rawHeadword.find( L'@' );
if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
std::u32string head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break;
@ -607,7 +568,7 @@ void DslDictionary::loadArticle( uint32_t address,
// We need our tilde expansion value
tildeValue = rawHeadword;
list< wstring > lst;
list< std::u32string > lst;
expandOptionalParts( tildeValue, &lst );
@ -619,7 +580,7 @@ void DslDictionary::loadArticle( uint32_t address,
processUnsortedParts( tildeValue, false );
}
wstring str = rawHeadword;
std::u32string str = rawHeadword;
if ( hadFirstHeadword ) {
expandTildes( str, tildeValueWithUnsorted );
@ -629,7 +590,7 @@ void DslDictionary::loadArticle( uint32_t address,
str = Folding::applySimpleCaseOnly( str );
list< wstring > lst;
list< std::u32string > lst;
expandOptionalParts( str, &lst );
// Does one of the results match the requested word? If so, we'd choose
@ -695,15 +656,15 @@ void DslDictionary::loadArticle( uint32_t address,
// Check for begin article text
if ( insidedCard ) {
// Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) {
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == std::u32string::npos ) {
hpos = articleData.size();
}
wstring str = wstring( articleData, pos, hpos - pos );
std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break;
}
}
@ -725,18 +686,18 @@ void DslDictionary::loadArticle( uint32_t address,
}
if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos );
articleText = std::u32string( articleData, pos );
}
else {
articleText.clear();
}
}
string DslDictionary::dslToHtml( wstring const & str, wstring const & headword )
string DslDictionary::dslToHtml( std::u32string const & str, std::u32string const & headword )
{
// Normalize the string
wstring normalizedStr = gd::normalize( str );
currentHeadword = headword;
std::u32string normalizedStr = Text::normalize( str );
currentHeadword = headword;
ArticleDom dom( normalizedStr, getName(), headword );
@ -771,7 +732,7 @@ string DslDictionary::getNodeLink( ArticleDom::Node const & node )
}
}
if ( link.empty() ) {
link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) );
link = Html::escape( Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false ) );
}
return link;
@ -782,7 +743,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
string result;
if ( !node.isTag ) {
result = Html::escape( Utf8::encode( node.text ) );
result = Html::escape( Text::toUtf8( node.text ) );
// Handle all end-of-line
@ -822,7 +783,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"c_default_color\">" + processNodeChildren( node ) + "</span>";
}
else {
result += "<font color=\"" + Html::escape( Utf8::encode( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
result += "<font color=\"" + Html::escape( Text::toUtf8( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
+ "</font>";
}
}
@ -835,7 +796,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
}
else if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && iswdigit( node.tagName[ 1 ] ) ) {
result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
result += "<div class=\"dsl_" + Text::toUtf8( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
}
else if ( node.tagName == U"trn" ) {
result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>";
@ -847,7 +808,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>";
}
else if ( node.tagName == U"s" || node.tagName == U"video" ) {
string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false );
string filename = Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false );
string n = resourceDir1 + filename;
if ( Filetype::isNameOfSound( filename ) ) {
@ -926,7 +887,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
else if ( node.tagName == U"p" ) {
result += "<span class=\"dsl_p\"";
string val = Utf8::encode( node.renderAsText() );
string val = Text::toUtf8( node.renderAsText() );
// If we have such a key, display a title
@ -946,7 +907,8 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
// user could pick up the best suitable option.
string data = processNodeChildren( node );
result += R"(<span class="dsl_stress"><span class="dsl_stress_without_accent">)" + data + "</span>"
+ "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) ) + "</span></span>";
+ "<span class=\"dsl_stress_with_accent\">" + data + Text::toUtf8( std::u32string( 1, 0x301 ) )
+ "</span></span>";
}
else if ( node.tagName == U"lang" ) {
result += "<span class=\"dsl_lang\"";
@ -982,7 +944,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" );
url.setHost( "localhost" );
auto nodeStr = Utf8::decode( getNodeLink( node ) );
auto nodeStr = Text::toUtf32( getNodeLink( node ) );
normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -1006,7 +968,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
url.setScheme( "gdlookup" );
url.setHost( "localhost" );
wstring nodeStr = node.renderAsText();
std::u32string nodeStr = node.renderAsText();
normalizeHeadword( nodeStr );
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
@ -1026,11 +988,11 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
result += "<br />";
}
else {
gdWarning( R"(DSL: Unknown tag "%s" with attributes "%s" found in "%s", article "%s".)",
QString::fromStdU32String( node.tagName ).toUtf8().data(),
QString::fromStdU32String( node.tagAttrs ).toUtf8().data(),
getName().c_str(),
QString::fromStdU32String( currentHeadword ).toUtf8().data() );
qWarning( R"(DSL: Unknown tag "%s" with attributes "%s" found in "%s", article "%s".)",
QString::fromStdU32String( node.tagName ).toUtf8().data(),
QString::fromStdU32String( node.tagAttrs ).toUtf8().data(),
getName().c_str(),
QString::fromStdU32String( currentHeadword ).toUtf8().data() );
result += "<span class=\"dsl_unknown\">[" + string( QString::fromStdU32String( node.tagName ).toUtf8().data() );
if ( !node.tagAttrs.empty() ) {
@ -1138,14 +1100,14 @@ void DslDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Dsl: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Dsl: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "DSL: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "DSL: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -1158,7 +1120,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
vector< char > chunk;
char * articleProps;
wstring articleData;
std::u32string articleData;
{
QMutexLocker _( &idxMutex );
@ -1182,8 +1144,9 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
}
else {
try {
articleData =
Iconv::toWstring( getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
articleData = Iconv::toWstring( getEncodingNameFor( static_cast< Encoding >( idxHeader.dslEncoding ) ),
articleBody,
articleSize );
free( articleBody );
// Strip DSL comments
@ -1199,7 +1162,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Skip headword
size_t pos = 0;
wstring articleHeadword, tildeValue;
std::u32string articleHeadword, tildeValue;
// Check if we retrieve insided card
bool insidedCard = isDslWs( articleData.at( 0 ) );
@ -1208,20 +1171,20 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
size_t begin = pos;
pos = articleData.find_first_of( U"\n\r", begin );
if ( pos == wstring::npos ) {
if ( pos == std::u32string::npos ) {
pos = articleData.size();
}
if ( articleHeadword.empty() ) {
// Process the headword
articleHeadword = wstring( articleData, begin, pos - begin );
articleHeadword = std::u32string( articleData, begin, pos - begin );
if ( insidedCard && !articleHeadword.empty() && isDslWs( articleHeadword[ 0 ] ) ) {
// Headword of the insided card
wstring::size_type hpos = articleHeadword.find( L'@' );
std::u32string::size_type hpos = articleHeadword.find( L'@' );
if ( hpos != string::npos ) {
wstring head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
std::u32string head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
hpos = head.find( L'~' );
while ( hpos != string::npos ) {
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
break;
@ -1238,7 +1201,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
}
if ( !articleHeadword.empty() ) {
list< wstring > lst;
list< std::u32string > lst;
tildeValue = articleHeadword;
@ -1275,15 +1238,15 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
// Check for begin article text
if ( insidedCard ) {
// Check for next insided headword
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == wstring::npos ) {
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
if ( hpos == std::u32string::npos ) {
hpos = articleData.size();
}
wstring str = wstring( articleData, pos, hpos - pos );
std::u32string str = std::u32string( articleData, pos, hpos - pos );
hpos = str.find( L'@' );
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
break;
}
}
@ -1299,17 +1262,17 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
headword = QString::fromStdU32String( articleHeadword );
}
wstring articleText;
std::u32string articleText;
if ( pos != articleData.size() ) {
articleText = wstring( articleData, pos );
articleText = std::u32string( articleData, pos );
}
else {
articleText.clear();
}
if ( !tildeValue.empty() ) {
list< wstring > lst;
list< std::u32string > lst;
processUnsortedParts( tildeValue, false );
expandOptionalParts( tildeValue, &lst );
@ -1415,19 +1378,18 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class DslArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
DslDictionary & dict;
bool ignoreDiacritics;
QAtomicInt isCancelled;
QSemaphore hasExited;
QFuture< void > f;
public:
DslArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
DslArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
DslDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1483,7 +1445,7 @@ void DslArticleRequest::run()
// index here.
set< pair< uint32_t, unsigned > > articlesIncluded;
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
// Check if we're cancelled occasionally
@ -1494,9 +1456,9 @@ void DslArticleRequest::run()
// Grab that article
wstring tildeValue;
wstring displayedHeadword;
wstring articleBody;
std::u32string tildeValue;
std::u32string displayedHeadword;
std::u32string articleBody;
unsigned headwordIndex;
string articleText, articleAfter;
@ -1567,7 +1529,7 @@ void DslArticleRequest::run()
articleText += articleAfter;
}
catch ( std::exception & ex ) {
gdWarning( "DSL: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
qWarning( "DSL: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
articleText =
string( "<span class=\"dsl_article\">" ) + QObject::tr( "Article loading error" ).toStdString() + "</span>";
}
@ -1580,9 +1542,9 @@ void DslArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > DslDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > DslDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1598,7 +1560,6 @@ class DslResourceRequest: public Dictionary::DataRequest
string resourceName;
QAtomicInt isCancelled;
QSemaphore hasExited;
QFuture< void > f;
public:
@ -1643,7 +1604,7 @@ void DslResourceRequest::run()
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
GD_DPRINTF( "dsl resource name is %s\n", n.c_str() );
qDebug( "dsl resource name is %s", n.c_str() );
try {
try {
@ -1672,7 +1633,7 @@ void DslResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1695,10 +1656,10 @@ void DslResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "DSL: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "DSL: Failed loading resource \"%s\" for \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -1745,11 +1706,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
continue;
}
// Make sure it's not an abbreviation file
// Make sure it's not an abbreviation file. extSize of ".dsl" or ".dsl.dz"
int extSize = ( uncompressedDsl ? 4 : 7 );
if ( fileName.size() - extSize >= 5
&& strncasecmp( fileName.c_str() + fileName.size() - extSize - 5, "_abrv", 5 ) == 0 ) {
if ( int extSize = ( uncompressedDsl ? 4 : 7 ); ( fileName.size() >= ( 5 + extSize ) )
&& ( QByteArrayView( fileName ).chopped( extSize ).last( 5 ).compare( "_abrv", Qt::CaseInsensitive ) == 0 ) ) {
// It is, skip it
continue;
}
@ -1802,12 +1762,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
gdDebug( "Dsl: Building the index for dictionary: %s\n",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
qDebug( "Dsl: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -1818,19 +1778,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() );
string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() );
string soundDictName = Text::toUtf8( scanner.getSoundDictionaryName() );
if ( !soundDictName.empty() ) {
idxHeader.hasSoundDictionaryName = 1;
idx.write( (uint32_t)soundDictName.size() );
idx.write( soundDictName.data(), soundDictName.size() );
}
idxHeader.dslEncoding = scanner.getEncoding();
idxHeader.dslEncoding = static_cast< uint32_t >( scanner.getEncoding() );
IndexedWords indexedWords;
@ -1844,7 +1804,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
map< string, string > abrv;
wstring curString;
std::u32string curString;
size_t curOffset;
for ( ;; ) {
@ -1856,7 +1816,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
continue;
}
list< wstring > keys;
list< std::u32string > keys;
bool eof = false;
@ -1871,7 +1831,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
expandOptionalParts( curString, &keys );
if ( !abrvScanner.readNextLineWithoutComments( curString, curOffset ) || curString.empty() ) {
gdWarning( "Premature end of file %s\n", abrvFileName.c_str() );
qWarning( "Premature end of file %s", abrvFileName.c_str() );
eof = true;
break;
}
@ -1892,13 +1852,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
// If the string has any dsl markup, we strip it
string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() );
string value = Text::toUtf8( ArticleDom( curString ).root.renderAsText() );
for ( auto & key : keys ) {
unescapeDsl( key );
normalizeHeadword( key );
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
}
@ -1910,7 +1870,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
chunks.addToBlock( &sz, sizeof( uint32_t ) );
for ( const auto & i : abrv ) {
// GD_DPRINTF( "%s:%s\n", i->first.c_str(), i->second.c_str() );
// qDebug( "%s:%s", i->first.c_str(), i->second.c_str() );
sz = i.first.size();
chunks.addToBlock( &sz, sizeof( uint32_t ) );
@ -1921,12 +1881,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
}
catch ( std::exception & e ) {
gdWarning( "Error reading abrv file \"%s\", error: %s. Skipping it.\n", abrvFileName.c_str(), e.what() );
qWarning( "Error reading abrv file \"%s\", error: %s. Skipping it.", abrvFileName.c_str(), e.what() );
}
}
bool hasString = false;
wstring curString;
std::u32string curString;
size_t curOffset;
uint32_t articleCount = 0, wordCount = 0;
@ -1951,7 +1911,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// characters are blank, too.
for ( size_t x = 1; x < curString.size(); ++x ) {
if ( !isDslWs( curString[ x ] ) ) {
gdWarning( "Garbage string in %s at offset 0x%lX\n", fileName.c_str(), curOffset );
qWarning( "Garbage string in %s at offset 0x%lX", fileName.c_str(), curOffset );
break;
}
}
@ -1960,20 +1920,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Ok, got the headword
list< wstring > allEntryWords;
list< std::u32string > allEntryWords;
processUnsortedParts( curString, true );
expandOptionalParts( curString, &allEntryWords );
uint32_t articleOffset = curOffset;
//GD_DPRINTF( "Headword: %ls\n", curString.c_str() );
//qDebug( "Headword: %ls", curString.c_str() );
// More headwords may follow
for ( ;; ) {
if ( !( hasString = scanner.readNextLineWithoutComments( curString, curOffset ) ) ) {
gdWarning( "Premature end of file %s\n", fileName.c_str() );
qWarning( "Premature end of file %s", fileName.c_str() );
break;
}
@ -2013,10 +1973,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
wordCount += allEntryWords.size();
int insideInsided = 0;
wstring headword;
std::u32string headword;
QList< InsidedCard > insidedCards;
uint32_t offset = curOffset;
QList< wstring > insidedHeadwords;
QList< std::u32string > insidedHeadwords;
unsigned linesInsideCard = 0;
int dogLine = 0;
bool wasEmptyLine = false;
@ -2031,11 +1991,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( !hasString || ( curString.size() && !isDslWs( curString[ 0 ] ) ) ) {
if ( insideInsided ) {
gdWarning( "Unclosed tag '@' at line %i", dogLine );
qWarning( "Unclosed tag '@' at line %i", dogLine );
insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) );
}
if ( noSignificantLines ) {
gdWarning( "Orphan headword at line %i", headwordLine );
qWarning( "Orphan headword at line %i", headwordLine );
}
break;
@ -2049,7 +2009,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
else {
if ( wasEmptyLine && !Folding::applyWhitespaceOnly( curString ).empty() ) {
gdWarning( "Orphan string at line %i", scanner.getLinesRead() - 1 );
qWarning( "Orphan string at line %i", scanner.getLinesRead() - 1 );
}
}
@ -2059,8 +2019,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Find embedded cards
wstring::size_type n = curString.find( L'@' );
if ( n == wstring::npos || curString[ n - 1 ] == L'\\' ) {
std::u32string::size_type n = curString.find( L'@' );
if ( n == std::u32string::npos || curString[ n - 1 ] == L'\\' ) {
if ( insideInsided ) {
linesInsideCard++;
}
@ -2070,7 +2030,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
else {
// Embedded card tag must be placed at first position in line after spaces
if ( !isAtSignFirst( curString ) ) {
gdWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
qWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
if ( insideInsided ) {
linesInsideCard++;
@ -2160,7 +2120,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// If there was a zip file, index it too
if ( zipFileName.size() ) {
GD_DPRINTF( "Indexing zip file\n" );
qDebug( "Indexing zip file" );
idxHeader.hasZipFile = 1;
@ -2216,7 +2176,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< DslDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "DSL dictionary reading failed: %s:%u, error: %s\n", fileName.c_str(), atLine, e.what() );
qWarning( "DSL dictionary reading failed: %s:%u, error: %s", fileName.c_str(), atLine, e.what() );
}
}

View file

@ -5,9 +5,8 @@
#include "folding.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "ufile.hh"
#include "utf8.hh"
#include "text.hh"
#include <exception>
#include <stdio.h>
@ -18,9 +17,8 @@
namespace Dsl {
namespace Details {
using gd::wstring;
using std::list;
using Utf8::Encoding;
using Text::Encoding;
static QMap< int, string > lang_codes = {
{ 1, "en" }, { 1033, "en" }, { 2, "ru" }, { 1049, "ru" }, { 1068, "az" }, { 1025, "ar" }, { 1067, "am" },
@ -41,7 +39,7 @@ string findCodeForDslId( int id )
return lang_codes[ id ];
}
bool isAtSignFirst( wstring const & str )
bool isAtSignFirst( std::u32string const & str )
{
// Test if '@' is first in string except spaces and dsl tags
QRegularExpression reg( R"([ \t]*(?:\[[^\]]+\][ \t]*)*@)", QRegularExpression::PatternOption::CaseInsensitiveOption );
@ -50,13 +48,13 @@ bool isAtSignFirst( wstring const & str )
/////////////// ArticleDom
wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
std::u32string ArticleDom::Node::renderAsText( bool stripTrsTag ) const
{
if ( !isTag ) {
return text;
}
wstring result;
std::u32string result;
for ( const auto & i : *this ) {
if ( !stripTrsTag || i.tagName != U"!trs" ) {
@ -70,17 +68,17 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
namespace {
/// @return true if @p tagName equals "mN" where N is a digit
bool is_mN( wstring const & tagName )
bool is_mN( std::u32string const & tagName )
{
return tagName.size() == 2 && tagName[ 0 ] == U'm' && iswdigit( tagName[ 1 ] );
}
bool isAnyM( wstring const & tagName )
bool isAnyM( std::u32string const & tagName )
{
return tagName == U"m" || is_mN( tagName );
}
bool checkM( wstring const & dest, wstring const & src )
bool checkM( std::u32string const & dest, std::u32string const & src )
{
return src == U"m" && is_mN( dest );
}
@ -98,8 +96,8 @@ struct MustTagBeClosed
} // unnamed namespace
ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring const & headword_ ):
root( Node::Tag(), wstring(), wstring() ),
ArticleDom::ArticleDom( std::u32string const & str, string const & dictName, std::u32string const & headword_ ):
root( Node::Tag(), std::u32string(), std::u32string() ),
stringPos( str.c_str() ),
lineStartPos( str.c_str() ),
transcriptionCount( 0 ),
@ -119,15 +117,15 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( !atSignFirstInLine() ) {
// Not insided card
if ( dictName.empty() ) {
gdWarning( "Unescaped '@' symbol found" );
qWarning( "Unescaped '@' symbol found" );
}
else {
gdWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
qWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
}
}
else {
// Insided card
wstring linkTo;
std::u32string linkTo;
nextChar();
for ( ;; nextChar() ) {
if ( ch == L'\n' ) {
@ -143,13 +141,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
linkTo = Folding::trimWhitespace( linkTo );
if ( !linkTo.empty() ) {
list< wstring > allLinkEntries;
list< std::u32string > allLinkEntries;
processUnsortedParts( linkTo, true );
expandOptionalParts( linkTo, &allLinkEntries );
for ( auto entry = allLinkEntries.begin(); entry != allLinkEntries.end(); ) {
if ( !textNode ) {
Node text = Node( Node::Text(), wstring() );
Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) {
root.push_back( text );
@ -169,10 +167,10 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
stack.pop_back();
textNode = 0;
wstring linkText = Folding::trimWhitespace( *entry );
std::u32string linkText = Folding::trimWhitespace( *entry );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"@", wstring() );
Node link( Node::Tag(), U"@", std::u32string() );
for ( auto & n : nodeDom.root ) {
link.push_back( n );
}
@ -182,13 +180,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( stack.empty() ) {
root.push_back( link );
if ( entry != allLinkEntries.end() ) { // Add line break before next entry
root.push_back( Node( Node::Tag(), U"br", wstring() ) );
root.push_back( Node( Node::Tag(), U"br", std::u32string() ) );
}
}
else {
stack.back()->push_back( link );
if ( entry != allLinkEntries.end() ) {
stack.back()->push_back( Node( Node::Tag(), U"br", wstring() ) );
stack.back()->push_back( Node( Node::Tag(), U"br", std::u32string() ) );
}
}
}
@ -209,8 +207,8 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
if ( ch == L'[' && !escaped ) {
// Beginning of a tag.
bool isClosing;
wstring name;
wstring attrs;
std::u32string name;
std::u32string attrs;
try {
do {
@ -246,16 +244,16 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
}
catch ( std::exception & ex ) {
if ( !dictionaryName.empty() ) {
gdWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found in "%s", article "%s".)",
QString::fromStdU32String( name ).toUtf8().data(),
QString::fromStdU32String( attrs ).toUtf8().data(),
dictionaryName.c_str(),
QString::fromStdU32String( headword ).toUtf8().data() );
qWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found in "%s", article "%s".)",
QString::fromStdU32String( name ).toUtf8().data(),
QString::fromStdU32String( attrs ).toUtf8().data(),
dictionaryName.c_str(),
QString::fromStdU32String( headword ).toUtf8().data() );
}
else {
gdWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found)",
QString::fromStdU32String( name ).toUtf8().data(),
QString::fromStdU32String( attrs ).toUtf8().data() );
qWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found)",
QString::fromStdU32String( name ).toUtf8().data(),
QString::fromStdU32String( attrs ).toUtf8().data() );
}
throw ex;
@ -331,7 +329,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
nextChar();
} while ( Folding::isWhitespace( ch ) );
wstring linkTo, linkText;
std::u32string linkTo, linkText;
for ( ;; nextChar() ) {
// Is it the end?
@ -374,7 +372,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), U"ref", wstring() );
Node link( Node::Tag(), U"ref", std::u32string() );
for ( auto & n : nodeDom.root ) {
link.push_back( n );
}
@ -428,7 +426,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
// If there's currently no text node, open one
if ( !textNode ) {
Node text = Node( Node::Text(), wstring() );
Node text = Node( Node::Text(), std::u32string() );
if ( stack.empty() ) {
root.push_back( text );
@ -678,21 +676,21 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
unsigned const unclosedTagCount = 1 + std::count_if( it, stack.end(), MustTagBeClosed() );
if ( dictName.empty() ) {
gdWarning( "Warning: %u tag(s) were unclosed, first tag name \"%s\".\n",
unclosedTagCount,
firstTagName.constData() );
qWarning( "Warning: %u tag(s) were unclosed, first tag name \"%s\".",
unclosedTagCount,
firstTagName.constData() );
}
else {
gdWarning( "Warning: %u tag(s) were unclosed in \"%s\", article \"%s\", first tag name \"%s\".\n",
unclosedTagCount,
dictName.c_str(),
QString::fromStdU32String( headword ).toUtf8().constData(),
firstTagName.constData() );
qWarning( "Warning: %u tag(s) were unclosed in \"%s\", article \"%s\", first tag name \"%s\".",
unclosedTagCount,
dictName.c_str(),
QString::fromStdU32String( headword ).toUtf8().constData(),
firstTagName.constData() );
}
}
}
void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Node * > & stack )
void ArticleDom::openTag( std::u32string const & name, std::u32string const & attrs, list< Node * > & stack )
{
list< Node > nodesToReopen;
@ -747,7 +745,7 @@ void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Nod
}
}
void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool warn )
void ArticleDom::closeTag( std::u32string const & name, list< Node * > & stack, bool warn )
{
// Find the tag which is to be closed
@ -787,14 +785,14 @@ void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool wa
}
else if ( warn ) {
if ( !dictionaryName.empty() ) {
gdWarning( R"(No corresponding opening tag for closing tag "%s" found in "%s", article "%s".)",
QString::fromStdU32String( name ).toUtf8().data(),
dictionaryName.c_str(),
QString::fromStdU32String( headword ).toUtf8().data() );
qWarning( R"(No corresponding opening tag for closing tag "%s" found in "%s", article "%s".)",
QString::fromStdU32String( name ).toUtf8().data(),
dictionaryName.c_str(),
QString::fromStdU32String( headword ).toUtf8().data() );
}
else {
gdWarning( "No corresponding opening tag for closing tag \"%s\" found.",
QString::fromStdU32String( name ).toUtf8().data() );
qWarning( "No corresponding opening tag for closing tag \"%s\" found.",
QString::fromStdU32String( name ).toUtf8().data() );
}
}
}
@ -840,13 +838,13 @@ bool ArticleDom::atSignFirstInLine()
return true;
}
return isAtSignFirst( wstring( lineStartPos ) );
return isAtSignFirst( std::u32string( lineStartPos ) );
}
/////////////// DslScanner
DslScanner::DslScanner( string const & fileName ):
encoding( Utf8::Utf8 ),
encoding( Text::Encoding::Utf8 ),
readBufferPtr( readBuffer ),
readBufferLeft( 0 ),
linesRead( 0 )
@ -877,19 +875,19 @@ DslScanner::DslScanner( string const & fileName ):
guessedEncoding.has_value() ) {
switch ( guessedEncoding.value() ) {
case QStringConverter::Utf8:
encoding = Utf8::Utf8;
encoding = Text::Encoding::Utf8;
break;
case QStringConverter::Utf16LE:
encoding = Utf8::Utf16LE;
encoding = Text::Encoding::Utf16LE;
break;
case QStringConverter::Utf16BE:
encoding = Utf8::Utf16BE;
encoding = Text::Encoding::Utf16BE;
break;
case QStringConverter::Utf32LE:
encoding = Utf8::Utf16LE;
encoding = Text::Encoding::Utf16LE;
break;
case QStringConverter::Utf32BE:
encoding = Utf8::Utf32BE;
encoding = Text::Encoding::Utf32BE;
break;
default:
break;
@ -906,10 +904,10 @@ DslScanner::DslScanner( string const & fileName ):
}
//iconv.reinit( encoding );
lineFeed = Utf8::initLineFeed( encoding );
lineFeed = Text::initLineFeed( encoding );
// We now can use our own readNextLine() function
wstring str;
std::u32string str;
size_t offset;
for ( ;; ) {
@ -947,7 +945,7 @@ DslScanner::DslScanner( string const & fileName ):
size_t beg = str.find_first_of( L'"' );
if ( beg == wstring::npos ) {
if ( beg == std::u32string::npos ) {
throw exMalformedDslFile( fileName );
}
@ -957,7 +955,7 @@ DslScanner::DslScanner( string const & fileName ):
throw exMalformedDslFile( fileName );
}
wstring arg( str, beg + 1, end - beg - 1 );
std::u32string arg( str, beg + 1, end - beg - 1 );
if ( isName ) {
dictionaryName = arg;
@ -975,16 +973,16 @@ DslScanner::DslScanner( string const & fileName ):
// The encoding
if ( !needExactEncoding ) {
// We don't need that!
GD_FDPRINTF( stderr, "Warning: encoding was specified in a Unicode file, ignoring.\n" );
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
}
else if ( !arg.compare( U"Latin" ) ) {
encoding = Utf8::Windows1252;
encoding = Text::Encoding::Windows1252;
}
else if ( !arg.compare( U"Cyrillic" ) ) {
encoding = Utf8::Windows1251;
encoding = Text::Encoding::Windows1251;
}
else if ( !arg.compare( U"EasternEuropean" ) ) {
encoding = Utf8::Windows1250;
encoding = Text::Encoding::Windows1250;
}
else {
gzclose( f );
@ -1010,7 +1008,7 @@ DslScanner::~DslScanner() noexcept
gzclose( f );
}
bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_word )
bool DslScanner::readNextLine( std::u32string & out, size_t & offset, bool only_head_word )
{
offset = gztell( f ) - readBufferLeft /*+pos*/;
@ -1037,7 +1035,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
return false;
}
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) {
return false;
}
@ -1058,9 +1056,9 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
}
}
bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bool only_headword )
bool DslScanner::readNextLineWithoutComments( std::u32string & out, size_t & offset, bool only_headword )
{
wstring str;
std::u32string str;
bool commentToNextLine = false;
size_t currentOffset;
@ -1088,14 +1086,14 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bo
/////////////// DslScanner
void processUnsortedParts( wstring & str, bool strip )
void processUnsortedParts( std::u32string & str, bool strip )
{
int refCount = 0;
size_t startPos = 0;
for ( size_t x = 0; x < str.size(); ) {
wchar ch = str[ x ];
char32_t ch = str[ x ];
if ( ch == L'\\' ) {
// Escape code
@ -1121,7 +1119,7 @@ void processUnsortedParts( wstring & str, bool strip )
--refCount;
if ( refCount < 0 ) {
GD_FDPRINTF( stderr, "Warning: an unmatched closing brace was encountered.\n" );
qWarning( "Warning: an unmatched closing brace was encountered." );
refCount = 0;
// But we remove that thing either way
str.erase( x, 1 );
@ -1146,23 +1144,23 @@ void processUnsortedParts( wstring & str, bool strip )
}
if ( strip && refCount ) {
GD_FDPRINTF( stderr, "Warning: unclosed brace(s) encountered.\n" );
qWarning( "Warning: unclosed brace(s) encountered." );
str.erase( startPos );
}
}
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, bool inside_recurse )
void expandOptionalParts( std::u32string & str, list< std::u32string > * result, size_t x, bool inside_recurse )
{
// If str is too long, it can never be a headword.
if ( str.size() > 100 ) {
return;
}
list< wstring > expanded;
list< wstring > * headwords;
list< std::u32string > expanded;
list< std::u32string > * headwords;
headwords = inside_recurse ? result : &expanded;
for ( ; x < str.size(); ) {
wchar ch = str[ x ];
char32_t ch = str[ x ];
if ( ch == L'\\' ) {
// Escape code
@ -1175,7 +1173,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
int refCount = 1;
for ( size_t y = x + 1; y < str.size(); ++y ) {
wchar ch = str[ y ];
char32_t ch = str[ y ];
if ( ch == L'\\' ) {
// Escape code
@ -1191,7 +1189,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( y != x + 1 ) // Only do for non-empty cases
{
wstring removed( str, 0, x );
std::u32string removed( str, 0, x );
removed.append( str, y + 1, str.size() - y - 1 );
expandOptionalParts( removed, headwords, x, true );
@ -1205,7 +1203,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
if ( refCount && x != str.size() - 1 ) {
// Closing paren not found? Chop it.
wstring removed( str, 0, x );
std::u32string removed( str, 0, x );
// Limit the amount of results to avoid excessive resource consumption
if ( headwords->size() < 32 ) {
@ -1243,10 +1241,10 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
}
}
static const wstring openBraces( U"{{" );
static const wstring closeBraces( U"}}" );
static const std::u32string openBraces( U"{{" );
static const std::u32string closeBraces( U"}}" );
void stripComments( wstring & str, bool & nextLine )
void stripComments( std::u32string & str, bool & nextLine )
{
string::size_type n = 0, n2 = 0;
@ -1270,9 +1268,9 @@ void stripComments( wstring & str, bool & nextLine )
}
}
void expandTildes( wstring & str, wstring const & tildeReplacement )
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement )
{
wstring tildeValue = Folding::trimWhitespace( tildeReplacement );
std::u32string tildeValue = Folding::trimWhitespace( tildeReplacement );
for ( size_t x = 0; x < str.size(); ) {
if ( str[ x ] == L'\\' ) {
x += 2;
@ -1295,7 +1293,7 @@ void expandTildes( wstring & str, wstring const & tildeReplacement )
}
}
void unescapeDsl( wstring & str )
void unescapeDsl( std::u32string & str )
{
for ( size_t x = 0; x < str.size(); ++x ) {
if ( str[ x ] == L'\\' ) {
@ -1304,7 +1302,7 @@ void unescapeDsl( wstring & str )
}
}
void normalizeHeadword( wstring & str )
void normalizeHeadword( std::u32string & str )
{
for ( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char
{
@ -1332,7 +1330,7 @@ void normalizeHeadword( wstring & str )
}
namespace {
void cutEnding( wstring & where, wstring const & ending )
void cutEnding( std::u32string & where, std::u32string const & ending )
{
if ( where.size() > ending.size() && where.compare( where.size() - ending.size(), ending.size(), ending ) == 0 ) {
where.erase( where.size() - ending.size() );
@ -1340,17 +1338,17 @@ void cutEnding( wstring & where, wstring const & ending )
}
} // namespace
quint32 dslLanguageToId( wstring const & name )
quint32 dslLanguageToId( std::u32string const & name )
{
static wstring newSp( U"newspelling" );
static wstring st( U"standard" );
static wstring ms( U"modernsort" );
static wstring ts( U"traditionalsort" );
static wstring prc( U"prc" );
static std::u32string newSp( U"newspelling" );
static std::u32string st( U"standard" );
static std::u32string ms( U"modernsort" );
static std::u32string ts( U"traditionalsort" );
static std::u32string prc( U"prc" );
// Any of those endings are to be removed
wstring nameStripped = Folding::apply( name );
std::u32string nameStripped = Folding::apply( name );
cutEnding( nameStripped, newSp );
cutEnding( nameStripped, st );

View file

@ -11,23 +11,21 @@
#include "iconv.hh"
#include <QtCore5Compat/QTextCodec>
#include <QByteArray>
#include "utf8.hh"
#include "text.hh"
// Implementation details for Dsl, not part of its interface
namespace Dsl {
namespace Details {
using std::string;
using gd::wstring;
using gd::wchar;
using std::list;
using std::vector;
using Utf8::Encoding;
using Utf8::LineFeed;
using Text::Encoding;
using Text::LineFeed;
string findCodeForDslId( int id );
bool isAtSignFirst( wstring const & str );
bool isAtSignFirst( std::u32string const & str );
/// Parses the DSL language, representing it in its structural DOM form.
struct ArticleDom
@ -37,23 +35,23 @@ struct ArticleDom
bool isTag; // true if it is a tag with subnodes, false if it's a leaf text
// data.
// Those are only used if isTag is true
wstring tagName;
wstring tagAttrs;
wstring text; // This is only used if isTag is false
std::u32string tagName;
std::u32string tagAttrs;
std::u32string text; // This is only used if isTag is false
class Text
{};
class Tag
{};
Node( Tag, wstring const & name, wstring const & attrs ):
Node( Tag, std::u32string const & name, std::u32string const & attrs ):
isTag( true ),
tagName( name ),
tagAttrs( attrs )
{
}
Node( Text, wstring const & text_ ):
Node( Text, std::u32string const & text_ ):
isTag( false ),
text( text_ )
{
@ -61,30 +59,32 @@ struct ArticleDom
/// Concatenates all child text nodes recursively to form all text
/// the node contains stripped of any markup.
wstring renderAsText( bool stripTrsTag = false ) const;
std::u32string renderAsText( bool stripTrsTag = false ) const;
};
/// Does the parse at construction. Refer to the 'root' member variable
/// afterwards.
explicit ArticleDom( wstring const &, string const & dictName = string(), wstring const & headword_ = wstring() );
explicit ArticleDom( std::u32string const &,
string const & dictName = string(),
std::u32string const & headword_ = std::u32string() );
/// Root of DOM's tree
Node root;
private:
void openTag( wstring const & name, wstring const & attr, list< Node * > & stack );
void openTag( std::u32string const & name, std::u32string const & attr, list< Node * > & stack );
void closeTag( wstring const & name, list< Node * > & stack, bool warn = true );
void closeTag( std::u32string const & name, list< Node * > & stack, bool warn = true );
bool atSignFirstInLine();
wchar const *stringPos, *lineStartPos;
char32_t const *stringPos, *lineStartPos;
class eot: std::exception
{};
wchar ch;
char32_t ch;
bool escaped;
unsigned transcriptionCount; // >0 = inside a [t] tag
unsigned mediaCount; // >0 = inside a [s] tag
@ -93,7 +93,7 @@ private:
/// Information for diagnostic purposes
string dictionaryName;
wstring headword;
std::u32string headword;
};
/// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects
@ -103,9 +103,9 @@ class DslScanner
gzFile f;
Encoding encoding;
QTextCodec * codec;
wstring dictionaryName;
wstring langFrom, langTo;
wstring soundDictionary;
std::u32string dictionaryName;
std::u32string langFrom, langTo;
std::u32string soundDictionary;
char readBuffer[ 65536 ];
char * readBufferPtr;
LineFeed lineFeed;
@ -132,25 +132,25 @@ public:
}
/// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const
std::u32string const & getDictionaryName() const
{
return dictionaryName;
}
/// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const
std::u32string const & getLangFrom() const
{
return langFrom;
}
/// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const
std::u32string const & getLangTo() const
{
return langTo;
}
/// Returns the preferred external dictionary with sounds, as was read from file's headers.
wstring const & getSoundDictionaryName() const
std::u32string const & getSoundDictionaryName() const
{
return soundDictionary;
}
@ -161,10 +161,10 @@ public:
/// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which start
/// with #).
bool readNextLine( wstring &, size_t & offset, bool only_head_word = false );
bool readNextLine( std::u32string &, size_t & offset, bool only_head_word = false );
/// Similar to readNextLine, but strips all DSL comments {{...}}
bool readNextLineWithoutComments( wstring &, size_t & offset, bool only_headword = false );
bool readNextLineWithoutComments( std::u32string &, size_t & offset, bool only_headword = false );
/// Returns the number of lines read so far from the file.
unsigned getLinesRead() const
@ -180,32 +180,35 @@ public:
/// This function either removes parts of string enclosed in braces, or leaves
/// them intact. The braces themselves are removed always, though.
void processUnsortedParts( wstring & str, bool strip );
void processUnsortedParts( std::u32string & str, bool strip );
/// Expands optional parts of a headword (ones marked with parentheses),
/// producing all possible combinations where they are present or absent.
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x = 0, bool inside_recurse = false );
void expandOptionalParts( std::u32string & str,
list< std::u32string > * result,
size_t x = 0,
bool inside_recurse = false );
/// Expands all unescaped tildes, inserting tildeReplacement text instead of
/// them.
void expandTildes( wstring & str, wstring const & tildeReplacement );
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement );
/// Unescapes any escaped chars. Be sure to handle all their special meanings
/// before unescaping them.
void unescapeDsl( wstring & str );
void unescapeDsl( std::u32string & str );
/// Normalizes the headword. Currently turns any sequences of consecutive spaces
/// into a single space.
void normalizeHeadword( wstring & );
void normalizeHeadword( std::u32string & );
/// Strip DSL {{...}} comments
void stripComments( wstring &, bool & );
void stripComments( std::u32string &, bool & );
inline size_t DslScanner::distanceToBytes( size_t x ) const
{
switch ( encoding ) {
case Utf8::Utf16LE:
case Utf8::Utf16BE:
case Encoding::Utf16LE:
case Encoding::Utf16BE:
return x * 2;
default:
return x;
@ -214,7 +217,7 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
/// Converts the given language name taken from Dsl header (i.e. getLangFrom(),
/// getLangTo()) to its proper language id.
quint32 dslLanguageToId( wstring const & name );
quint32 dslLanguageToId( std::u32string const & name );
} // namespace Details
} // namespace Dsl

View file

@ -1,26 +1,18 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT
#include <QDir>
#ifdef EPWING_SUPPORT
#include "epwing_book.hh"
#include "epwing.hh"
#include <QByteArray>
#include <QDir>
#include <QRunnable>
#include <QSemaphore>
#include <map>
#include <QtConcurrent>
#include <QtConcurrentRun>
#include <set>
#include <string>
#include "btreeidx.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "chunkedstorage.hh"
#include "wstring_qt.hh"
#include "filetype.hh"
#include "ftshelpers.hh"
#include "globalregex.hh"
@ -37,7 +29,7 @@ using std::multimap;
using std::vector;
using std::set;
using std::pair;
using gd::wstring;
using std::u32string;
namespace {
@ -66,7 +58,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -82,7 +74,6 @@ class EpwingDictionary: public BtreeIndexing::BtreeDictionary
QMutex idxMutex;
File::Index idx;
IdxHeader idxHeader;
string bookName;
ChunkedStorage::Reader chunks;
Epwing::Book::EpwingBook eBook;
QString cacheDirectory;
@ -96,21 +87,6 @@ public:
~EpwingDictionary();
string getName() noexcept override
{
return bookName;
}
void setName( string _name ) noexcept override
{
bookName = _name;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -133,10 +109,10 @@ public:
QString const & getDescription() override;
void getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off );
void getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off );
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -158,16 +134,16 @@ public:
&& ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
}
static int japaneseWriting( gd::wchar ch );
static int japaneseWriting( char32_t ch );
static bool isSign( gd::wchar ch );
static bool isSign( char32_t ch );
static bool isJapanesePunctiation( gd::wchar ch );
static bool isJapanesePunctiation( char32_t ch );
sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ) override;
sptr< Dictionary::WordSearchRequest > prefixMatch( u32string const &, unsigned long ) override;
sptr< Dictionary::WordSearchRequest >
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
stemmedMatch( u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
protected:
@ -180,7 +156,7 @@ private:
quint32 address, string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & word ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( u32string const & word ) override;
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
void
@ -219,7 +195,7 @@ EpwingDictionary::EpwingDictionary( string const & id,
vector< string > const & dictionaryFiles,
int subBook ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset )
{
@ -227,7 +203,7 @@ EpwingDictionary::EpwingDictionary( string const & id,
idx.seek( sizeof( idxHeader ) );
if ( data.size() > 0 ) {
idx.read( &data.front(), idxHeader.nameSize );
bookName = string( &data.front(), idxHeader.nameSize );
dictionaryName = string( &data.front(), idxHeader.nameSize );
}
// Initialize eBook
@ -431,16 +407,14 @@ void EpwingDictionary::makeFTSIndex( QAtomicInt & isCancelled )
return;
gdDebug( "Epwing: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Epwing: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Epwing: Failed building full-text search index for \"%s\", reason: %s\n",
getName().c_str(),
ex.what() );
qWarning( "Epwing: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( QString::fromStdString( ftsIdxName ) );
}
}
@ -475,7 +449,7 @@ void EpwingDictionary::getArticleText( uint32_t articleAddress, QString & headwo
class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring str;
u32string str;
EpwingDictionary & dict;
QAtomicInt isCancelled;
@ -483,7 +457,7 @@ class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
public:
EpwingHeadwordsRequest( wstring const & word_, EpwingDictionary & dict_ ):
EpwingHeadwordsRequest( u32string const & word_, EpwingDictionary & dict_ ):
str( word_ ),
dict( dict_ )
{
@ -559,7 +533,7 @@ void EpwingHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< EpwingHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word );
@ -568,8 +542,8 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym(
class EpwingArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
u32string word;
vector< u32string > alts;
EpwingDictionary & dict;
bool ignoreDiacritics;
@ -578,8 +552,8 @@ class EpwingArticleRequest: public Dictionary::DataRequest
public:
EpwingArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
EpwingArticleRequest( u32string const & word_,
vector< u32string > const & alts_,
EpwingDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -594,10 +568,10 @@ public:
void run();
void getBuiltInArticle( wstring const & word_,
void getBuiltInArticle( u32string const & word_,
QList< int > & pages,
QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles );
multimap< u32string, pair< string, string > > & mainArticles );
void cancel() override
{
@ -627,13 +601,13 @@ void EpwingArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
@ -667,11 +641,11 @@ void EpwingArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics )
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
multimap< wstring, pair< string, string > > & mapToUse =
multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -696,7 +670,7 @@ void EpwingArticleRequest::run()
string result = "<div class=\"epwing_article\">";
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += "<h3>";
@ -745,10 +719,10 @@ void EpwingArticleRequest::run()
finish();
}
void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
void EpwingArticleRequest::getBuiltInArticle( u32string const & word_,
QList< int > & pages,
QList< int > & offsets,
multimap< wstring, pair< string, string > > & mainArticles )
multimap< u32string, pair< string, string > > & mainArticles )
{
try {
string headword, articleText;
@ -782,7 +756,7 @@ void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
}
}
void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off )
void EpwingDictionary::getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off )
{
try {
QMutexLocker _( &eBook.getLibMutex() );
@ -793,9 +767,9 @@ void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg,
}
}
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( u32string const & word,
vector< u32string > const & alts,
u32string const &,
bool ignoreDiacritics )
{
@ -879,10 +853,10 @@ void EpwingResourceRequest::run()
}
}
catch ( std::exception & ex ) {
gdWarning( "Epwing: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "Epwing: Failed loading resource \"%s\" for \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -908,7 +882,7 @@ sptr< Dictionary::DataRequest > EpwingDictionary::getSearchResults( QString cons
ignoreDiacritics );
}
int EpwingDictionary::japaneseWriting( gd::wchar ch )
int EpwingDictionary::japaneseWriting( char32_t ch )
{
if ( ( ch >= 0x30A0 && ch <= 0x30FF ) || ( ch >= 0x31F0 && ch <= 0x31FF ) || ( ch >= 0x3200 && ch <= 0x32FF )
|| ( ch >= 0xFF00 && ch <= 0xFFEF ) || ( ch == 0x1B000 ) )
@ -921,7 +895,7 @@ int EpwingDictionary::japaneseWriting( gd::wchar ch )
return 0;
}
bool EpwingDictionary::isSign( gd::wchar ch )
bool EpwingDictionary::isSign( char32_t ch )
{
switch ( ch ) {
case 0x002B: // PLUS SIGN
@ -941,7 +915,7 @@ bool EpwingDictionary::isSign( gd::wchar ch )
}
}
/// Tells whether ch lies in the inclusive code point range U+3000..U+303F,
/// which is the Unicode "CJK Symbols and Punctuation" block.
/// Returns true for code points inside that range and false otherwise.
bool EpwingDictionary::isJapanesePunctiation( gd::wchar ch )
bool EpwingDictionary::isJapanesePunctiation( char32_t ch )
{
return ch >= 0x3000 && ch <= 0x303F;
}
@ -955,7 +929,7 @@ class EpwingWordSearchRequest: public BtreeIndexing::BtreeWordSearchRequest
public:
EpwingWordSearchRequest( EpwingDictionary & dict_,
wstring const & str_,
u32string const & str_,
unsigned minLength_,
int maxSuffixVariation_,
bool allowMiddleMatches_,
@ -1002,13 +976,13 @@ void EpwingWordSearchRequest::findMatches()
finish();
}
/// Creates the word search request used for prefix matching of str.
/// The request is an EpwingWordSearchRequest bound to this dictionary and
/// constructed with minLength 0, maxSuffixVariation -1 and
/// allowMiddleMatches true; maxResults is forwarded to it unchanged.
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( u32string const & str, unsigned long maxResults )
{
// Positional arguments: dictionary, search string, minLength, maxSuffixVariation, allowMiddleMatches, maxResults
return std::make_shared< EpwingWordSearchRequest >( *this, str, 0, -1, true, maxResults );
}
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( wstring const & str,
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( u32string const & str,
unsigned minLength,
unsigned maxSuffixVariation,
unsigned long maxResults )
@ -1047,20 +1021,20 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
chunks.addToBlock( &head.page, sizeof( head.page ) );
chunks.addToBlock( &head.offset, sizeof( head.offset ) );
wstring hw = head.headword.toStdU32String();
u32string hw = head.headword.toStdU32String();
indexedWords.addWord( hw, offset );
wordCount++;
articleCount++;
vector< wstring > words;
vector< u32string > words;
// Parse combined kanji/katakana/hiragana headwords
int w_prev = 0;
wstring word;
for ( wstring::size_type n = 0; n < hw.size(); n++ ) {
gd::wchar ch = hw[ n ];
u32string word;
for ( u32string::size_type n = 0; n < hw.size(); n++ ) {
char32_t ch = hw[ n ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isSign( ch )
|| EpwingDictionary::isJapanesePunctiation( ch ) )
@ -1070,7 +1044,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
if ( w > 0 ) {
// Store only separated words
gd::wchar ch_prev = 0;
char32_t ch_prev = 0;
if ( n )
ch_prev = hw[ n - 1 ];
bool needStore = ( n == 0 || Folding::isPunct( ch_prev ) || Folding::isWhitespace( ch_prev )
@ -1078,7 +1052,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
word.push_back( ch );
w_prev = w;
wstring::size_type i;
u32string::size_type i;
for ( i = n + 1; i < hw.size(); i++ ) {
ch = hw[ i ];
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isJapanesePunctiation( ch ) )
@ -1156,7 +1130,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
subBooksNumber = dict.setBook( mainDirectory );
}
catch ( std::exception & e ) {
gdWarning( "Epwing dictionary initializing failed: %s, error: %s\n", mainDirectory.c_str(), e.what() );
qWarning( "Epwing dictionary initializing failed: %s, error: %s", mainDirectory.c_str(), e.what() );
continue;
}
@ -1191,13 +1165,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
gdDebug( "Epwing: Building the index for dictionary in directory %s\n", dir.toUtf8().data() );
qDebug( "Epwing: Building the index for dictionary in directory %s", dir.toUtf8().data() );
QString str = dict.title();
QByteArray nameData = str.toUtf8();
initializing.indexingDictionary( nameData.data() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader{};
@ -1271,7 +1245,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< EpwingDictionary >( dictId, indexFile, dictFiles, sb ) );
}
catch ( std::exception & e ) {
gdWarning( "Epwing dictionary initializing failed: %s, error: %s\n", dir.toUtf8().data(), e.what() );
qWarning( "Epwing dictionary initializing failed: %s, error: %s", dir.toUtf8().data(), e.what() );
continue;
}
}

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "epwing_book.hh"
@ -9,11 +9,8 @@
#include <QTextStream>
#include <QTextDocumentFragment>
#include <QHash>
#include "gddebug.hh"
#include "audiolink.hh"
#include "wstring.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "folding.hh"
#include "epwing_charmap.hh"
#include "htmlescape.hh"
@ -577,7 +574,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
QFileInfo info( mainCacheDir );
if ( !info.exists() || !info.isDir() ) {
if ( !dir.mkdir( mainCacheDir ) ) {
gdWarning( "Epwing: can't create cache directory \"%s\"", mainCacheDir.toUtf8().data() );
qWarning( "Epwing: can't create cache directory \"%s\"", mainCacheDir.toUtf8().data() );
return {};
}
}
@ -586,7 +583,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
info = QFileInfo( cacheDir );
if ( !info.exists() || !info.isDir() ) {
if ( !dir.mkdir( cacheDir ) ) {
gdWarning( "Epwing: can't create cache directory \"%s\"", cacheDir.toUtf8().data() );
qWarning( "Epwing: can't create cache directory \"%s\"", cacheDir.toUtf8().data() );
return {};
}
}
@ -1137,7 +1134,7 @@ void EpwingBook::fixHeadword( QString & headword )
// return;
//}
gd::wstring folded = Folding::applyPunctOnly( fixed.toStdU32String() );
std::u32string folded = Folding::applyPunctOnly( fixed.toStdU32String() );
//fixed = QString::fromStdU32String( folded );
//if( isHeadwordCorrect( fixed ) )
@ -1263,7 +1260,7 @@ const char * EpwingBook::beginDecoration( unsigned int code )
str = "<sup>";
break;
default:
gdWarning( "Epwing: Unknown decoration code %i", code );
qWarning( "Epwing: Unknown decoration code %i", code );
code = UNKNOWN;
break;
}
@ -1285,7 +1282,7 @@ const char * EpwingBook::endDecoration( unsigned int code )
storedCode = decorationStack.pop();
if ( storedCode != code ) {
gdWarning( "Epwing: tags mismatch detected" );
qWarning( "Epwing: tags mismatch detected" );
if ( storedCode == UNKNOWN )
storedCode = code;
}
@ -1405,7 +1402,7 @@ QByteArray EpwingBook::handleColorImage( EB_Hook_Code code, const unsigned int *
EB_Error_Code ret = eb_set_binary_color_graphic( &book, &pos );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_set_binary_color_graphic", ret );
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
return QByteArray();
}
@ -1443,7 +1440,7 @@ QByteArray EpwingBook::handleColorImage( EB_Hook_Code code, const unsigned int *
ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_read_binary", ret );
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
break;
}
@ -1480,7 +1477,7 @@ QByteArray EpwingBook::handleMonoImage( EB_Hook_Code code, const unsigned int *
EB_Error_Code ret = eb_set_binary_mono_graphic( &book, &pos, monoWidth, monoHeight );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_set_binary_mono_graphic", ret );
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
return QByteArray();
}
@ -1515,7 +1512,7 @@ QByteArray EpwingBook::handleMonoImage( EB_Hook_Code code, const unsigned int *
ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_read_binary", ret );
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
break;
}
@ -1585,7 +1582,7 @@ QByteArray EpwingBook::handleWave( EB_Hook_Code code, const unsigned int * argv
EB_Error_Code ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_read_binary", ret );
gdWarning( "Epwing sound retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing sound retrieve error: %s", error_string.toUtf8().data() );
break;
}
@ -1652,7 +1649,7 @@ QByteArray EpwingBook::handleMpeg( EB_Hook_Code code, const unsigned int * argv
EB_Error_Code ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_read_binary", ret );
gdWarning( "Epwing movie retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing movie retrieve error: %s", error_string.toUtf8().data() );
break;
}
@ -1720,7 +1717,7 @@ QByteArray EpwingBook::handleNarrowFont( const unsigned int * argv, bool text_on
EB_Error_Code ret = eb_narrow_font_character_bitmap( &book, *argv, bitmap );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_narrow_font_character_bitmap", ret );
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
return QByteArray( "?" );
}
@ -1729,7 +1726,7 @@ QByteArray EpwingBook::handleNarrowFont( const unsigned int * argv, bool text_on
ret = eb_bitmap_to_png( bitmap, 8, 16, buff, &nlen );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_bitmap_to_png", ret );
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
return QByteArray( "?" );
}
@ -1784,7 +1781,7 @@ QByteArray EpwingBook::handleWideFont( const unsigned int * argv, bool text_only
EB_Error_Code ret = eb_wide_font_character_bitmap( &book, *argv, bitmap );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_wide_font_character_bitmap", ret );
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
return QByteArray( "?" );
}
@ -1793,7 +1790,7 @@ QByteArray EpwingBook::handleWideFont( const unsigned int * argv, bool text_only
ret = eb_bitmap_to_png( bitmap, 16, 16, buff, &wlen );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_bitmap_to_png", ret );
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
return QByteArray( "?" );
}
@ -1873,14 +1870,14 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches )
EB_Error_Code ret = eb_search_word( &book, bword.data() );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_search_word", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
ret = eb_hit_list( &book, 10, hits, &hitCount );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_hit_list", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
}
@ -1889,14 +1886,14 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches )
EB_Error_Code ret = eb_search_word( &book, bword2.data() );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_search_word", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
ret = eb_hit_list( &book, 10, hits, &hitCount );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_hit_list", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
}
@ -1944,14 +1941,14 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int >
EB_Error_Code ret = eb_search_exactword( &book, bword.data() );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_search_word", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
ret = eb_hit_list( &book, HitsBufferSize, hits, &hitCount );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_hit_list", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
}
@ -1960,14 +1957,14 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int >
EB_Error_Code ret = eb_search_exactword( &book, bword2.data() );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_search_word", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
ret = eb_hit_list( &book, HitsBufferSize, hits, &hitCount );
if ( ret != EB_SUCCESS ) {
setErrorString( "eb_hit_list", ret );
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
return false;
}
}
@ -1995,4 +1992,4 @@ QMutex EpwingBook::libMutex;
} // namespace Epwing
#endif
#endif

View file

@ -18,8 +18,12 @@
#include <QString>
#include <QtCore5Compat/QTextCodec>
#ifdef _MSC_VER
#include <stub_msvc.h>
// POSIX symbol unavailable on Windows needed for eb headers
#ifdef Q_OS_WIN
#ifndef _SSIZE_T
#define _SSIZE_T
#define ssize_t long
#endif
#endif
#include <eb/eb.h>

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
#include "epwing_charmap.hh"

View file

@ -2,15 +2,13 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "forvo.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QtXml>
#include <list>
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "gddebug.hh"
#include "text.hh"
namespace Forvo {
@ -20,7 +18,6 @@ namespace {
class ForvoDictionary: public Dictionary::Class
{
string name;
QString apiKey, languageCode;
QNetworkAccessManager & netMgr;
@ -32,22 +29,13 @@ public:
QString const & languageCode_,
QNetworkAccessManager & netMgr_ ):
Dictionary::Class( id, vector< string >() ),
name( name_ ),
apiKey( apiKey_ ),
languageCode( languageCode_ ),
netMgr( netMgr_ )
{
dictionaryName = name_;
}
string getName() noexcept override
{
return name;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -59,7 +47,7 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -68,7 +56,8 @@ public:
return sr;
}
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected:
@ -100,8 +89,8 @@ class ForvoArticleRequest: public Dictionary::DataRequest
public:
ForvoArticleRequest( wstring const & word,
vector< wstring > const & alts,
ForvoArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & apiKey_,
QString const & languageCode_,
string const & dictionaryId_,
@ -111,14 +100,16 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots:
virtual void requestFinished( QNetworkReply * );
};
sptr< DataRequest >
ForvoDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
sptr< DataRequest > ForvoDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool )
{
if ( word.size() > 80 || apiKey.isEmpty() ) {
@ -148,8 +139,8 @@ void ForvoArticleRequest::cancel()
finish();
}
ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
vector< wstring > const & alts,
ForvoArticleRequest::ForvoArticleRequest( std::u32string const & str,
vector< std::u32string > const & alts,
QString const & apiKey_,
QString const & languageCode_,
string const & dictionaryId_,
@ -167,9 +158,9 @@ ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
}
}
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{
gdDebug( "Forvo: requesting article %s\n", QString::fromStdU32String( str ).toUtf8().data() );
qDebug( "Forvo: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
QString key = apiKey;
@ -184,16 +175,16 @@ void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const &
+ "/language/" + languageCode + "/order/rate-desc" )
.toUtf8() );
// GD_DPRINTF( "req: %s\n", reqUrl.toEncoded().data() );
// qDebug( "req: %s", reqUrl.toEncoded().data() );
sptr< QNetworkReply > netReply = std::shared_ptr< QNetworkReply >( mgr.get( QNetworkRequest( reqUrl ) ) );
netReplies.push_back( NetReply( netReply, Utf8::encode( str ) ) );
netReplies.push_back( NetReply( netReply, Text::toUtf8( str ) ) );
}
void ForvoArticleRequest::requestFinished( QNetworkReply * r )
{
GD_DPRINTF( "Finished.\n" );
qDebug( "Finished." );
if ( isFinished() ) { // Was cancelled
return;
@ -232,7 +223,7 @@ void ForvoArticleRequest::requestFinished( QNetworkReply * r )
QString( tr( "XML parse error: %1 at %2,%3" ).arg( errorStr ).arg( errorLine ).arg( errorColumn ) ) );
}
else {
// GD_DPRINTF( "%s\n", dd.toByteArray().data() );
// qDebug( "%s", dd.toByteArray().data() );
QDomNode items = dd.namedItem( "items" );
@ -335,7 +326,7 @@ void ForvoArticleRequest::requestFinished( QNetworkReply * r )
setErrorString( text );
}
}
GD_DPRINTF( "done.\n" );
qDebug( "done." );
}
else {
setErrorString( netReply->errorString() );

View file

@ -8,40 +8,27 @@
#include "ufile.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "langcoder.hh"
#include "dictzip.hh"
#include "indexedzip.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
#include "filetype.hh"
#include "tiff.hh"
#include "audiolink.hh"
#include <QString>
#include <QSemaphore>
#include <QThreadPool>
#include <QAtomicInt>
// For TIFF conversion
#include <QImage>
#include <QByteArray>
#include <QBuffer>
#include <QDir>
#include <QRegularExpression>
#include <QtCore5Compat/QTextCodec>
#include <string>
#include <list>
#include <map>
#include <set>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
namespace Gls {
@ -51,14 +38,12 @@ using std::set;
using std::multimap;
using std::pair;
using gd::wstring;
using gd::wchar;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;
using Utf8::Encoding;
using Utf8::LineFeed;
using Text::Encoding;
using Text::LineFeed;
/////////////// GlsScanner
@ -67,9 +52,9 @@ class GlsScanner
gzFile f;
Encoding encoding;
QTextCodec * codec;
wstring dictionaryName;
wstring dictionaryDecription, dictionaryAuthor;
wstring langFrom, langTo;
std::u32string dictionaryName;
std::u32string dictionaryDecription, dictionaryAuthor;
std::u32string langFrom, langTo;
char readBuffer[ 10000 ];
char * readBufferPtr;
size_t readBufferLeft;
@ -94,31 +79,31 @@ public:
}
/// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const
std::u32string const & getDictionaryName() const
{
return dictionaryName;
}
/// Returns the dictionary's author, as was read from file's headers.
wstring const & getDictionaryAuthor() const
std::u32string const & getDictionaryAuthor() const
{
return dictionaryAuthor;
}
/// Returns the dictionary's description, as was read from file's headers.
wstring const & getDictionaryDescription() const
std::u32string const & getDictionaryDescription() const
{
return dictionaryDecription;
}
/// Returns the dictionary's source language, as was read from file's headers.
wstring const & getLangFrom() const
std::u32string const & getLangFrom() const
{
return langFrom;
}
/// Returns the dictionary's target language, as was read from file's headers.
wstring const & getLangTo() const
std::u32string const & getLangTo() const
{
return langTo;
}
@ -129,7 +114,7 @@ public:
/// If end of file is reached, false is returned.
/// Reading begins from the first line after the headers (ones which end
/// by the "### Glossary section:" line).
bool readNextLine( wstring &, size_t & offset );
bool readNextLine( std::u32string &, size_t & offset );
/// Returns the number of lines read so far from the file.
unsigned getLinesRead() const
{
@ -138,7 +123,7 @@ public:
};
GlsScanner::GlsScanner( string const & fileName ):
encoding( Utf8::Utf8 ),
encoding( Encoding::Utf8 ),
readBufferPtr( readBuffer ),
readBufferLeft( 0 ),
linesRead( 0 )
@ -164,10 +149,10 @@ GlsScanner::GlsScanner( string const & fileName ):
// If the file begins with the dedicated Unicode marker, we just consume
// it. If, on the other hand, it's not, we return the bytes back
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
encoding = Utf8::Utf16LE;
encoding = Encoding::Utf16LE;
}
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
encoding = Utf8::Utf16BE;
encoding = Encoding::Utf16BE;
}
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
// Looks like Utf8, read one more byte
@ -176,29 +161,29 @@ GlsScanner::GlsScanner( string const & fileName ):
gzclose( f );
throw exMalformedGlsFile( fileName );
}
encoding = Utf8::Utf8;
encoding = Encoding::Utf8;
}
else {
if ( gzrewind( f ) ) {
gzclose( f );
throw exCantOpen( fileName );
}
encoding = Utf8::Utf8;
encoding = Encoding::Utf8;
}
codec = QTextCodec::codecForName( Utf8::getEncodingNameFor( encoding ) );
codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
// We now can use our own readNextLine() function
lineFeed = Utf8::initLineFeed( encoding );
lineFeed = Text::initLineFeed( encoding );
wstring str;
wstring * currentField = 0;
wstring mark = U"###";
wstring titleMark = U"### Glossary title:";
wstring authorMark = U"### Author:";
wstring descriptionMark = U"### Description:";
wstring langFromMark = U"### Source language:";
wstring langToMark = U"### Target language:";
wstring endOfHeaderMark = U"### Glossary section:";
std::u32string str;
std::u32string * currentField = 0;
std::u32string mark = U"###";
std::u32string titleMark = U"### Glossary title:";
std::u32string authorMark = U"### Author:";
std::u32string descriptionMark = U"### Description:";
std::u32string langFromMark = U"### Source language:";
std::u32string langToMark = U"### Target language:";
std::u32string endOfHeaderMark = U"### Glossary section:";
size_t offset;
for ( ;; ) {
@ -211,22 +196,22 @@ GlsScanner::GlsScanner( string const & fileName ):
currentField = 0;
if ( str.compare( 0, titleMark.size(), titleMark ) == 0 ) {
dictionaryName = wstring( str, titleMark.size(), str.size() - titleMark.size() );
dictionaryName = std::u32string( str, titleMark.size(), str.size() - titleMark.size() );
currentField = &dictionaryName;
}
else if ( str.compare( 0, authorMark.size(), authorMark ) == 0 ) {
dictionaryAuthor = wstring( str, authorMark.size(), str.size() - authorMark.size() );
dictionaryAuthor = std::u32string( str, authorMark.size(), str.size() - authorMark.size() );
currentField = &dictionaryAuthor;
}
else if ( str.compare( 0, descriptionMark.size(), descriptionMark ) == 0 ) {
dictionaryDecription = wstring( str, descriptionMark.size(), str.size() - descriptionMark.size() );
dictionaryDecription = std::u32string( str, descriptionMark.size(), str.size() - descriptionMark.size() );
currentField = &dictionaryDecription;
}
else if ( str.compare( 0, langFromMark.size(), langFromMark ) == 0 ) {
langFrom = wstring( str, langFromMark.size(), str.size() - langFromMark.size() );
langFrom = std::u32string( str, langFromMark.size(), str.size() - langFromMark.size() );
}
else if ( str.compare( 0, langToMark.size(), langToMark ) == 0 ) {
langTo = wstring( str, langToMark.size(), str.size() - langToMark.size() );
langTo = std::u32string( str, langToMark.size(), str.size() - langToMark.size() );
}
else if ( str.compare( 0, endOfHeaderMark.size(), endOfHeaderMark ) == 0 ) {
break;
@ -241,7 +226,7 @@ GlsScanner::GlsScanner( string const & fileName ):
}
}
bool GlsScanner::readNextLine( wstring & out, size_t & offset )
bool GlsScanner::readNextLine( std::u32string & out, size_t & offset )
{
offset = (size_t)( gztell( f ) - readBufferLeft );
@ -268,7 +253,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset )
return false;
}
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
if ( pos == -1 ) {
return false;
}
@ -335,7 +320,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -361,16 +346,6 @@ public:
~GlsDictionary();
string getName() noexcept override
{
return dictionaryName;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -391,10 +366,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -443,7 +420,7 @@ private:
GlsDictionary::GlsDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
dz( 0 ),
chunks( idx, idxHeader.chunksOffset )
@ -461,11 +438,7 @@ GlsDictionary::GlsDictionary( string const & id, string const & indexFile, vecto
idx.seek( sizeof( idxHeader ) );
vector< char > dName( idx.read< uint32_t >() );
if ( dName.size() > 0 ) {
idx.read( &dName.front(), dName.size() );
dictionaryName = string( &dName.front(), dName.size() );
}
idx.readU32SizeAndData<>( dictionaryName );
// Initialize the index
@ -529,11 +502,11 @@ QString const & GlsDictionary::getDescription()
try {
GlsScanner scanner( getDictionaryFilenames()[ 0 ] );
string str = Utf8::encode( scanner.getDictionaryAuthor() );
string str = Text::toUtf8( scanner.getDictionaryAuthor() );
if ( !str.empty() ) {
dictionaryDescription = QObject::tr( "Author: %1%2" ).arg( QString::fromUtf8( str.c_str() ) ).arg( "\n\n" );
}
str = Utf8::encode( scanner.getDictionaryDescription() );
str = Text::toUtf8( scanner.getDictionaryDescription() );
if ( !str.empty() ) {
QString desc = QString::fromUtf8( str.c_str() );
desc.replace( "\t", "<br/>" );
@ -543,7 +516,7 @@ QString const & GlsDictionary::getDescription()
}
}
catch ( std::exception & e ) {
gdWarning( "GLS dictionary description reading failed: %s, error: %s\n", getName().c_str(), e.what() );
qWarning( "GLS dictionary description reading failed: %s, error: %s", getName().c_str(), e.what() );
}
if ( dictionaryDescription.isEmpty() ) {
@ -574,14 +547,14 @@ void GlsDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Gls: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Gls: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Gls: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Gls: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -618,7 +591,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
}
else {
string articleData =
Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
Iconv::toUtf8( Text::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) {
@ -647,7 +620,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
end_pos = 0;
for ( ;; ) {
end_pos = headword.find( '|', start_pos );
if ( end_pos == wstring::npos ) {
if ( end_pos == std::u32string::npos ) {
string hw = headword.substr( start_pos );
if ( !hw.empty() ) {
headwords.push_back( hw );
@ -822,7 +795,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
text = Html::unescape( QString::fromStdString( articleStr ) );
}
catch ( std::exception & ex ) {
gdWarning( "Gls: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Gls: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -830,7 +803,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring word;
std::u32string word;
GlsDictionary & dict;
QAtomicInt isCancelled;
@ -838,7 +811,7 @@ class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
public:
GlsHeadwordsRequest( wstring const & word_, GlsDictionary & dict_ ):
GlsHeadwordsRequest( std::u32string const & word_, GlsDictionary & dict_ ):
word( word_ ),
dict( dict_ )
{
@ -871,7 +844,7 @@ void GlsHeadwordsRequest::run()
try {
vector< WordArticleLink > chain = dict.findArticles( word );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -884,7 +857,7 @@ void GlsHeadwordsRequest::run()
dict.loadArticleText( x.articleOffset, headwords, articleText );
wstring headwordDecoded = Utf8::decode( headwords.front() );
std::u32string headwordDecoded = Text::toUtf32( headwords.front() );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the
@ -902,7 +875,7 @@ void GlsHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< GlsHeadwordsRequest >( word, *this ) :
@ -915,8 +888,8 @@ sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( ws
class GlsArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
GlsDictionary & dict;
bool ignoreDiacritics;
@ -925,8 +898,8 @@ class GlsArticleRequest: public Dictionary::DataRequest
public:
GlsArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
GlsArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
GlsDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -970,13 +943,13 @@ void GlsArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -1002,16 +975,16 @@ void GlsArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( Text::toUtf32( headword ) );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert(
pair( Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), pair( headword, articleText ) ) );
pair( Folding::applySimpleCaseOnly( Text::toUtf32( headword ) ), pair( headword, articleText ) ) );
articlesIncluded.insert( x.articleOffset );
}
@ -1024,7 +997,7 @@ void GlsArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += i->second.second;
@ -1045,9 +1018,9 @@ void GlsArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1102,7 +1075,7 @@ void GlsResourceRequest::run()
try {
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
GD_DPRINTF( "gls resource name is %s\n", n.c_str() );
qDebug( "gls resource name is %s", n.c_str() );
try {
QMutexLocker _( &dataMutex );
@ -1123,7 +1096,7 @@ void GlsResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1187,10 +1160,10 @@ void GlsResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "GLS: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "GLS: Failed loading resource \"%s\" for \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -1265,12 +1238,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// which the incident happened. We need alive scanner for that.
// Building the index
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
gdDebug( "Gls: Building the index for dictionary: %s\n",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
qDebug( "Gls: Building the index for dictionary: %s",
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -1281,18 +1254,18 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( idxHeader );
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
idx.write( (uint32_t)dictionaryName.size() );
idx.write( dictionaryName.data(), dictionaryName.size() );
idxHeader.glsEncoding = scanner.getEncoding();
idxHeader.glsEncoding = static_cast< uint32_t >( scanner.getEncoding() );
IndexedWords indexedWords;
ChunkedStorage::Writer chunks( idx );
wstring curString;
std::u32string curString;
size_t curOffset;
uint32_t articleCount = 0, wordCount = 0;
@ -1312,12 +1285,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Parse headwords
list< wstring > allEntryWords;
wstring::size_type start_pos = 0, end_pos = 0;
list< std::u32string > allEntryWords;
std::u32string::size_type start_pos = 0, end_pos = 0;
for ( ;; ) {
end_pos = curString.find( '|', start_pos );
if ( end_pos == wstring::npos ) {
wstring headword = curString.substr( start_pos );
if ( end_pos == std::u32string::npos ) {
std::u32string headword = curString.substr( start_pos );
if ( !headword.empty() ) {
allEntryWords.push_back( headword );
}
@ -1370,7 +1343,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// If there was a zip file, index it too
if ( zipFileName.size() ) {
GD_DPRINTF( "Indexing zip file\n" );
qDebug( "Indexing zip file" );
idxHeader.hasZipFile = 1;
@ -1435,7 +1408,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< GlsDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "GLS dictionary reading failed: %s:%u, error: %s\n", fileName.c_str(), atLine, e.what() );
qWarning( "GLS dictionary reading failed: %s:%u, error: %s", fileName.c_str(), atLine, e.what() );
}
}

View file

@ -2,46 +2,30 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "hunspell.hh"
#include "utf8.hh"
#include "text.hh"
#include "htmlescape.hh"
#include "iconv.hh"
#include "folding.hh"
#include "wstring_qt.hh"
#include "language.hh"
#include "langcoder.hh"
#include <QRunnable>
#include <QThreadPool>
#include <QSemaphore>
#include <QRegularExpression>
#include <QDir>
#include <QCoreApplication>
#include <QFileInfo>
#include <set>
#ifndef INCLUDE_LIBRARY_PATH
#include <hunspell.hxx>
#else
#include <hunspell/hunspell.hxx>
#endif
#include "gddebug.hh"
#include "utils.hh"
#include <QtConcurrent>
#include <QtConcurrentRun>
#include <hunspell/hunspell.hxx>
namespace HunspellMorpho {
using namespace Dictionary;
using gd::wchar;
namespace {
class HunspellDictionary: public Dictionary::Class
{
string name;
Hunspell hunspell;
#ifdef Q_OS_WIN32
@ -56,23 +40,13 @@ public:
/// files[ 0 ] should be .aff file, files[ 1 ] should be .dic file.
HunspellDictionary( string const & id, string const & name_, vector< string > const & files ):
Dictionary::Class( id, files ),
name( name_ ),
#ifdef Q_OS_WIN32
hunspell( Utf8ToLocal8Bit( files[ 0 ] ).c_str(), Utf8ToLocal8Bit( files[ 1 ] ).c_str() )
#else
hunspell( files[ 0 ].c_str(), files[ 1 ].c_str() )
#endif
{
}
string getName() noexcept override
{
return name;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
dictionaryName = name_;
}
unsigned long getArticleCount() noexcept override
@ -85,18 +59,19 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
bool isLocalDictionary() override
{
return true;
}
vector< wstring > getAlternateWritings( const wstring & word ) noexcept override;
vector< std::u32string > getAlternateWritings( const std::u32string & word ) noexcept override;
protected:
@ -117,27 +92,23 @@ private:
// QMutex hunspellMutex;
};
/// Encodes the given string to be passed to the hunspell object. May throw
/// Iconv::Ex
string encodeToHunspell( Hunspell &, wstring const & );
/// Decodes the given string returned by the hunspell object. May throw
/// Iconv::Ex
wstring decodeFromHunspell( Hunspell &, char const * );
std::u32string decodeFromHunspell( Hunspell &, char const * );
/// Generates suggestions via hunspell
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell );
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell );
/// Generates suggestions for compound expression
void getSuggestionsForExpression( wstring const & expression,
vector< wstring > & suggestions,
void getSuggestionsForExpression( std::u32string const & expression,
vector< std::u32string > & suggestions,
QMutex & hunspellMutex,
Hunspell & hunspell );
/// Returns true if the string contains whitespace, false otherwise
bool containsWhitespace( wstring const & str )
bool containsWhitespace( std::u32string const & str )
{
wchar const * next = str.c_str();
char32_t const * next = str.c_str();
for ( ; *next; ++next ) {
if ( Folding::isWhitespace( *next ) ) {
@ -167,9 +138,9 @@ void HunspellDictionary::loadIcon() noexcept
dictionaryIconLoaded = true;
}
vector< wstring > HunspellDictionary::getAlternateWritings( wstring const & word ) noexcept
vector< std::u32string > HunspellDictionary::getAlternateWritings( std::u32string const & word ) noexcept
{
vector< wstring > results;
vector< std::u32string > results;
if ( containsWhitespace( word ) ) {
getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell );
@ -185,14 +156,14 @@ class HunspellArticleRequest: public Dictionary::DataRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
public:
HunspellArticleRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellArticleRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -226,7 +197,7 @@ void HunspellArticleRequest::run()
vector< string > suggestions;
try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases
@ -236,25 +207,25 @@ void HunspellArticleRequest::run()
QMutexLocker _( &hunspellMutex );
string encodedWord = encodeToHunspell( hunspell, trimmedWord );
string trimmedWord_utf8 = Iconv::toUtf8( Text::utf32_le, trimmedWord );
if ( hunspell.spell( encodedWord ) ) {
if ( hunspell.spell( trimmedWord_utf8 ) ) {
// Good word -- no spelling suggestions then.
finish();
return;
}
suggestions = hunspell.suggest( encodedWord );
suggestions = hunspell.suggest( trimmedWord_utf8 );
if ( !suggestions.empty() ) {
// There were some suggestions made for us. Make an appropriate output.
string result = "<div class=\"gdspellsuggestion\">"
+ Html::escape( QCoreApplication::translate( "Hunspell", "Spelling suggestions: " ).toUtf8().data() );
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
for ( vector< string >::size_type x = 0; x < suggestions.size(); ++x ) {
wstring suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
std::u32string suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
if ( Folding::applySimpleCaseOnly( suggestion ) == lowercasedWord ) {
// If among suggestions we see the same word just with the different
@ -265,7 +236,7 @@ void HunspellArticleRequest::run()
return;
}
string suggestionUtf8 = Utf8::encode( suggestion );
string suggestionUtf8 = Text::toUtf8( suggestion );
result += "<a href=\"bword:";
result += Html::escape( suggestionUtf8 ) + "\">";
@ -284,17 +255,19 @@ void HunspellArticleRequest::run()
}
}
catch ( Iconv::Ex & e ) {
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
}
catch ( std::exception & e ) {
gdWarning( "Hunspell: error: %s\n", e.what() );
qWarning( "Hunspell: error: %s", e.what() );
}
finish();
}
sptr< DataRequest >
HunspellDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< DataRequest > HunspellDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
return std::make_shared< HunspellArticleRequest >( word, getHunspellMutex(), hunspell );
@ -307,7 +280,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
@ -315,7 +288,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
public:
HunspellHeadwordsRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellHeadwordsRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -347,7 +320,7 @@ void HunspellHeadwordsRequest::run()
return;
}
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.size() > 80 ) {
// We won't do anything for overly long sentences since that would probably
@ -357,7 +330,7 @@ void HunspellHeadwordsRequest::run()
}
if ( containsWhitespace( trimmedWord ) ) {
vector< wstring > results;
vector< std::u32string > results;
getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell );
@ -367,7 +340,7 @@ void HunspellHeadwordsRequest::run()
}
}
else {
QList< wstring > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
QList< std::u32string > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
if ( !suggestions.empty() ) {
QMutexLocker _( &dataMutex );
@ -381,22 +354,18 @@ void HunspellHeadwordsRequest::run()
finish();
}
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell )
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell )
{
QList< wstring > result;
vector< string > suggestions;
QList< std::u32string > result;
try {
QMutexLocker _( &hunspellMutex );
string encodedWord = encodeToHunspell( hunspell, word );
suggestions = hunspell.analyze( encodedWord );
auto suggestions = hunspell.analyze( Iconv::toUtf8( Text::utf32_le, word ) );
if ( !suggestions.empty() ) {
// There were some suggestions made for us. Make an appropriate output.
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
static QRegularExpression cutStem( R"(^\s*st:(((\s+(?!\w{2}:)(?!-)(?!\+))|\S+)+))" );
@ -409,11 +378,11 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
suggestion.chop( suggestion.length() - n );
}
GD_DPRINTF( ">>>Sugg: %s\n", suggestion.toLocal8Bit().data() );
qDebug( ">>>Sugg: %s", suggestion.toLocal8Bit().data() );
auto match = cutStem.match( suggestion.trimmed() );
if ( match.hasMatch() ) {
wstring alt = match.captured( 1 ).toStdU32String();
std::u32string alt = match.captured( 1 ).toStdU32String();
if ( Folding::applySimpleCaseOnly( alt ) != lowercasedWord ) // No point in providing same word
{
@ -424,14 +393,14 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
}
}
catch ( Iconv::Ex & e ) {
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
}
return result;
}
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return std::make_shared< HunspellHeadwordsRequest >( word, getHunspellMutex(), hunspell );
@ -445,14 +414,14 @@ class HunspellPrefixMatchRequest: public Dictionary::WordSearchRequest
QMutex & hunspellMutex;
Hunspell & hunspell;
wstring word;
std::u32string word;
QAtomicInt isCancelled;
QFuture< void > f;
public:
HunspellPrefixMatchRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
HunspellPrefixMatchRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
hunspellMutex( hunspellMutex_ ),
hunspell( hunspell_ ),
word( word_ )
@ -485,7 +454,7 @@ void HunspellPrefixMatchRequest::run()
}
try {
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
if ( trimmedWord.empty() || containsWhitespace( trimmedWord ) ) {
// For now we don't analyze whitespace-containing phrases
@ -495,9 +464,7 @@ void HunspellPrefixMatchRequest::run()
QMutexLocker _( &hunspellMutex );
string encodedWord = encodeToHunspell( hunspell, trimmedWord );
if ( hunspell.spell( encodedWord ) ) {
if ( hunspell.spell( Iconv::toUtf8( Text::utf32_le, trimmedWord ) ) ) {
// Known word -- add it to the result
QMutexLocker _( &dataMutex );
@ -506,20 +473,20 @@ void HunspellPrefixMatchRequest::run()
}
}
catch ( Iconv::Ex & e ) {
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
}
finish();
}
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
/// Creates a HunspellPrefixMatchRequest for `word`, handing it the mutex
/// returned by getHunspellMutex() and this dictionary's Hunspell object.
/// The maxResults argument is left unnamed and is not used here.
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{
return std::make_shared< HunspellPrefixMatchRequest >( word, getHunspellMutex(), hunspell );
}
void getSuggestionsForExpression( wstring const & expression,
vector< wstring > & suggestions,
void getSuggestionsForExpression( std::u32string const & expression,
vector< std::u32string > & suggestions,
QMutex & hunspellMutex,
Hunspell & hunspell )
{
@ -527,15 +494,15 @@ void getSuggestionsForExpression( wstring const & expression,
// This is useful for compound expressions where some words are
// in a different form, e.g. "dozing off" -> "doze off".
wstring trimmedWord = Folding::trimWhitespaceOrPunct( expression );
wstring word, punct;
QList< wstring > words;
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( expression );
std::u32string word, punct;
QList< std::u32string > words;
suggestions.clear();
// Parse string to separate words
for ( wchar const * c = trimmedWord.c_str();; ++c ) {
for ( char32_t const * c = trimmedWord.c_str();; ++c ) {
if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( *c ) ) {
if ( word.size() ) {
words.push_back( word );
@ -566,7 +533,7 @@ void getSuggestionsForExpression( wstring const & expression,
// Combine result strings from suggestions
QList< wstring > results;
QList< std::u32string > results;
for ( const auto & i : words ) {
word = i;
@ -576,13 +543,13 @@ void getSuggestionsForExpression( wstring const & expression,
}
}
else {
QList< wstring > sugg = suggest( word, hunspellMutex, hunspell );
QList< std::u32string > sugg = suggest( word, hunspellMutex, hunspell );
int suggNum = sugg.size() + 1;
if ( suggNum > 3 ) {
suggNum = 3;
}
int resNum = results.size();
wstring resultStr;
std::u32string resultStr;
if ( resNum == 0 ) {
for ( int k = 0; k < suggNum; k++ ) {
@ -612,35 +579,17 @@ void getSuggestionsForExpression( wstring const & expression,
}
}
/// Converts the given wide string with an Iconv converter constructed with
/// Iconv::GdWchar and returns the converted text via QString::toStdString().
/// The hunspell argument is kept for interface compatibility but is not
/// consulted by this function. May throw Iconv::Ex.
string encodeToHunspell( Hunspell & hunspell, wstring const & str )
{
  Iconv conv( Iconv::GdWchar );

  // The converter is fed the raw bytes of the string, so the length is
  // expressed in bytes rather than in characters.
  void const * in = str.data();
  size_t inLeft   = str.size() * sizeof( wchar );

  // No scratch output buffer is allocated here: the former vector and its
  // out/outLeft cursors were never passed to convert().
  return conv.convert( in, inLeft ).toStdString();
}
wstring decodeFromHunspell( Hunspell & hunspell, char const * str )
std::u32string decodeFromHunspell( Hunspell & hunspell, char const * str )
{
Iconv conv( hunspell.get_dic_encoding() );
void const * in = str;
size_t inLeft = strlen( str );
vector< wchar > result( inLeft + 1 ); // +1 isn't needed, but see above
vector< char32_t > result( inLeft + 1 ); // +1 isn't needed, but see above
void * out = &result.front();
size_t outLeft = result.size() * sizeof( wchar );
size_t outLeft = result.size() * sizeof( char32_t );
QString convStr = conv.convert( in, inLeft );
return convStr.toStdU32String();

View file

@ -1,5 +1,5 @@
#include "lingualibre.hh"
#include "utf8.hh"
#include "text.hh"
#include "audiolink.hh"
#include <QJsonArray>
@ -40,8 +40,8 @@ class LinguaArticleRequest: public Dictionary::DataRequest
public:
LinguaArticleRequest( wstring const & word,
vector< wstring > const & alts,
LinguaArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & languageCode_,
QString const & langWikipediaID_,
string const & dictionaryId_,
@ -51,7 +51,7 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
private slots:
virtual void requestFinished( QNetworkReply * );
@ -60,7 +60,6 @@ private slots:
class LinguaDictionary: public Dictionary::Class
{
string name;
QString languageCode;
QString langWikipediaID;
QNetworkAccessManager & netMgr;
@ -68,10 +67,10 @@ class LinguaDictionary: public Dictionary::Class
public:
LinguaDictionary( string const & id, string name_, QString languageCode_, QNetworkAccessManager & netMgr_ ):
Dictionary::Class( id, vector< string >() ),
name( std::move( name_ ) ),
languageCode( std::move( languageCode_ ) ),
netMgr( netMgr_ )
{
dictionaryName = name_;
/* map of iso lang code to wikipedia lang id
Data was obtained by this query on https://commons-query.wikimedia.org/
@ -166,16 +165,6 @@ WHERE {
}
}
/// Returns the dictionary name held in the `name` member.
string getName() noexcept override
{
return name;
}
/// Returns an empty property map.
map< Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -186,7 +175,7 @@ WHERE {
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -195,7 +184,10 @@ WHERE {
return sr;
}
sptr< DataRequest > getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) override
sptr< DataRequest > getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ) override
{
if ( word.size() < 50 ) {
return std::make_shared< LinguaArticleRequest >( word, alts, languageCode, langWikipediaID, getId(), netMgr );
@ -242,8 +234,8 @@ void LinguaArticleRequest::cancel()
finish();
}
LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
const vector< wstring > & alts,
LinguaArticleRequest::LinguaArticleRequest( const std::u32string & str,
const vector< std::u32string > & alts,
const QString & languageCode_,
const QString & langWikipediaID,
const string & dictionaryId_,
@ -256,7 +248,7 @@ LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
addQuery( mgr, str );
}
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring & word )
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const std::u32string & word )
{
// Doc of the <https://www.mediawiki.org/wiki/API:Query>
@ -284,7 +276,7 @@ void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring
auto netReply = std::shared_ptr< QNetworkReply >( mgr.get( netRequest ) );
netReplies.emplace_back( netReply, Utf8::encode( word ) );
netReplies.emplace_back( netReply, Text::toUtf8( word ) );
}

View file

@ -11,17 +11,10 @@
#include "dict/sounddir.hh"
#include "dict/hunspell.hh"
#include "dictdfiles.hh"
#include "dict/romaji.hh"
#include "dict/customtransliteration.hh"
#include "dict/russiantranslit.hh"
#include "dict/german.hh"
#include "dict/greektranslit.hh"
#include "dict/belarusiantranslit.hh"
#include "dict/website.hh"
#include "dict/forvo.hh"
#include "dict/programs.hh"
#include "dict/voiceengines.hh"
#include "gddebug.hh"
#include "dict/xdxf.hh"
#include "dict/sdict.hh"
#include "dict/aard.hh"
@ -34,12 +27,19 @@
#include "dict/lingualibre.hh"
#include "metadata.hh"
#ifndef NO_EPWING_SUPPORT
#include "dict/transliteration/belarusian.hh"
#include "dict/transliteration/custom.hh"
#include "dict/transliteration/german.hh"
#include "dict/transliteration/greek.hh"
#include "dict/transliteration/romaji.hh"
#include "dict/transliteration/russian.hh"
#ifdef EPWING_SUPPORT
#include "dict/epwing.hh"
#endif
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
#include "dict/chinese.hh"
#include "dict/transliteration/chinese.hh"
#endif
#include <QMessageBox>
@ -83,7 +83,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
<< "*.zim"
<< "*.zimaa"
#endif
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
<< "*catalogs"
#endif
;
@ -181,7 +181,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
#ifdef MAKE_ZIM_SUPPORT
addDicts( Zim::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this, maxHeadwordToExpand ) );
#endif
#ifndef NO_EPWING_SUPPORT
#ifdef EPWING_SUPPORT
addDicts( Epwing::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this ) );
#endif
}
@ -243,10 +243,10 @@ void loadDictionaries( QWidget * parent,
///// We create transliterations synchronously since they are very simple
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
addDicts( Chinese::makeDictionaries( cfg.transliteration.chinese ) );
addDicts( ChineseTranslit::makeDictionaries( cfg.transliteration.chinese ) );
#endif
addDicts( Romaji::makeDictionaries( cfg.transliteration.romaji ) );
addDicts( RomajiTranslit::makeDictionaries( cfg.transliteration.romaji ) );
addDicts( CustomTranslit::makeDictionaries( cfg.transliteration.customTrans ) );
// Make Russian transliteration
@ -274,13 +274,13 @@ void loadDictionaries( QWidget * parent,
addDicts( Forvo::makeDictionaries( loadDicts, cfg.forvo, dictNetMgr ) );
addDicts( Lingua::makeDictionaries( loadDicts, cfg.lingua, dictNetMgr ) );
addDicts( Programs::makeDictionaries( cfg.programs ) );
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
addDicts( VoiceEngines::makeDictionaries( cfg.voiceEngines ) );
#endif
addDicts( DictServer::makeDictionaries( cfg.dictServers ) );
GD_DPRINTF( "Load done\n" );
qDebug( "Load done" );
// Remove any stale index files
@ -290,12 +290,12 @@ void loadDictionaries( QWidget * parent,
for ( unsigned x = dictionaries.size(); x--; ) {
ret = ids.insert( dictionaries[ x ]->getId() );
if ( !ret.second ) {
gdWarning( R"(Duplicate dictionary ID found: ID=%s, name="%s", path="%s")",
dictionaries[ x ]->getId().c_str(),
dictionaries[ x ]->getName().c_str(),
dictionaries[ x ]->getDictionaryFilenames().empty() ?
"" :
dictionaries[ x ]->getDictionaryFilenames()[ 0 ].c_str() );
qWarning( R"(Duplicate dictionary ID found: ID=%s, name="%s", path="%s")",
dictionaries[ x ]->getId().c_str(),
dictionaries[ x ]->getName().c_str(),
dictionaries[ x ]->getDictionaryFilenames().empty() ?
"" :
dictionaries[ x ]->getDictionaryFilenames()[ 0 ].c_str() );
}
}

View file

@ -5,19 +5,14 @@
#include "dictfile.hh"
#include "iconv.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "audiolink.hh"
#include "gddebug.hh"
#include <set>
#include <string>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#define OV_EXCLUDE_STATIC_CALLBACKS
#include <vorbis/vorbisfile.h>
#include <QDir>
@ -29,7 +24,6 @@
namespace Lsa {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -65,7 +59,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -149,7 +143,7 @@ Entry::Entry( File::Index & f )
// Read the size of the recording, in samples
samplesLength = f.read< uint32_t >();
name = Iconv::toUtf8( Iconv::Utf16Le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
name = Iconv::toUtf8( Text::utf16_le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
}
class LsaDictionary: public BtreeIndexing::BtreeDictionary
@ -164,11 +158,6 @@ public:
string getName() noexcept override;
/// Returns an empty property map.
map< Dictionary::Property, string > getProperties() noexcept override
{
  // An empty braced initializer value-initializes the returned map.
  return {};
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.soundsCount;
@ -179,8 +168,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -201,7 +192,7 @@ string LsaDictionary::getName() noexcept
LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() )
{
// Initialize the index
@ -209,9 +200,9 @@ LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vecto
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
}
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -225,13 +216,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, string > mainArticles, alternateArticles;
multimap< std::u32string, string > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -246,12 +237,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, string > & mapToUse = ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
multimap< std::u32string, string > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.word ) );
@ -264,7 +256,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
string result;
multimap< wstring, string >::const_iterator i;
multimap< std::u32string, string >::const_iterator i;
result += "<table class=\"lsa_play\">";
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -399,13 +391,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
string strippedName = Utils::endsWithIgnoreCase( name, ".wav" ) ? string( name, 0, name.size() - 4 ) : name;
vector< WordArticleLink > chain = findArticles( Utf8::decode( strippedName ) );
vector< WordArticleLink > chain = findArticles( Text::toUtf32( strippedName ) );
if ( chain.empty() ) {
return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource
}
File::Index f( getDictionaryFilenames()[ 0 ], "rb" );
File::Index f( getDictionaryFilenames()[ 0 ], QIODevice::ReadOnly );
f.seek( chain[ 0 ].articleOffset );
Entry e( f );
@ -468,13 +460,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
long result = ov_read( &vf, ptr, left, 0, 2, 1, &bitstream );
if ( result <= 0 ) {
gdWarning( "Failed to read Vorbis data (code = %ld)\n", result );
qWarning( "Failed to read Vorbis data (code = %ld)", result );
memset( ptr, 0, left );
break;
}
if ( result > left ) {
GD_FDPRINTF( stderr, "Warning: Vorbis decode returned more data than requested.\n" );
qWarning( "Warning: Vorbis decode returned more data than requested." );
result = left;
}
@ -522,7 +514,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
try {
File::Index f( *i, "rb" );
File::Index f( *i, QIODevice::ReadOnly );
/// Check the signature
@ -543,11 +535,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
// Building the index
gdDebug( "Lsa: Building the index for dictionary: %s\n", i->c_str() );
qDebug( "Lsa: Building the index for dictionary: %s", i->c_str() );
initializing.indexingDictionary( Utils::Fs::basename( *i ) );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -563,7 +555,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
/// XXX handle big-endian machines here!
auto entriesCount = f.read< uint32_t >();
GD_DPRINTF( "%s: %u entries\n", i->c_str(), entriesCount );
qDebug( "%s: %u entries", i->c_str(), entriesCount );
idxHeader.soundsCount = entriesCount;
@ -578,11 +570,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Remove the extension, no need for that in the index
e.name = stripExtension( e.name );
GD_DPRINTF( "Read filename %s (%u at %u)<\n", e.name.c_str(), e.samplesLength, e.samplesOffset );
qDebug( "Read filename %s (%u at %u)<", e.name.c_str(), e.samplesLength, e.samplesOffset );
// Insert new entry into an index
indexedWords.addWord( Utf8::decode( e.name ), offset );
indexedWords.addWord( Text::toUtf32( e.name ), offset );
}
idxHeader.vorbisOffset = f.tell();
@ -617,7 +609,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< LsaDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Lingvo's LSA reading failed: %s, error: %s\n", i->c_str(), e.what() );
qWarning( "Lingvo's LSA reading failed: %s, error: %s", i->c_str(), e.what() );
}
}

View file

@ -35,7 +35,6 @@
#include <QtCore5Compat/QTextCodec>
#include "decompress.hh"
#include "gddebug.hh"
#include "ripemd.hh"
#include "utils.hh"
#include "htmlescape.hh"
@ -118,7 +117,7 @@ bool MdictParser::open( const char * filename )
filename_ = QString::fromUtf8( filename );
file_ = new QFile( filename_ );
gdDebug( "MdictParser: open %s", filename );
qDebug( "MdictParser: open %s", filename );
if ( file_.isNull() || !file_->exists() ) {
return false;
@ -233,7 +232,7 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
case 0x00000000:
// No compression
if ( !checkAdler32( buf, size, checksum ) ) {
gdWarning( "MDict: parseCompressedBlock: plain: checksum not match" );
qWarning( "MDict: parseCompressedBlock: plain: checksum not match" );
return false;
}
@ -248,13 +247,13 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
result = lzo1x_decompress_safe( (const uchar *)buf, size, (uchar *)decompressedBlock.data(), &blockSize, NULL );
if ( result != LZO_E_OK || blockSize != (lzo_uint)decompressedBlockSize ) {
gdWarning( "MDict: parseCompressedBlock: decompression failed" );
qWarning( "MDict: parseCompressedBlock: decompression failed" );
return false;
}
if ( checksum
!= lzo_adler32( lzo_adler32( 0, NULL, 0 ), (const uchar *)decompressedBlock.constData(), blockSize ) ) {
gdWarning( "MDict: parseCompressedBlock: lzo: checksum does not match" );
qWarning( "MDict: parseCompressedBlock: lzo: checksum does not match" );
return false;
}
} break;
@ -263,12 +262,12 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
// zlib compression
decompressedBlock = zlibDecompress( buf, size, checksum );
if ( decompressedBlock.isEmpty() ) {
gdWarning( "MDict: parseCompressedBlock: zlib: failed to decompress or checksum does not match" );
qWarning( "MDict: parseCompressedBlock: zlib: failed to decompress or checksum does not match" );
return false;
}
break;
default:
gdWarning( "MDict: parseCompressedBlock: unknown type" );
qWarning( "MDict: parseCompressedBlock: unknown type" );
return false;
}
@ -320,7 +319,7 @@ bool MdictParser::readHeader( QDataStream & in )
in.setByteOrder( QDataStream::LittleEndian );
in >> checksum;
if ( !checkAdler32( headerTextUtf16.constData(), headerTextUtf16.size(), checksum ) ) {
gdWarning( "MDict: readHeader: checksum does not match" );
qWarning( "MDict: readHeader: checksum does not match" );
return false;
}
headerTextUtf16.clear();

View file

@ -4,29 +4,21 @@
#include "mdx.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "dictfile.hh"
#include "wstring.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "gddebug.hh"
#include "langcoder.hh"
#include "audiolink.hh"
#include "ex.hh"
#include "mdictparser.hh"
#include "filetype.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
#include <algorithm>
#include <map>
#include <set>
#include <list>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include "globalregex.hh"
#include "tiff.hh"
#include "utils.hh"
@ -35,16 +27,15 @@
#include <QDir>
#include <QRegularExpression>
#include <QString>
#include <QStringBuilder>
#include <QThreadPool>
#include <QtConcurrent>
#include <QtConcurrentRun>
namespace Mdx {
using std::map;
using std::multimap;
using std::set;
using gd::wstring;
using gd::wchar;
using std::list;
using std::pair;
using std::string;
@ -135,7 +126,7 @@ public:
/// Checks whether the given file exists in the mdd file or not.
/// Note that this function is thread-safe, since it does not access mdd file.
bool hasFile( gd::wstring const & name )
bool hasFile( std::u32string const & name )
{
if ( !isFileOpen ) {
return false;
@ -146,7 +137,7 @@ public:
/// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & result )
bool loadFile( std::u32string const & name, std::vector< char > & result )
{
if ( !isFileOpen ) {
return false;
@ -218,16 +209,6 @@ public:
void deferredInit() override;
/// Returns the value of `dictionaryName`.
string getName() noexcept override
{
return dictionaryName;
}
/// Returns an empty property map.
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -248,8 +229,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
QString const & getDescription() override;
@ -297,12 +280,12 @@ private:
friend class MdxArticleRequest;
friend class MddResourceRequest;
void loadResourceFile( const wstring & resourceName, vector< char > & data );
void loadResourceFile( const std::u32string & resourceName, vector< char > & data );
};
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxFileName( indexFile ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
@ -310,12 +293,7 @@ MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vecto
{
// Read the dictionary's name
idx.seek( sizeof( idxHeader ) );
size_t len = idx.read< uint32_t >();
vector< char > buf( len );
if ( len > 0 ) {
idx.read( &buf.front(), len );
dictionaryName = string( &buf.front(), len );
}
idx.readU32SizeAndData<>( dictionaryName );
//fallback, use filename as dictionary name
if ( dictionaryName.empty() ) {
@ -324,12 +302,7 @@ MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vecto
}
// then read the dictionary's encoding
len = idx.read< uint32_t >();
if ( len > 0 ) {
buf.resize( len );
idx.read( &buf.front(), len );
encoding = string( &buf.front(), len );
}
idx.readU32SizeAndData<>( encoding );
dictFile.setFileName( QString::fromUtf8( dictionaryFiles[ 0 ].c_str() ) );
dictFile.open( QIODevice::ReadOnly );
@ -467,7 +440,7 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled )
// return;
gdDebug( "MDict: Building the full-text index for dictionary: %s", getName().c_str() );
qDebug( "MDict: Building the full-text index for dictionary: %s", getName().c_str() );
try {
auto _dict = std::make_shared< MdxDictionary >( this->getId(), idxFileName, this->getDictionaryFilenames() );
@ -478,7 +451,7 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled )
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "MDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
qWarning( "MDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -493,7 +466,7 @@ void MdxDictionary::getArticleText( uint32_t articleAddress, QString & headword,
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
}
catch ( std::exception & ex ) {
gdWarning( "MDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
qWarning( "MDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -514,8 +487,8 @@ sptr< Dictionary::DataRequest > MdxDictionary::getSearchResults( QString const &
class MdxArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
MdxDictionary & dict;
bool ignoreDiacritics;
@ -524,8 +497,8 @@ class MdxArticleRequest: public Dictionary::DataRequest
public:
MdxArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
MdxArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
MdxDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -628,8 +601,8 @@ void MdxArticleRequest::run()
// Handle internal redirects
if ( strncmp( articleBody.c_str(), "@@@LINK=", 8 ) == 0 ) {
wstring target = Utf8::decode( articleBody.c_str() + 8 );
target = Folding::trimWhitespace( target );
std::u32string target = Text::toUtf32( articleBody.c_str() + 8 );
target = Folding::trimWhitespace( target );
// Make an additional query for this redirection
vector< WordArticleLink > altChain = dict.findArticles( target );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
@ -652,9 +625,9 @@ void MdxArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
const vector< wstring > & alts,
const wstring &,
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const std::u32string & word,
const vector< std::u32string > & alts,
const std::u32string &,
bool ignoreDiacritics )
{
return std::make_shared< MdxArticleRequest >( word, alts, *this, ignoreDiacritics );
@ -664,7 +637,7 @@ sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
class MddResourceRequest: public Dictionary::DataRequest
{
MdxDictionary & dict;
wstring resourceName;
std::u32string resourceName;
QAtomicInt isCancelled;
QFuture< void > f;
@ -673,7 +646,7 @@ public:
MddResourceRequest( MdxDictionary & dict_, string const & resourceName_ ):
Dictionary::DataRequest( &dict_ ),
dict( dict_ ),
resourceName( Utf8::decode( resourceName_ ) )
resourceName( Text::toUtf32( resourceName_ ) )
{
f = QtConcurrent::run( [ this ]() {
this->run();
@ -748,7 +721,7 @@ void MddResourceRequest::run()
}
// In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded;
set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) {
// Some runnables linger enough that they are cancelled before they start
@ -756,7 +729,7 @@ void MddResourceRequest::run()
finish();
return;
}
string u8ResourceName = Utf8::encode( resourceName );
string u8ResourceName = Text::toUtf8( resourceName );
if ( !resourceIncluded.insert( resourceName ).second ) {
finish();
return;
@ -904,7 +877,8 @@ QString & MdxDictionary::filterResource( QString & article )
void MdxDictionary::replaceLinks( QString & id, QString & article )
{
QString articleNewText;
int linkPos = 0;
qsizetype linkPos = 0;
QRegularExpressionMatchIterator it = RX::Mdx::allLinksRe.globalMatch( article );
while ( it.hasNext() ) {
QRegularExpressionMatch allLinksMatch = it.next();
@ -980,7 +954,8 @@ void MdxDictionary::replaceLinks( QString & id, QString & article )
articleNewText += linkTxt;
match = RX::Mdx::closeScriptTagRe.match( article, linkPos );
if ( match.hasMatch() ) {
articleNewText += article.mid( linkPos, match.capturedEnd() - linkPos );
articleNewText += QString( QStringLiteral( "gdOnReady(()=>{%1});</script>" ) )
.arg( article.mid( linkPos, match.capturedStart() - linkPos ) );
linkPos = match.capturedEnd();
}
continue;
@ -1141,7 +1116,7 @@ QString MdxDictionary::getCachedFileName( QString filename )
QFileInfo info( cacheDirName );
if ( !info.exists() || !info.isDir() ) {
if ( !dir.mkdir( cacheDirName ) ) {
gdWarning( "Mdx: can't create cache directory \"%s\"", cacheDirName.toUtf8().data() );
qWarning( "Mdx: can't create cache directory \"%s\"", cacheDirName.toUtf8().data() );
return QString();
}
}
@ -1159,7 +1134,7 @@ QString MdxDictionary::getCachedFileName( QString filename )
QFileInfo dirInfo( dirName );
if ( !dirInfo.exists() ) {
if ( !dir.mkdir( dirName ) ) {
gdWarning( "Mdx: can't create cache directory \"%s\"", dirName.toUtf8().data() );
qWarning( "Mdx: can't create cache directory \"%s\"", dirName.toUtf8().data() );
return QString();
}
}
@ -1174,14 +1149,14 @@ QString MdxDictionary::getCachedFileName( QString filename )
}
QFile f( fullName );
if ( !f.open( QFile::WriteOnly ) ) {
gdWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
qWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
return QString();
}
gd::wstring resourceName = filename.toStdU32String();
std::u32string resourceName = filename.toStdU32String();
vector< char > data;
// In order to prevent recursive internal redirection...
set< wstring, std::less<> > resourceIncluded;
set< std::u32string, std::less<> > resourceIncluded;
for ( ;; ) {
if ( !resourceIncluded.insert( resourceName ).second ) {
@ -1214,16 +1189,16 @@ QString MdxDictionary::getCachedFileName( QString filename )
f.close();
if ( n < (qint64)data.size() ) {
gdWarning( R"(Mdx: file "%s" writing error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
qWarning( R"(Mdx: file "%s" writing error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
return QString();
}
return fullName;
}
void MdxDictionary::loadResourceFile( const wstring & resourceName, vector< char > & data )
void MdxDictionary::loadResourceFile( const std::u32string & resourceName, vector< char > & data )
{
wstring newResourceName = resourceName;
string u8ResourceName = Utf8::encode( resourceName );
std::u32string newResourceName = resourceName;
string u8ResourceName = Text::toUtf8( resourceName );
// Convert to the Windows separator
std::replace( newResourceName.begin(), newResourceName.end(), '/', '\\' );
@ -1307,7 +1282,7 @@ private:
static bool indexIsOldOrBad( vector< string > const & dictFiles, string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || header.signature != kSignature
@ -1362,7 +1337,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( dictFiles, indexFile ) ) {
// Building the index
gdDebug( "MDict: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "MDict: Building the index for dictionary: %s", fileName.c_str() );
MdictParser parser;
list< sptr< MdictParser > > mddParsers;
@ -1378,14 +1353,14 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( File::exists( *mddIter ) ) {
sptr< MdictParser > mddParser = std::make_shared< MdictParser >();
if ( !mddParser->open( mddIter->c_str() ) ) {
gdWarning( "Broken mdd (resource) file: %s\n", mddIter->c_str() );
qWarning( "Broken mdd (resource) file: %s", mddIter->c_str() );
continue;
}
mddParsers.push_back( mddParser );
}
}
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
// We write a dummy header first. At the end of the process the header
@ -1453,7 +1428,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Finish with the chunks
idxHeader.chunksOffset = chunks.finish();
GD_DPRINTF( "Writing index...\n" );
qDebug( "Writing index..." );
// Good. Now build the index
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );

View file

@ -2,14 +2,12 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "mediawiki.hh"
#include "wstring_qt.hh"
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QUrl>
#include <QtXml>
#include <algorithm>
#include <list>
#include "gddebug.hh"
#include "audiolink.hh"
#include "langcoder.hh"
#include "utils.hh"
@ -57,11 +55,6 @@ public:
return name;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -72,9 +65,10 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
quint32 getLangFrom() const override
{
@ -139,7 +133,10 @@ class MediaWikiWordSearchRequest: public MediaWikiWordSearchRequestSlots
public:
MediaWikiWordSearchRequest( wstring const &, QString const & url, QString const & lang, QNetworkAccessManager & mgr );
MediaWikiWordSearchRequest( std::u32string const &,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr );
~MediaWikiWordSearchRequest();
@ -150,13 +147,13 @@ private:
void downloadFinished() override;
};
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str,
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( std::u32string const & str,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr ):
isCancelling( false )
{
GD_DPRINTF( "wiki request begin\n" );
qDebug( "wiki request begin" );
QUrl reqUrl( url + "/api.php?action=query&list=allpages&aplimit=40&format=xml" );
GlobalBroadcaster::instance()->addWhitelist( reqUrl.host() );
@ -180,7 +177,7 @@ MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str,
MediaWikiWordSearchRequest::~MediaWikiWordSearchRequest()
{
GD_DPRINTF( "request end\n" );
qDebug( "request end" );
}
void MediaWikiWordSearchRequest::cancel()
@ -194,7 +191,7 @@ void MediaWikiWordSearchRequest::cancel()
finish();
GD_DPRINTF( "cancel the request" );
qDebug( "cancel the request" );
}
void MediaWikiWordSearchRequest::downloadFinished()
@ -227,7 +224,7 @@ void MediaWikiWordSearchRequest::downloadFinished()
}
}
}
GD_DPRINTF( "done.\n" );
qDebug( "done." );
}
else {
setErrorString( netReply->errorString() );
@ -258,11 +255,11 @@ public:
QDomElement const sectionsElement = parseNode.firstChildElement( "sections" );
if ( sectionsElement.isNull() ) {
gdWarning( "MediaWiki: empty table of contents and missing sections element." );
qWarning( "MediaWiki: empty table of contents and missing sections element." );
return;
}
gdDebug( "MediaWiki: generating table of contents from the sections element." );
qDebug( "MediaWiki: generating table of contents from the sections element." );
MediaWikiSectionsParser parser;
parser.generateTableOfContents( sectionsElement );
articleString.replace( emptyTocPos, emptyTocIndicator.size(), parser.tableOfContents );
@ -343,17 +340,17 @@ bool MediaWikiSectionsParser::addListLevel( QString const & levelString )
int const level = levelString.toInt( &convertedToInt );
if ( !convertedToInt ) {
gdWarning( "MediaWiki: sections level is not an integer: %s", levelString.toUtf8().constData() );
qWarning( "MediaWiki: sections level is not an integer: %s", levelString.toUtf8().constData() );
return false;
}
if ( level <= 0 ) {
gdWarning( "MediaWiki: unsupported nonpositive sections level: %s", levelString.toUtf8().constData() );
qWarning( "MediaWiki: unsupported nonpositive sections level: %s", levelString.toUtf8().constData() );
return false;
}
if ( level > previousLevel + 1 ) {
gdWarning( "MediaWiki: unsupported sections level increase by more than one: from %d to %s",
previousLevel,
levelString.toUtf8().constData() );
qWarning( "MediaWiki: unsupported sections level increase by more than one: from %d to %s",
previousLevel,
levelString.toUtf8().constData() );
return false;
}
@ -396,8 +393,8 @@ class MediaWikiArticleRequest: public MediaWikiDataRequestSlots
public:
MediaWikiArticleRequest( wstring const & word,
vector< wstring > const & alts,
MediaWikiArticleRequest( std::u32string const & word,
vector< std::u32string > const & alts,
QString const & url,
QString const & lang,
QNetworkAccessManager & mgr,
@ -407,7 +404,7 @@ public:
private:
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
void requestFinished( QNetworkReply * ) override;
@ -441,8 +438,8 @@ void MediaWikiArticleRequest::cancel()
finish();
}
MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
vector< wstring > const & alts,
MediaWikiArticleRequest::MediaWikiArticleRequest( std::u32string const & str,
vector< std::u32string > const & alts,
QString const & url_,
QString const & lang_,
QNetworkAccessManager & mgr,
@ -464,9 +461,9 @@ MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
}
}
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
{
gdDebug( "MediaWiki: requesting article %s\n", QString::fromStdU32String( str ).toUtf8().data() );
qDebug( "MediaWiki: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
QUrl reqUrl( url + "/api.php?action=parse&prop=text|revid|sections&format=xml&redirects" );
@ -490,7 +487,7 @@ void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring con
void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
{
GD_DPRINTF( "Finished.\n" );
qDebug( "Finished." );
if ( isFinished() ) { // Was cancelled
return;
@ -693,7 +690,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
}
}
}
GD_DPRINTF( "done.\n" );
qDebug( "done." );
}
else {
setErrorString( netReply->errorString() );
@ -711,7 +708,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
}
}
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
{
(void)maxResults;
@ -725,8 +722,10 @@ sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word
}
}
sptr< DataRequest >
MediaWikiDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
sptr< DataRequest > MediaWikiDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool )
{
if ( word.size() > 80 ) {

View file

@ -4,8 +4,7 @@
#include "programs.hh"
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "iconv.hh"
#include "utils.hh"
#include "globalbroadcaster.hh"
@ -36,11 +35,6 @@ public:
return prg.name.toUtf8().data();
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -51,16 +45,17 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
protected:
void loadIcon() noexcept override;
};
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
{
if ( prg.type == Config::Program::PrefixMatch ) {
@ -75,8 +70,10 @@ sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word,
}
}
sptr< Dictionary::DataRequest >
ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
sptr< Dictionary::DataRequest > ProgramsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
switch ( prg.type ) {
@ -84,7 +81,7 @@ ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &,
// Audio results are instantaneous
string result;
string wordUtf8( Utf8::encode( word ) );
string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"programs_play\"><tr>";

View file

@ -6,14 +6,13 @@
#include <QProcess>
#include "dictionary.hh"
#include "config.hh"
#include "wstring.hh"
#include "text.hh"
/// Support for arbitrary programs.
namespace Programs {
using std::vector;
using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Programs const & );

View file

@ -6,25 +6,18 @@
#include "decompress.hh"
#include "folding.hh"
#include "ftshelpers.hh"
#include "gddebug.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include "sdict.hh"
#include "utf8.hh"
#include "text.hh"
#include <map>
#include <QAtomicInt>
#include <QDir>
#include <QRegularExpression>
#include <QSemaphore>
#include <QString>
#include <set>
#include <string>
#include "utils.hh"
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
namespace Sdict {
@ -33,7 +26,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -97,7 +89,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -119,15 +111,6 @@ public:
~SdictDictionary();
string getName() noexcept override
{
return dictionaryName;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -149,8 +132,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
QString const & getDescription() override;
@ -188,19 +173,15 @@ SdictDictionary::SdictDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
df( dictionaryFiles[ 0 ], "rb" )
df( dictionaryFiles[ 0 ], QIODevice::ReadOnly )
{
// Read dictionary name
idx.seek( sizeof( idxHeader ) );
vector< char > dName( idx.read< uint32_t >() );
if ( dName.size() > 0 ) {
idx.read( &dName.front(), dName.size() );
dictionaryName = string( &dName.front(), dName.size() );
}
idx.readU32SizeAndData<>( dictionaryName );
// Initialize the index
@ -237,7 +218,7 @@ void SdictDictionary::loadIcon() noexcept
string SdictDictionary::convert( string const & in )
{
// GD_DPRINTF( "Source>>>>>>>>>>: %s\n\n\n", in.c_str() );
// qDebug( "Source>>>>>>>>>>: %s\n\n", in.c_str() );
string inConverted;
@ -388,14 +369,14 @@ void SdictDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "SDict: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "SDict: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "SDict: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "SDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -416,7 +397,7 @@ void SdictDictionary::getArticleText( uint32_t articleAddress, QString & headwor
}
}
catch ( std::exception & ex ) {
gdWarning( "SDict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "SDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -436,8 +417,8 @@ SdictDictionary::getSearchResults( QString const & searchString, int searchMode,
class SdictArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
SdictDictionary & dict;
bool ignoreDiacritics;
@ -447,8 +428,8 @@ class SdictArticleRequest: public Dictionary::DataRequest
public:
SdictArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
SdictArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
SdictDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -492,13 +473,13 @@ void SdictArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -527,12 +508,12 @@ void SdictArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -540,7 +521,7 @@ void SdictArticleRequest::run()
articlesIncluded.insert( x.articleOffset );
}
catch ( std::exception & ex ) {
gdWarning( "SDict: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
qWarning( "SDict: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
}
}
@ -552,7 +533,7 @@ void SdictArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>";
@ -581,9 +562,9 @@ void SdictArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -651,7 +632,7 @@ QString const & SdictDictionary::getDescription()
QObject::tr( "Version: %1%2" ).arg( QString::fromUtf8( str.c_str(), str.size() ) ).arg( "\n\n" );
}
catch ( std::exception & ex ) {
gdWarning( "SDict: Failed description reading for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "SDict: Failed description reading for \"%s\", reason: %s", getName().c_str(), ex.what() );
}
if ( dictionaryDescription.isEmpty() ) {
@ -687,15 +668,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
try {
gdDebug( "SDict: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "SDict: Building the index for dictionary: %s", fileName.c_str() );
File::Index df( fileName, "rb" );
File::Index df( fileName, QIODevice::ReadOnly );
DCT_header dictHeader;
df.read( &dictHeader, sizeof( dictHeader ) );
if ( strncmp( dictHeader.signature, "sdct", 4 ) ) {
gdWarning( "File \"%s\" is not valid SDictionary file", fileName.c_str() );
qWarning( "File \"%s\" is not valid SDictionary file", fileName.c_str() );
continue;
}
int compression = dictHeader.compression & 0x0F;
@ -722,7 +703,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
initializing.indexingDictionary( dictName );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
@ -761,7 +742,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Insert new entry
indexedWords.addWord( Utf8::decode( string( data.data(), size ) ), articleOffset );
indexedWords.addWord( Text::toUtf32( string( data.data(), size ) ), articleOffset );
pos += el.nextWord;
}
@ -795,11 +776,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
idx.write( &idxHeader, sizeof( idxHeader ) );
}
catch ( std::exception & e ) {
gdWarning( "Sdictionary dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Sdictionary dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
continue;
}
catch ( ... ) {
qWarning( "Sdictionary dictionary indexing failed\n" );
qWarning( "Sdictionary dictionary indexing failed" );
continue;
}
} // if need to rebuild
@ -807,7 +788,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< SdictDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Sdictionary dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Sdictionary dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
}
}
return dictionaries;

View file

@ -6,33 +6,25 @@
#include "btreeidx.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "utf8.hh"
#include "text.hh"
#include "decompress.hh"
#include "langcoder.hh"
#include "wstring_qt.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
#include "filetype.hh"
#include "tiff.hh"
#include "utils.hh"
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include "iconv.hh"
#include <QString>
#include <QStringBuilder>
#include <QFile>
#include <QFileInfo>
#include <QDir>
#include <QMap>
#include <QProcess>
#include <QList>
#include <QtEndian>
#include <QRegularExpression>
#include <string>
#include <vector>
#include <utility>
@ -48,7 +40,6 @@ using std::vector;
using std::multimap;
using std::pair;
using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -97,7 +88,7 @@ struct RefEntry
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -618,16 +609,6 @@ public:
~SlobDictionary();
string getName() noexcept override
{
return dictionaryName;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -648,8 +629,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -702,7 +685,7 @@ private:
SlobDictionary::SlobDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idxFileName( indexFile ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() )
{
// Open data file
@ -871,7 +854,7 @@ void SlobDictionary::loadResource( std::string & resourceName, string & data )
vector< WordArticleLink > link;
RefEntry entry;
link = resourceIndex.findArticles( Utf8::decode( resourceName ) );
link = resourceIndex.findArticles( Text::toUtf32( resourceName ) );
if ( link.empty() ) {
return;
@ -947,7 +930,7 @@ void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Slob: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Slob: Building the full-text index for dictionary: %s", getName().c_str() );
try {
const auto slob_dic = std::make_unique< SlobDictionary >( getId(), idxFileName, getDictionaryFilenames() );
@ -955,7 +938,7 @@ void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled )
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -985,7 +968,7 @@ void SlobDictionary::getArticleText( uint32_t articleAddress, QString & headword
}
}
catch ( std::exception & ex ) {
gdWarning( "Slob: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Slob: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -1007,8 +990,8 @@ SlobDictionary::getSearchResults( QString const & searchString, int searchMode,
class SlobArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
SlobDictionary & dict;
bool ignoreDiacritics;
@ -1017,8 +1000,8 @@ class SlobArticleRequest: public Dictionary::DataRequest
public:
SlobArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
SlobArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
SlobDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1063,13 +1046,13 @@ void SlobArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint64 > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -1102,12 +1085,12 @@ void SlobArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1123,7 +1106,7 @@ void SlobArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
result += R"(<div class="slobdict"><h3 class="slobdict_headword">)";
@ -1146,9 +1129,9 @@ void SlobArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1233,10 +1216,10 @@ void SlobResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -1279,13 +1262,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
SlobFile sf;
gdDebug( "Slob: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Slob: Building the index for dictionary: %s", fileName.c_str() );
sf.open( firstName );
initializing.indexingDictionary( sf.getDictionaryName().toUtf8().constData() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
@ -1373,11 +1356,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< SlobDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Slob dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Slob dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
continue;
}
catch ( ... ) {
qWarning( "Slob dictionary initializing failed\n" );
qWarning( "Slob dictionary initializing failed" );
continue;
}
}

View file

@ -3,13 +3,12 @@
#include "sounddir.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "chunkedstorage.hh"
#include "filetype.hh"
#include "htmlescape.hh"
#include "audiolink.hh"
#include "wstring_qt.hh"
#include "utils.hh"
@ -21,7 +20,6 @@
namespace SoundDir {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -51,7 +49,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -61,7 +59,6 @@ bool indexIsOldOrBad( string const & indexFile )
class SoundDirDictionary: public BtreeIndexing::BtreeDictionary
{
string name;
QMutex idxMutex;
File::Index idx;
IdxHeader idxHeader;
@ -76,16 +73,6 @@ public:
vector< string > const & dictionaryFiles,
QString const & iconFilename_ );
string getName() noexcept override
{
return name;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.soundsCount;
@ -96,8 +83,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -113,20 +102,21 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
vector< string > const & dictionaryFiles,
QString const & iconFilename_ ):
BtreeDictionary( id, dictionaryFiles ),
name( name_ ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
iconFilename( iconFilename_ )
{
dictionaryName = name_;
// Initialize the index
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
}
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
@ -140,13 +130,13 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
}
// maps to the chain number
multimap< wstring, unsigned > mainArticles, alternateArticles;
multimap< std::u32string, unsigned > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -161,12 +151,12 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, unsigned > & mapToUse =
multimap< std::u32string, unsigned > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( chain[ x ].word ), x ) );
@ -180,7 +170,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
string result;
multimap< wstring, uint32_t >::const_iterator i;
multimap< std::u32string, uint32_t >::const_iterator i;
string displayedName;
vector< char > chunk;
@ -370,7 +360,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const &
// Now try loading that file
try {
File::Index f( fileName.toStdString(), "rb" );
File::Index f( fileName.toStdString(), QIODevice::ReadOnly );
sptr< Dictionary::DataRequestInstant > dr = std::make_shared< Dictionary::DataRequestInstant >( true );
@ -409,11 +399,11 @@ void addDir( QDir const & baseDir,
const uint32_t articleOffset = chunks.startNewBlock();
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
wstring name = i->fileName().toStdU32String();
std::u32string name = i->fileName().toStdU32String();
const wstring::size_type pos = name.rfind( L'.' );
const std::u32string::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos ) {
if ( pos != std::u32string::npos ) {
name.erase( pos );
}
@ -479,7 +469,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const &
initializing.indexingDictionary( soundDir.name.toUtf8().data() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;

View file

@ -4,46 +4,36 @@
#include "stardict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "xdxf2html.hh"
#include "htmlescape.hh"
#include "langcoder.hh"
#include "gddebug.hh"
#include "filetype.hh"
#include "indexedzip.hh"
#include "tiff.hh"
#include "ftshelpers.hh"
#include "audiolink.hh"
#include <zlib.h>
#include <map>
#include <set>
#include <string>
#include <QString>
#include <QAtomicInt>
#include <QDomDocument>
#include "ufile.hh"
#include "utils.hh"
#include <QRegularExpression>
#include "globalregex.hh"
#include <QDir>
#include <stdlib.h>
#ifndef Q_OS_WIN
#include <arpa/inet.h>
#else
#include <winsock.h>
#endif
#include <stdlib.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QString>
#include <QSemaphore>
#include <QAtomicInt>
#include <QStringList>
#include <QDomDocument>
#include "ufile.hh"
#include "utils.hh"
#include <QRegularExpression>
#include "globalregex.hh"
namespace Stardict {
@ -52,7 +42,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -76,13 +65,15 @@ DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionar
/// Contents of an ifo file
struct Ifo
{
string version;
string bookname;
uint32_t wordcount, synwordcount, idxfilesize, idxoffsetbits;
uint32_t wordcount = 0;
uint32_t synwordcount = 0;
uint32_t idxfilesize = 0;
uint32_t idxoffsetbits = 32;
string sametypesequence, dicttype, description;
string copyright, author, email, website, date;
explicit Ifo( File::Index & );
explicit Ifo( const QString & fileName );
};
enum {
@ -116,7 +107,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -129,9 +120,8 @@ class StardictDictionary: public BtreeIndexing::BtreeDictionary
QMutex idxMutex;
File::Index idx;
IdxHeader idxHeader;
string bookName;
string sameTypeSequence;
ChunkedStorage::Reader chunks;
std::unique_ptr< ChunkedStorage::Reader > chunks;
QMutex dzMutex;
dictData * dz;
QMutex resourceZipMutex;
@ -143,22 +133,6 @@ public:
~StardictDictionary();
string getName() noexcept override
{
return bookName;
}
void setName( string _name ) noexcept override
{
bookName = _name;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.wordCount;
@ -179,10 +153,12 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -235,12 +211,14 @@ StardictDictionary::StardictDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idxHeader( idx.read< IdxHeader >() ),
bookName( loadString( idxHeader.bookNameSize ) ),
sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
chunks( idx, idxHeader.chunksOffset )
idx( indexFile, QIODevice::ReadOnly )
{
// reading headers, note that reading order matters
idxHeader = idx.read< IdxHeader >();
dictionaryName = loadString( idxHeader.bookNameSize );
sameTypeSequence = loadString( idxHeader.sameTypeSequenceSize );
chunks = std::make_unique< ChunkedStorage::Reader >( idx, idxHeader.chunksOffset );
// Open the .dict file
DZ_ERRORS error;
@ -321,7 +299,7 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
QMutexLocker _( &idxMutex );
char * articleData = chunks.getBlock( articleAddress, chunk );
char * articleData = chunks->getBlock( articleAddress, chunk );
memcpy( &offset, articleData, sizeof( uint32_t ) );
articleData += sizeof( uint32_t );
@ -950,9 +928,7 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
entrySize = size;
}
else if ( !size ) {
gdWarning( "Stardict: short entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: short entry for the word %s encountered in \"%s\".", headword.c_str(), getName().c_str() );
break;
}
@ -965,9 +941,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
}
if ( size < entrySize ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -985,9 +961,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
if ( !entrySizeKnown ) {
if ( size < sizeof( uint32_t ) ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -1000,9 +976,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
}
if ( size < entrySize ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -1012,10 +988,10 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
size -= entrySize;
}
else {
gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
type,
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".",
type,
headword.c_str(),
getName().c_str() );
break;
}
}
@ -1028,9 +1004,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
size_t len = strlen( ptr + 1 );
if ( size < len + 2 ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -1042,9 +1018,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
else if ( isupper( *ptr ) ) {
// An entry which has its size before contents
if ( size < sizeof( uint32_t ) + 1 ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -1055,9 +1031,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
entrySize = ntohl( entrySize );
if ( size < sizeof( uint32_t ) + 1 + entrySize ) {
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
headword.c_str(),
getName().c_str() );
break;
}
@ -1067,10 +1043,10 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
size -= sizeof( uint32_t ) + 1 + entrySize;
}
else {
gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
(unsigned)*ptr,
headword.c_str(),
getName().c_str() );
qWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".",
(unsigned)*ptr,
headword.c_str(),
getName().c_str() );
break;
}
}
@ -1085,40 +1061,36 @@ QString const & StardictDictionary::getDescription()
return dictionaryDescription;
}
File::Index ifoFile( getDictionaryFilenames()[ 0 ], "r" );
Ifo ifo( ifoFile );
Ifo ifo( QString::fromStdString( getDictionaryFilenames()[ 0 ] ) );
if ( !ifo.copyright.empty() ) {
QString copyright = QString::fromUtf8( ifo.copyright.c_str() ).replace( "<br>", "\n", Qt::CaseInsensitive );
dictionaryDescription += QObject::tr( "Copyright: %1%2" ).arg( copyright ).arg( "\n\n" );
QString copyright = QString::fromUtf8( ifo.copyright.c_str() );
dictionaryDescription += QObject::tr( "Copyright: %1%2" ).arg( copyright ).arg( "<br><br>" );
}
if ( !ifo.author.empty() ) {
QString author = QString::fromUtf8( ifo.author.c_str() );
dictionaryDescription += QObject::tr( "Author: %1%2" ).arg( author ).arg( "\n\n" );
dictionaryDescription += QObject::tr( "Author: %1%2" ).arg( author ).arg( "<br><br>" );
}
if ( !ifo.email.empty() ) {
QString email = QString::fromUtf8( ifo.email.c_str() );
dictionaryDescription += QObject::tr( "E-mail: %1%2" ).arg( email ).arg( "\n\n" );
dictionaryDescription += QObject::tr( "E-mail: %1%2" ).arg( email ).arg( "<br><br>" );
}
if ( !ifo.website.empty() ) {
QString website = QString::fromUtf8( ifo.website.c_str() );
dictionaryDescription += QObject::tr( "Website: %1%2" ).arg( website ).arg( "\n\n" );
dictionaryDescription += QObject::tr( "Website: %1%2" ).arg( website ).arg( "<br><br>" );
}
if ( !ifo.date.empty() ) {
QString date = QString::fromUtf8( ifo.date.c_str() );
dictionaryDescription += QObject::tr( "Date: %1%2" ).arg( date ).arg( "\n\n" );
dictionaryDescription += QObject::tr( "Date: %1%2" ).arg( date ).arg( "<br><br>" );
}
if ( !ifo.description.empty() ) {
QString desc = QString::fromUtf8( ifo.description.c_str() );
desc.replace( "\t", "<br/>" );
desc.replace( "\\n", "<br/>" );
desc.replace( "<br>", "<br/>", Qt::CaseInsensitive );
dictionaryDescription += Html::unescape( desc, Html::HtmlOption::Keep );
dictionaryDescription += desc;
}
if ( dictionaryDescription.isEmpty() ) {
@ -1149,16 +1121,14 @@ void StardictDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Stardict: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Stardict: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Stardict: Failed building full-text search index for \"%s\", reason: %s\n",
getName().c_str(),
ex.what() );
qWarning( "Stardict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -1174,7 +1144,7 @@ void StardictDictionary::getArticleText( uint32_t articleAddress, QString & head
text = Html::unescape( QString::fromStdString( articleStr ) );
}
catch ( std::exception & ex ) {
gdWarning( "Stardict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Stardict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -1195,7 +1165,7 @@ sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString co
class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
{
wstring word;
std::u32string word;
StardictDictionary & dict;
QAtomicInt isCancelled;
@ -1203,7 +1173,7 @@ class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
public:
StardictHeadwordsRequest( wstring const & word_, StardictDictionary & dict_ ):
StardictHeadwordsRequest( std::u32string const & word_, StardictDictionary & dict_ ):
word( word_ ),
dict( dict_ )
{
@ -1238,7 +1208,7 @@ void StardictHeadwordsRequest::run()
// Limit the synonyms to at most 10 entries
vector< WordArticleLink > chain = dict.findArticles( word, false, 10 );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
@ -1250,7 +1220,7 @@ void StardictHeadwordsRequest::run()
dict.loadArticle( x.articleOffset, headword, articleText );
wstring headwordDecoded = Utf8::decode( headword );
std::u32string headwordDecoded = Text::toUtf32( headword );
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
// The headword seems to differ from the input word, which makes the
@ -1268,7 +1238,7 @@ void StardictHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( std::u32string const & word )
{
return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word );
@ -1281,8 +1251,8 @@ sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynony
class StardictArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
StardictDictionary & dict;
bool ignoreDiacritics;
@ -1292,8 +1262,8 @@ class StardictArticleRequest: public Dictionary::DataRequest
public:
StardictArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
StardictArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
StardictDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -1343,13 +1313,13 @@ void StardictArticleRequest::run()
}
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -1376,12 +1346,12 @@ void StardictArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -1397,7 +1367,7 @@ void StardictArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
@ -1440,9 +1410,9 @@ void StardictArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -1457,85 +1427,77 @@ static char const * beginsWith( char const * substr, char const * str )
return strncmp( str, substr, len ) == 0 ? str + len : 0;
}
/// Parses the StarDict .ifo file at @p fileName and fills in the fields of this
/// structure from its "key=value" lines.
///
/// The first line must start with "StarDict's dict ifo file" and the second one
/// with "version=". Empty lines and keys that are not listed below are ignored.
///
/// @throws exCantReadFile  if the file cannot be opened for reading.
/// @throws exNotAnIfoFile  if the two header lines are not as expected.
/// @throws exBadFieldInIfo if a numeric field cannot be parsed, or if
///                         idxoffsetbits is neither 32 nor 64.
Ifo::Ifo( const QString & fileName )
{
  QFile f( fileName );
  if ( !f.open( QIODevice::ReadOnly ) ) {
    throw exCantReadFile( "Cannot open IFO file -> " + fileName.toStdString() );
  }

  if ( !f.readLine().startsWith( "StarDict's dict ifo file" ) || !f.readLine().startsWith( "version=" ) ) {
    throw exNotAnIfoFile();
  }

  /// Now go through the file and parse options
  while ( !f.atEnd() ) {
    // readLine() keeps the line terminator. Trim into an owning byte array
    // instead of a view: the values below are consumed as C strings, and the
    // data() of a trimmed view still runs on into the untrimmed tail of the
    // line ("\n" or "\r\n"), which would then end up inside bookname,
    // description, copyright and the other string fields.
    const auto option = f.readLine().trimmed();

    // Empty lines are allowed in .ifo file
    if ( option.isEmpty() ) {
      continue;
    }

    // NUL-terminated exactly at the end of the trimmed line.
    char const * const text = option.constData();

    if ( char const * val = beginsWith( "bookname=", text ) ) {
      bookname = val;
    }
    else if ( char const * val = beginsWith( "wordcount=", text ) ) {
      if ( sscanf( val, "%u", &wordcount ) != 1 ) {
        throw exBadFieldInIfo( text );
      }
    }
    else if ( char const * val = beginsWith( "synwordcount=", text ) ) {
      if ( sscanf( val, "%u", &synwordcount ) != 1 ) {
        throw exBadFieldInIfo( text );
      }
    }
    else if ( char const * val = beginsWith( "idxfilesize=", text ) ) {
      if ( sscanf( val, "%u", &idxfilesize ) != 1 ) {
        throw exBadFieldInIfo( text );
      }
    }
    else if ( char const * val = beginsWith( "idxoffsetbits=", text ) ) {
      // Only 32- and 64-bit offsets are valid values for this field.
      if ( sscanf( val, "%u", &idxoffsetbits ) != 1 || ( idxoffsetbits != 32 && idxoffsetbits != 64 ) ) {
        throw exBadFieldInIfo( text );
      }
    }
    else if ( char const * val = beginsWith( "sametypesequence=", text ) ) {
      sametypesequence = val;
    }
    else if ( char const * val = beginsWith( "dicttype=", text ) ) {
      dicttype = val;
    }
    else if ( char const * val = beginsWith( "description=", text ) ) {
      description = val;
    }
    else if ( char const * val = beginsWith( "copyright=", text ) ) {
      copyright = val;
    }
    else if ( char const * val = beginsWith( "author=", text ) ) {
      author = val;
    }
    else if ( char const * val = beginsWith( "email=", text ) ) {
      email = val;
    }
    else if ( char const * val = beginsWith( "website=", text ) ) {
      website = val;
    }
    else if ( char const * val = beginsWith( "date=", text ) ) {
      date = val;
    }
  }
}
//// StardictDictionary::getResource()
@ -1595,7 +1557,7 @@ void StardictResourceRequest::run()
string n =
dict.getContainingFolder().toStdString() + Utils::Fs::separator() + "res" + Utils::Fs::separator() + resourceName;
GD_DPRINTF( "startdict resource name is %s\n", n.c_str() );
qDebug( "startdict resource name is %s", n.c_str() );
try {
QMutexLocker _( &dataMutex );
@ -1608,7 +1570,7 @@ void StardictResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1672,10 +1634,10 @@ void StardictResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "Stardict: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "Stardict: Failed loading resource \"%s\" for \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
catch ( ... ) {
@ -1757,7 +1719,7 @@ static void handleIdxSynFile( string const & fileName,
size_t wordLen = strlen( ptr );
if ( ptr + wordLen + 1 + ( isSynFile ? sizeof( uint32_t ) : sizeof( uint32_t ) * 2 ) > &image.back() ) {
GD_FDPRINTF( stderr, "Warning: sudden end of file %s\n", fileName.c_str() );
qWarning( "Warning: sudden end of file %s", fileName.c_str() );
break;
}
@ -1840,14 +1802,14 @@ static void handleIdxSynFile( string const & fileName,
// Insert new entry into an index
if ( parseHeadwords ) {
indexedWords.addWord( Utf8::decode( word ), offset );
indexedWords.addWord( Text::toUtf32( word ), offset );
}
else {
indexedWords.addSingleWord( Utf8::decode( word ), offset );
indexedWords.addSingleWord( Text::toUtf32( word ), offset );
}
}
GD_DPRINTF( "%u entires made\n", (unsigned)indexedWords.size() );
qDebug( "%u entires made", (unsigned)indexedWords.size() );
}
@ -1897,11 +1859,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
// Building the index
File::Index ifoFile( fileName, "r" );
Ifo ifo( QString::fromStdString( fileName ) );
Ifo ifo( ifoFile );
gdDebug( "Stardict: Building the index for dictionary: %s\n", ifo.bookname.c_str() );
qDebug( "Stardict: Building the index for dictionary: %s", ifo.bookname.c_str() );
if ( ifo.idxoffsetbits == 64 ) {
throw ex64BitsNotSupported();
@ -1913,24 +1873,24 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( synFileName.empty() ) {
if ( ifo.synwordcount ) {
GD_DPRINTF(
qDebug(
"Warning: dictionary has synwordcount specified, but no "
"corresponding .syn file was found\n" );
ifo.synwordcount = 0; // Pretend it wasn't there
}
}
else if ( !ifo.synwordcount ) {
GD_DPRINTF( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified\n",
synFileName.c_str() );
qDebug( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified",
synFileName.c_str() );
}
GD_DPRINTF( "bookname = %s\n", ifo.bookname.c_str() );
GD_DPRINTF( "wordcount = %u\n", ifo.wordcount );
qDebug( "bookname = %s", ifo.bookname.c_str() );
qDebug( "wordcount = %u", ifo.wordcount );
initializing.indexingDictionary( ifo.bookname );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
@ -2011,7 +1971,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// If there was a zip file, index it too
if ( zipFileName.size() ) {
GD_DPRINTF( "Indexing zip file\n" );
qDebug( "Indexing zip file" );
idxHeader.hasZipFile = 1;
@ -2050,7 +2010,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< StardictDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Stardict dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Stardict dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
}
}

View file

@ -0,0 +1 @@
Translit

View file

@ -1,7 +1,7 @@
/* This file is (c) 2013 Maksim Tamkovicz <quendimax@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "belarusiantranslit.hh"
#include "belarusian.hh"
#include "transliteration.hh"
#include <QCoreApplication>

View file

@ -3,7 +3,6 @@
#pragma once
#include <vector>
#include "dictionary.hh"
// Support for Belarusian transliteration

View file

@ -4,16 +4,12 @@
#include "chinese.hh"
#include <stdexcept>
#include <QCoreApplication>
// #ifdef Q_OS_MAC
#include <opencc/opencc.h>
// #endif
// #include <opencc/SimpleConverter.hpp>
#include "folding.hh"
#include "gddebug.hh"
#include "transliteration.hh"
#include "utf8.hh"
#include "text.hh"
namespace Chinese {
namespace ChineseTranslit {
class CharacterConversionDictionary: public Transliteration::BaseTransliterationDictionary
{
@ -31,7 +27,7 @@ public:
QString const & openccConfig );
~CharacterConversionDictionary();
std::vector< gd::wstring > getAlternateWritings( gd::wstring const & ) noexcept override;
std::vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept override;
};
CharacterConversionDictionary::CharacterConversionDictionary( std::string const & id,
@ -45,18 +41,18 @@ CharacterConversionDictionary::CharacterConversionDictionary( std::string const
// #ifdef Q_OS_MAC
converter = opencc_open( openccConfig.toLocal8Bit().constData() );
if ( converter == reinterpret_cast< opencc_t >( -1 ) ) {
gdWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s\n",
openccConfig.toLocal8Bit().constData(),
opencc_error() );
qWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s",
openccConfig.toLocal8Bit().constData(),
opencc_error() );
}
// #else
// converter = new opencc::SimpleConverter( openccConfig.toLocal8Bit().constData() );
// #endif
}
catch ( std::runtime_error & e ) {
gdWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s\n",
openccConfig.toLocal8Bit().constData(),
e.what() );
qWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s",
openccConfig.toLocal8Bit().constData(),
e.what() );
}
}
@ -72,15 +68,15 @@ CharacterConversionDictionary::~CharacterConversionDictionary()
// #endif
}
std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings( gd::wstring const & str ) noexcept
std::vector< std::u32string > CharacterConversionDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{
std::vector< gd::wstring > results;
std::vector< std::u32string > results;
if ( converter != NULL ) {
gd::wstring folded = Folding::applySimpleCaseOnly( str );
std::string input = Utf8::encode( folded );
std::u32string folded = Folding::applySimpleCaseOnly( str );
std::string input = Text::toUtf8( folded );
std::string output;
gd::wstring result;
std::u32string result;
try {
// #ifdef Q_OS_MAC
@ -91,16 +87,16 @@ std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings(
opencc_convert_utf8_free( tmp );
}
else {
gdWarning( "OpenCC: conversion failed %s\n", opencc_error() );
qWarning( "OpenCC: conversion failed %s", opencc_error() );
}
}
// #else
// output = converter->Convert( input );
// #endif
result = Utf8::decode( output );
result = Text::toUtf32( output );
}
catch ( std::exception & ex ) {
gdWarning( "OpenCC: conversion failed %s\n", ex.what() );
qWarning( "OpenCC: conversion failed %s", ex.what() );
}
if ( !result.empty() && result != folded ) {
@ -162,4 +158,4 @@ std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const
return result;
}
} // namespace Chinese
} // namespace ChineseTranslit

View file

@ -3,12 +3,10 @@
#pragma once
#include <map>
#include "config.hh"
#include "dictionary.hh"
/// Chinese character conversion support.
namespace Chinese {
namespace ChineseTranslit {
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const & );
}

View file

@ -1,4 +1,4 @@
#include "customtransliteration.hh"
#include "custom.hh"
#include "dictionary.hh"
#include <QCoreApplication>

View file

@ -1,7 +1,6 @@
#pragma once
#include <vector>
#include "transliteration.hh"
// Support for Belarusian transliteration

View file

@ -1,7 +1,7 @@
/* This file is (c) 2010 Jennie Petoumenou <epetoumenou@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "greektranslit.hh"
#include "greek.hh"
#include "transliteration.hh"
#include <QCoreApplication>

View file

@ -1,7 +1,9 @@
#include "romaji.hh"
#include <QCoreApplication>
namespace Romaji {
namespace RomajiTranslit {
using std::vector;
class HepburnHiragana: public Transliteration::Table
{
@ -375,4 +377,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & r )
return result;
}
} // namespace Romaji
} // namespace RomajiTranslit

View file

@ -4,12 +4,9 @@
#pragma once
#include "transliteration.hh"
#include "config.hh"
/// Japanese romanization (Romaji) support.
namespace Romaji {
namespace RomajiTranslit {
using std::vector;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & );
} // namespace Romaji
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & );
} // namespace RomajiTranslit

View file

@ -1,7 +1,7 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "russiantranslit.hh"
#include "russian.hh"
#include "transliteration.hh"
#include <QCoreApplication>

View file

@ -2,13 +2,11 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "transliteration.hh"
#include "utf8.hh"
#include "text.hh"
#include "folding.hh"
#include "gddebug.hh"
namespace Transliteration {
using gd::wchar;
BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id,
string const & name_,
@ -27,11 +25,6 @@ string BaseTransliterationDictionary::getName() noexcept
return name;
}
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() noexcept
{
return map< Dictionary::Property, string >();
}
unsigned long BaseTransliterationDictionary::getArticleCount() noexcept
{
return 0;
@ -42,26 +35,30 @@ unsigned long BaseTransliterationDictionary::getWordCount() noexcept
return 0;
}
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( wstring const &, unsigned long )
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( std::u32string const &,
unsigned long )
{
return std::make_shared< Dictionary::WordSearchRequestInstant >();
}
sptr< Dictionary::DataRequest >
BaseTransliterationDictionary::getArticle( wstring const &, vector< wstring > const &, wstring const &, bool )
sptr< Dictionary::DataRequest > BaseTransliterationDictionary::getArticle( std::u32string const &,
vector< std::u32string > const &,
std::u32string const &,
bool )
{
return std::make_shared< Dictionary::DataRequestInstant >( false );
}
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
sptr< Dictionary::WordSearchRequest >
BaseTransliterationDictionary::findHeadwordsForSynonym( std::u32string const & str )
{
sptr< Dictionary::WordSearchRequestInstant > result = std::make_shared< Dictionary::WordSearchRequestInstant >();
vector< wstring > alts = getAlternateWritings( str );
vector< std::u32string > alts = getAlternateWritings( str );
GD_DPRINTF( "alts = %u\n", (unsigned)alts.size() );
qDebug( "alts = %u", (unsigned)alts.size() );
for ( const auto & alt : alts ) {
result->getMatches().push_back( alt );
@ -73,13 +70,13 @@ sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwor
void Table::ins( char const * from, char const * to )
{
wstring fr = Utf8::decode( std::string( from ) );
std::u32string fr = Text::toUtf32( std::string( from ) );
if ( fr.size() > maxEntrySize ) {
maxEntrySize = fr.size();
}
insert( std::pair< wstring, wstring >( fr, Utf8::decode( std::string( to ) ) ) );
insert( std::pair< std::u32string, std::u32string >( fr, Text::toUtf32( std::string( to ) ) ) );
}
@ -90,12 +87,12 @@ TransliterationDictionary::TransliterationDictionary(
{
}
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str ) noexcept
vector< std::u32string > TransliterationDictionary::getAlternateWritings( std::u32string const & str ) noexcept
{
vector< wstring > results;
vector< std::u32string > results;
wstring result, folded;
wstring const * target;
std::u32string result, folded;
std::u32string const * target;
if ( caseSensitive ) {
// Don't do any transform -- the transliteration is case-sensitive
@ -106,8 +103,8 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
target = &folded;
}
wchar const * ptr = target->c_str();
size_t left = target->size();
char32_t const * ptr = target->c_str();
size_t left = target->size();
Table::const_iterator i;
@ -116,7 +113,7 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
for ( x = table.getMaxEntrySize(); x >= 1; --x ) {
if ( left >= x ) {
i = table.find( wstring( ptr, x ) );
i = table.find( std::u32string( ptr, x ) );
if ( i != table.end() ) {
result.append( i->second );

View file

@ -9,7 +9,6 @@
namespace Transliteration {
using std::map;
using gd::wstring;
using std::string;
using std::vector;
@ -28,24 +27,22 @@ public:
virtual string getName() noexcept;
virtual map< Dictionary::Property, string > getProperties() noexcept;
virtual unsigned long getArticleCount() noexcept;
virtual unsigned long getWordCount() noexcept;
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept = 0;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept = 0;
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & );
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
virtual sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const &, wstring const &, bool );
getArticle( std::u32string const &, vector< std::u32string > const &, std::u32string const &, bool );
};
class Table: public map< wstring, wstring >
class Table: public map< std::u32string, std::u32string >
{
unsigned maxEntrySize;
@ -79,7 +76,7 @@ public:
TransliterationDictionary(
string const & id, string const & name, QIcon icon, Table const & table, bool caseSensitive = true );
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
};
} // namespace Transliteration

View file

@ -37,50 +37,18 @@ bool tryPossibleZipName( std::string const & name, std::string & copyTo )
void loadFromFile( std::string const & filename, std::vector< char > & data )
{
File::Index f( filename, "rb" );
File::Index f( filename, QIODevice::ReadOnly );
auto size = f.file().size(); // QFile::size() obtains size via statx on Linux
data.resize( size );
f.read( data.data(), size );
}
void Index::open( char const * mode )
{
QFile::OpenMode openMode = QIODevice::Text;
const char * pch = mode;
while ( *pch ) {
switch ( *pch ) {
case 'r':
openMode |= QIODevice::ReadOnly;
break;
case 'w':
openMode |= QIODevice::WriteOnly;
break;
case '+':
openMode &= ~( QIODevice::ReadOnly | QIODevice::WriteOnly );
openMode |= QIODevice::ReadWrite;
break;
case 'a':
openMode |= QIODevice::Append;
break;
case 'b':
openMode &= ~QIODevice::Text;
break;
default:
break;
}
++pch;
}
if ( !f.open( openMode ) ) {
throw exCantOpen( f.fileName().toStdString() + ": " + f.errorString().toUtf8().data() );
}
}
Index::Index( std::string_view filename, char const * mode )
Index::Index( std::string_view filename, QIODevice::OpenMode mode )
{
f.setFileName( QString::fromUtf8( filename.data(), filename.size() ) );
open( mode );
if ( !f.open( mode ) ) {
throw exCantOpen( ( f.fileName() + ": " + f.errorString() ).toStdString() );
}
}
void Index::read( void * buf, qint64 size )

View file

@ -43,7 +43,7 @@ public:
QMutex lock;
// Create QFile Object and open() it.
Index( std::string_view filename, char const * mode );
Index( std::string_view filename, QIODevice::OpenMode mode );
/// QFile::read & QFile::write , but with exception throwing
void read( void * buf, qint64 size );
@ -81,6 +81,18 @@ public:
/// Like the above, but uses its own local internal buffer and strips newlines by default.
std::string gets( bool stripNl = true );
/// Reads a length-prefixed blob: first a 32-bit unsigned length, then that
/// many bytes of payload, which are stored into @p container.
///
/// The length is read as raw bytes straight into a uint32_t; no byte-order
/// conversion is performed here. The same value is used both as the element
/// count for resize() and as the byte count for read(), so @p T is expected
/// to be a contiguous container of one-byte elements (e.g. std::string or
/// std::vector< char >).
///
/// After the call the container always holds exactly the payload; a stored
/// length of 0 leaves it empty rather than keeping its previous content.
///
/// @tparam T container type providing resize() and data().
/// @param container destination for the payload bytes.
template< typename T >
void readU32SizeAndData( T & container )
{
  uint32_t size = 0;
  read( &size, sizeof( size ) );

  // Resize unconditionally so a reused container never keeps stale data
  // when the stored payload is empty.
  container.resize( size );

  if ( size > 0 ) {
    read( container.data(), size );
  }
}
/// export QFile::readall
QByteArray readall();
@ -113,8 +125,6 @@ public:
~Index() noexcept;
private:
// QFile::open but with fopen-like mode settings.
void open( char const * mode );
template< typename T >
void readType( T & value )

View file

@ -4,10 +4,8 @@
#include "indexedzip.hh"
#include "zipfile.hh"
#include <zlib.h>
#include "gddebug.hh"
#include "utf8.hh"
#include "text.hh"
#include "iconv.hh"
#include "wstring_qt.hh"
#include <QtCore5Compat/QTextCodec>
#include <QMutexLocker>
@ -24,7 +22,7 @@ bool IndexedZip::openZipFile( QString const & name )
return zipIsOpen;
}
bool IndexedZip::hasFile( gd::wstring const & name )
bool IndexedZip::hasFile( std::u32string const & name )
{
if ( !zipIsOpen ) {
return false;
@ -35,7 +33,7 @@ bool IndexedZip::hasFile( gd::wstring const & name )
return !links.empty();
}
bool IndexedZip::loadFile( gd::wstring const & name, vector< char > & data )
bool IndexedZip::loadFile( std::u32string const & name, vector< char > & data )
{
if ( !zipIsOpen ) {
return false;
@ -67,7 +65,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
if ( !ZipFile::readLocalHeader( zip, header ) ) {
vector< string > zipFileNames;
zip.getFilenames( zipFileNames );
GD_DPRINTF( "Failed to load header" );
qDebug( "Failed to load header" );
string filename;
if ( zip.getCurrentFile() < zipFileNames.size() ) {
filename = zipFileNames.at( zip.getCurrentFile() );
@ -81,7 +79,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
switch ( header.compressionMethod ) {
case ZipFile::Uncompressed:
GD_DPRINTF( "Uncompressed" );
qDebug( "Uncompressed" );
data.resize( header.uncompressedSize );
return (size_t)zip.read( &data.front(), data.size() ) == data.size();
@ -111,7 +109,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
}
if ( inflate( &stream, Z_FINISH ) != Z_STREAM_END ) {
GD_DPRINTF( "Not zstream end!" );
qDebug( "Not zstream end!" );
data.clear();
@ -156,7 +154,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
while ( ZipFile::readNextEntry( zip, entry ) ) {
if ( entry.compressionMethod == ZipFile::Unsupported ) {
qWarning( "Zip warning: compression method unsupported -- skipping file \"%s\"\n", entry.fileName.data() );
qWarning( "Zip warning: compression method unsupported -- skipping file \"%s\"", entry.fileName.data() );
continue;
}
@ -181,7 +179,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
if ( !hasNonAscii ) {
// Add entry as is
zipFileNames.addSingleWord( Utf8::decode( entry.fileName.data() ), entry.localHeaderOffset );
zipFileNames.addSingleWord( Text::toUtf32( entry.fileName.data() ), entry.localHeaderOffset );
if ( filesCount ) {
*filesCount += 1;
}
@ -193,7 +191,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// Utf8
try {
wstring decoded = Utf8::decode( entry.fileName.constData() );
std::u32string decoded = Text::toUtf32( entry.fileName.constData() );
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
if ( filesCount != 0 && !alreadyCounted ) {
@ -201,12 +199,12 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
alreadyCounted = true;
}
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
// Failed to decode
}
if ( !entry.fileNameInUTF8 ) {
wstring nameInSystemLocale;
std::u32string nameInSystemLocale;
// System locale
if ( localeCodec ) {
@ -225,7 +223,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP866
try {
wstring decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
std::u32string decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
@ -242,7 +240,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
// CP1251
try {
wstring decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
std::u32string decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
if ( nameInSystemLocale != decoded ) {
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );

View file

@ -37,11 +37,11 @@ public:
/// Checks whether the given file exists in the zip file or not.
/// Note that this function is thread-safe, since it does not access zip file.
bool hasFile( gd::wstring const & name );
bool hasFile( std::u32string const & name );
/// Attempts loading the given file into the given vector. Returns true on
/// success, false otherwise.
bool loadFile( gd::wstring const & name, std::vector< char > & );
bool loadFile( std::u32string const & name, std::vector< char > & );
bool loadFile( uint32_t offset, std::vector< char > & );
/// Index compressed files in zip file

View file

@ -1,12 +1,11 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "voiceengines.hh"
#include "audiolink.hh"
#include "htmlescape.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include <string>
#include <map>
@ -21,6 +20,7 @@ namespace VoiceEngines {
using namespace Dictionary;
using std::string;
using std::u32string;
using std::map;
inline string toMd5( QByteArray const & b )
@ -47,10 +47,6 @@ public:
return voiceEngine.name.toUtf8().data();
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -62,16 +58,18 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
sptr< WordSearchRequest > prefixMatch( u32string const & word, unsigned long maxResults ) override;
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
sptr< DataRequest >
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ) override;
protected:
void loadIcon() noexcept override;
};
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ )
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( u32string const & /*word*/,
unsigned long /*maxResults*/ )
{
WordSearchRequestInstant * sr = new WordSearchRequestInstant();
@ -80,11 +78,11 @@ sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /
}
sptr< Dictionary::DataRequest >
VoiceEnginesDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
VoiceEnginesDictionary::getArticle( u32string const & word, vector< u32string > const &, u32string const &, bool )
{
string result;
string wordUtf8( Utf8::encode( word ) );
string wordUtf8( Text::toUtf8( word ) );
result += "<table class=\"voiceengines_play\"><tr>";
@ -139,4 +137,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const
} // namespace VoiceEngines
#endif
#endif

View file

@ -1,20 +1,17 @@
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#pragma once
#ifndef NO_TTS_SUPPORT
#ifdef TTS_SUPPORT
#include "dictionary.hh"
#include "config.hh"
#include "wstring.hh"
#include "text.hh"
#include <QCryptographicHash>
namespace VoiceEngines {
using std::vector;
using std::string;
using gd::wstring;
vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const & voiceEngines );

View file

@ -2,13 +2,11 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "website.hh"
#include "wstring_qt.hh"
#include "utf8.hh"
#include "text.hh"
#include <QUrl>
#include <QTextCodec>
#include <QDir>
#include <QFileInfo>
#include "gddebug.hh"
#include "globalbroadcaster.hh"
#include "fmt/compile.h"
@ -22,7 +20,6 @@ namespace {
class WebSiteDictionary: public Dictionary::Class
{
string name;
QByteArray urlTemplate;
bool experimentalIframe;
QString iconFilename;
@ -38,12 +35,13 @@ public:
bool inside_iframe_,
QNetworkAccessManager & netMgr_ ):
Dictionary::Class( id, vector< string >() ),
name( name_ ),
iconFilename( iconFilename_ ),
inside_iframe( inside_iframe_ ),
netMgr( netMgr_ ),
experimentalIframe( false )
{
dictionaryName = name_;
if ( urlTemplate_.startsWith( "http://" ) || urlTemplate_.startsWith( "https://" ) ) {
experimentalIframe = true;
}
@ -53,16 +51,6 @@ public:
dictionaryDescription = urlTemplate_;
}
string getName() noexcept override
{
return name;
}
map< Property, string > getProperties() noexcept override
{
return map< Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return 0;
@ -73,10 +61,12 @@ public:
return 0;
}
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long ) override;
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long ) override;
sptr< DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const & context, bool ) override;
sptr< DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const & context,
bool ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -101,7 +91,7 @@ protected slots:
virtual void requestFinished( QNetworkReply * ) {}
};
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( wstring const & /*word*/, unsigned long )
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( std::u32string const & /*word*/, unsigned long )
{
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
@ -304,9 +294,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
}
else {
if ( netReply->url().scheme() == "file" ) {
gdWarning( "WebSites: Failed loading article from \"%s\", reason: %s\n",
dictPtr->getName().c_str(),
netReply->errorString().toUtf8().data() );
qWarning( "WebSites: Failed loading article from \"%s\", reason: %s",
dictPtr->getName().c_str(),
netReply->errorString().toUtf8().data() );
}
else {
setErrorString( netReply->errorString() );
@ -319,9 +309,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
finish();
}
sptr< DataRequest > WebSiteDictionary::getArticle( wstring const & str,
vector< wstring > const & /*alts*/,
wstring const & context,
sptr< DataRequest > WebSiteDictionary::getArticle( std::u32string const & str,
vector< std::u32string > const & /*alts*/,
std::u32string const & context,
bool /*ignoreDiacritics*/ )
{
QString urlString = Utils::WebSite::urlReplaceWord( QString( urlTemplate ), QString::fromStdU32String( str ) );
@ -478,7 +468,8 @@ void WebSiteDictionary::loadIcon() noexcept
loadIconFromFile( fInfo.absoluteFilePath(), true );
}
}
if ( dictionaryIcon.isNull() && !loadIconFromText( ":/icons/webdict.svg", QString::fromStdString( name ) ) ) {
if ( dictionaryIcon.isNull()
&& !loadIconFromText( ":/icons/webdict.svg", QString::fromStdString( dictionaryName ) ) ) {
dictionaryIcon = QIcon( ":/icons/webdict.svg" );
}
dictionaryIconLoaded = true;

View file

@ -4,11 +4,10 @@
#include "xdxf.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "chunkedstorage.hh"
#include "dictzip.hh"
#include "htmlescape.hh"
#include <map>
#include <set>
#include <string>
@ -16,30 +15,19 @@
#include <list>
#include <wctype.h>
#include <stdlib.h>
#include "gddebug.hh"
#include "wstring_qt.hh"
#include "xdxf2html.hh"
#include "ufile.hh"
#include "dictzip.hh"
#include "langcoder.hh"
#include "indexedzip.hh"
#include "filetype.hh"
#include "tiff.hh"
#include "ftshelpers.hh"
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QIODevice>
#include <QXmlStreamReader>
#include <QTextDocument>
#include <QFileInfo>
#include <QDir>
#include <QPainter>
#include <QRegularExpression>
#include <QSemaphore>
#include <QThreadPool>
#include <QAtomicInt>
#include "utils.hh"
@ -51,7 +39,6 @@ using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;
using std::vector;
using std::list;
@ -125,7 +112,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -152,16 +139,6 @@ public:
~XdxfDictionary();
string getName() noexcept override
{
return dictionaryName;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
return idxHeader.articleCount;
@ -182,8 +159,10 @@ public:
return idxHeader.langTo;
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -228,7 +207,7 @@ private:
XdxfDictionary::XdxfDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() )
{
// Read the dictionary name
@ -392,14 +371,14 @@ void XdxfDictionary::makeFTSIndex( QAtomicInt & isCancelled )
}
gdDebug( "Xdxf: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Xdxf: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Xdxf: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Xdxf: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -413,7 +392,7 @@ void XdxfDictionary::getArticleText( uint32_t articleAddress, QString & headword
text = Html::unescape( QString::fromStdString( articleStr ) );
}
catch ( std::exception & ex ) {
gdWarning( "Xdxf: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Xdxf: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -433,8 +412,8 @@ XdxfDictionary::getSearchResults( QString const & searchString, int searchMode,
class XdxfArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
std::u32string word;
vector< std::u32string > alts;
XdxfDictionary & dict;
bool ignoreDiacritics;
@ -443,8 +422,8 @@ class XdxfArticleRequest: public Dictionary::DataRequest
public:
XdxfArticleRequest( wstring const & word_,
vector< wstring > const & alts_,
XdxfArticleRequest( std::u32string const & word_,
vector< std::u32string > const & alts_,
XdxfDictionary & dict_,
bool ignoreDiacritics_ ):
word( word_ ),
@ -489,13 +468,13 @@ void XdxfArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -524,12 +503,12 @@ void XdxfArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< std::u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -537,7 +516,7 @@ void XdxfArticleRequest::run()
articlesIncluded.insert( x.articleOffset );
}
catch ( std::exception & ex ) {
gdWarning( "XDXF: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
qWarning( "XDXF: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
}
}
@ -549,7 +528,7 @@ void XdxfArticleRequest::run()
string result;
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< std::u32string, pair< string, string > >::const_iterator i;
string cleaner = Utils::Html::getHtmlCleaner();
@ -576,9 +555,9 @@ void XdxfArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -878,7 +857,7 @@ void indexArticle( GzippedFile & gzFile,
if ( words.empty() ) {
// Nothing to index, this article didn't have any tags
gdWarning( "No <k> tags found in an article at offset 0x%x, article skipped.\n", (unsigned)articleOffset );
qWarning( "No <k> tags found in an article at offset 0x%x, article skipped.", (unsigned)articleOffset );
}
else {
// Add an entry
@ -898,7 +877,7 @@ void indexArticle( GzippedFile & gzFile,
// Add also first header - it's needed for full-text search
chunks.addToBlock( words.begin()->toUtf8().data(), words.begin()->toUtf8().length() + 1 );
// GD_DPRINTF( "%x: %s\n", articleOffset, words.begin()->toUtf8().data() );
// qDebug( "%x: %s", articleOffset, words.begin()->toUtf8().data() );
// Add words to index
@ -973,7 +952,7 @@ void XdxfResourceRequest::run()
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
GD_DPRINTF( "xdxf resource name is %s\n", n.c_str() );
qDebug( "xdxf resource name is %s", n.c_str() );
try {
try {
@ -995,7 +974,7 @@ void XdxfResourceRequest::run()
if ( dict.resourceZip.isOpen() ) {
QMutexLocker _( &dataMutex );
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
throw; // Make it fail since we couldn't read the archive
}
}
@ -1016,10 +995,10 @@ void XdxfResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "XDXF: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "XDXF: Failed loading resource \"%s\" for \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -1073,11 +1052,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
// Building the index
gdDebug( "Xdxf: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Xdxf: Building the index for dictionary: %s", fileName.c_str() );
//initializing.indexingDictionary( nameFromFileName( dictFiles[ 0 ] ) );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
map< string, string > abrv;
@ -1162,7 +1141,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
chunks.addToBlock( n.data(), n.size() );
}
else {
GD_DPRINTF( "Warning: duplicate full_name in %s\n", dictFiles[ 0 ].c_str() );
qDebug( "Warning: duplicate full_name in %s", dictFiles[ 0 ].c_str() );
}
}
else if ( stream.name() == u"description" ) {
@ -1186,7 +1165,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
chunks.addToBlock( n.data(), n.size() );
}
else {
GD_DPRINTF( "Warning: duplicate description in %s\n", dictFiles[ 0 ].c_str() );
qDebug( "Warning: duplicate description in %s", dictFiles[ 0 ].c_str() );
}
}
else if ( stream.name() == u"languages" ) {
@ -1216,7 +1195,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
else if ( stream.name() == u"abbreviations" ) {
QString s;
string value;
list< wstring > keys;
list< std::u32string > keys;
while ( !( stream.isEndElement() && stream.name() == u"abbreviations" ) && !stream.atEnd() ) {
if ( !stream.readNextStartElement() ) {
break;
@ -1232,7 +1211,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}
@ -1252,7 +1231,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
s = readElementText( stream );
value = Folding::trimWhitespace( s ).toStdString();
for ( const auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}
@ -1312,7 +1291,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// If there was a zip file, index it too
if ( zipFileName.size() ) {
GD_DPRINTF( "Indexing zip file\n" );
qDebug( "Indexing zip file" );
idxHeader.hasZipFile = 1;
@ -1363,17 +1342,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
if ( stream.hasError() ) {
gdWarning( "%s had a parse error %s at line %lu, and therefore was indexed only up to the point of error.",
dictFiles[ 0 ].c_str(),
stream.errorString().toUtf8().data(),
(unsigned long)stream.lineNumber() );
qWarning( "%s had a parse error %s at line %lu, and therefore was indexed only up to the point of error.",
dictFiles[ 0 ].c_str(),
stream.errorString().toUtf8().data(),
(unsigned long)stream.lineNumber() );
}
}
dictionaries.push_back( std::make_shared< XdxfDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Xdxf dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Xdxf dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
}
}

View file

@ -3,9 +3,7 @@
#include "xdxf2html.hh"
#include <QtXml>
#include "gddebug.hh"
#include "utf8.hh"
#include "wstring_qt.hh"
#include "text.hh"
#include "folding.hh"
#include "audiolink.hh"
@ -131,21 +129,21 @@ string convert( string const & in,
#if ( QT_VERSION < QT_VERSION_CHECK( 6, 5, 0 ) )
if ( !dd.setContent( QByteArray( in_data.c_str() ), false, &errorStr, &errorLine, &errorColumn ) ) {
qWarning( "Xdxf2html error, xml parse failed: %s at %d,%d\n",
qWarning( "Xdxf2html error, xml parse failed: %s at %d,%d",
errorStr.toLocal8Bit().constData(),
errorLine,
errorColumn );
gdWarning( "The input was: %s\n", in_data.c_str() );
qWarning( "The input was: %s", in_data.c_str() );
return in;
}
#else
auto setContentResult = dd.setContent( QByteArray::fromStdString( in_data ) );
if ( !setContentResult ) {
qWarning( "Xdxf2html error, xml parse failed: %s at %lld,%lld\n",
qWarning( "Xdxf2html error, xml parse failed: %s at %lld,%lld",
setContentResult.errorMessage.toStdString().c_str(),
setContentResult.errorLine,
setContentResult.errorColumn );
gdWarning( "The input was: %s\n", in_data.c_str() );
qWarning( "The input was: %s", in_data.c_str() );
return in;
}
#endif
@ -443,7 +441,7 @@ string convert( string const & in,
if ( i != pAbrv->end() ) {
string title;
if ( Utf8::decode( i->second ).size() < 70 ) {
if ( Text::toUtf32( i->second ).size() < 70 ) {
// Replace all spaces with non-breakable ones, since that's how Lingvo shows tooltips
title.reserve( i->second.size() );
@ -467,7 +465,7 @@ string convert( string const & in,
else {
title = i->second;
}
el.setAttribute( "title", QString::fromStdU32String( Utf8::decode( title ) ) );
el.setAttribute( "title", QString::fromStdU32String( Text::toUtf32( title ) ) );
}
}
}
@ -629,7 +627,7 @@ string convert( string const & in,
// if( type == XDXF && dictPtr != NULL && !el.hasAttribute( "start" ) )
if ( dictPtr != NULL && !el.hasAttribute( "start" ) ) {
string filename = Utf8::encode( el.text().toStdU32String() );
string filename = Text::toUtf8( el.text().toStdU32String() );
if ( Filetype::isNameOfPicture( filename ) ) {
QUrl url;

View file

@ -5,10 +5,8 @@
#include "zim.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "utf8.hh"
#include "text.hh"
#include "langcoder.hh"
#include "filetype.hh"
#include "dictfile.hh"
@ -17,24 +15,18 @@
#include "ftshelpers.hh"
#include "htmlescape.hh"
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include <QByteArray>
#include <QFile>
#include <QString>
#include <QAtomicInt>
#include <QImage>
#include <QDir>
#include <QRegularExpression>
#include <string>
#include <set>
#include <map>
#include <algorithm>
#include <QtConcurrent>
#include <QtConcurrentRun>
#include <utility>
#include "globalregex.hh"
#include <zim/zim.h>
@ -46,12 +38,12 @@
namespace Zim {
using std::string;
using std::u32string;
using std::map;
using std::vector;
using std::multimap;
using std::pair;
using std::set;
using gd::wstring;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
@ -93,7 +85,7 @@ static_assert( alignof( IdxHeader ) == 1 );
// Some supporting functions
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -168,15 +160,6 @@ public:
~ZimDictionary() = default;
string getName() noexcept override
{
return dictionaryName;
}
map< Dictionary::Property, string > getProperties() noexcept override
{
return {};
}
unsigned long getArticleCount() noexcept override
{
@ -199,7 +182,7 @@ public:
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -241,7 +224,7 @@ private:
ZimDictionary::ZimDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() ),
df( dictionaryFiles[ 0 ] )
{
@ -295,7 +278,7 @@ void ZimDictionary::loadIcon() noexcept
return;
}
catch ( zim::EntryNotFound & e ) {
gdDebug( "ZIM icon not loaded for: %s", dictionaryName.c_str() );
qDebug( "ZIM icon not loaded for: %s", dictionaryName.c_str() );
}
}
@ -497,13 +480,13 @@ void ZimDictionary::makeFTSIndex( QAtomicInt & isCancelled )
return;
}
gdDebug( "Zim: Building the full-text index for dictionary: %s\n", getName().c_str() );
qDebug( "Zim: Building the full-text index for dictionary: %s", getName().c_str() );
try {
FtsHelpers::makeFTSIndex( this, isCancelled );
FTS_index_completed.ref();
}
catch ( std::exception & ex ) {
gdWarning( "Zim: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Zim: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
QFile::remove( ftsIdxName.c_str() );
}
}
@ -518,7 +501,7 @@ void ZimDictionary::getArticleText( uint32_t articleAddress, QString & headword,
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
}
catch ( std::exception & ex ) {
gdWarning( "Zim: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
qWarning( "Zim: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
}
}
@ -536,8 +519,8 @@ ZimDictionary::getSearchResults( QString const & searchString, int searchMode, b
class ZimArticleRequest: public Dictionary::DataRequest
{
wstring word;
vector< wstring > alts;
u32string word;
vector< u32string > alts;
ZimDictionary & dict;
bool ignoreDiacritics;
@ -546,7 +529,10 @@ class ZimArticleRequest: public Dictionary::DataRequest
public:
ZimArticleRequest( wstring word_, vector< wstring > const & alts_, ZimDictionary & dict_, bool ignoreDiacritics_ ):
ZimArticleRequest( u32string word_,
vector< u32string > const & alts_,
ZimDictionary & dict_,
bool ignoreDiacritics_ ):
word( std::move( word_ ) ),
alts( alts_ ),
dict( dict_ ),
@ -588,13 +574,13 @@ void ZimArticleRequest::run()
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
set< quint32 > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -631,12 +617,12 @@ void ZimArticleRequest::run()
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, pair< string, string > > & mapToUse =
multimap< u32string, pair< string, string > > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
@ -655,7 +641,7 @@ void ZimArticleRequest::run()
// See Issue #271: A mechanism to clean-up invalid HTML cards.
string cleaner = Utils::Html::getHtmlCleaner();
multimap< wstring, pair< string, string > >::const_iterator i;
multimap< u32string, pair< string, string > >::const_iterator i;
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
@ -683,9 +669,9 @@ void ZimArticleRequest::run()
finish();
}
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( u32string const & word,
vector< u32string > const & alts,
u32string const &,
bool ignoreDiacritics )
{
@ -767,10 +753,10 @@ void ZimResourceRequest::run()
hasAnyData = true;
}
catch ( std::exception & ex ) {
gdWarning( "ZIM: Failed loading resource \"%s\" from \"%s\", reason: %s\n",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
qWarning( "ZIM: Failed loading resource \"%s\" from \"%s\", reason: %s",
resourceName.c_str(),
dict.getName().c_str(),
ex.what() );
// Resource not loaded -- we don't set the hasAnyData flag then
}
@ -783,7 +769,7 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
return std::make_shared< ZimResourceRequest >( *this, noLeadingDot.toStdString() );
}
wstring normalizeWord( const std::string & url );
u32string normalizeWord( const std::string & url );
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
string const & indicesDir,
Dictionary::Initializing & initializing,
@ -822,7 +808,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
try {
//only check zim file.
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
gdDebug( "Zim: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Zim: Building the index for dictionary: %s", fileName.c_str() );
unsigned articleCount = df.getArticleCount();
unsigned wordCount = 0;
@ -832,7 +818,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );
}
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
idxHeader.namePtr = 0xFFFFFFFF;
@ -866,7 +852,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
if ( maxHeadwordsToExpand > 0 && ( articleCount >= maxHeadwordsToExpand ) ) {
if ( !title.empty() ) {
wstring word = Utf8::decode( title );
u32string word = Text::toUtf32( title );
indexedWords.addSingleWord( word, index );
}
else if ( !url.empty() ) {
@ -875,7 +861,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
}
else {
if ( !title.empty() ) {
auto word = Utf8::decode( title );
auto word = Text::toUtf32( title );
indexedWords.addWord( word, index );
wordCount++;
}
@ -910,17 +896,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< ZimDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Zim dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Zim dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
continue;
}
catch ( ... ) {
qWarning( "Zim dictionary initializing failed\n" );
qWarning( "Zim dictionary initializing failed" );
continue;
}
}
return dictionaries;
}
wstring normalizeWord( const std::string & url )
u32string normalizeWord( const std::string & url )
{
auto formattedUrl = QString::fromStdString( url ).remove( RX::Zim::leadingDotSlash );
return formattedUrl.toStdU32String();

View file

@ -4,13 +4,12 @@
#include "zipsounds.hh"
#include "dictfile.hh"
#include "folding.hh"
#include "utf8.hh"
#include "text.hh"
#include "btreeidx.hh"
#include "audiolink.hh"
#include "indexedzip.hh"
#include "filetype.hh"
#include "gddebug.hh"
#include "chunkedstorage.hh"
#include "htmlescape.hh"
@ -19,16 +18,12 @@
#include <QFile>
#include <QDir>
#ifdef _MSC_VER
#include <stub_msvc.h>
#endif
#include "utils.hh"
namespace ZipSounds {
using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
@ -60,7 +55,7 @@ static_assert( alignof( IdxHeader ) == 1 );
bool indexIsOldOrBad( string const & indexFile )
{
File::Index idx( indexFile, "rb" );
File::Index idx( indexFile, QIODevice::ReadOnly );
IdxHeader header;
@ -68,19 +63,19 @@ bool indexIsOldOrBad( string const & indexFile )
|| header.formatVersion != CurrentFormatVersion;
}
wstring stripExtension( string const & str )
std::u32string stripExtension( string const & str )
{
wstring name;
std::u32string name;
try {
name = Utf8::decode( str );
name = Text::toUtf32( str );
}
catch ( Utf8::exCantDecode & ) {
catch ( Text::exCantDecode & ) {
return name;
}
if ( Filetype::isNameOfSound( str ) ) {
wstring::size_type pos = name.rfind( L'.' );
if ( pos != wstring::npos ) {
std::u32string::size_type pos = name.rfind( L'.' );
if ( pos != std::u32string::npos ) {
name.erase( pos );
}
@ -111,10 +106,6 @@ public:
string getName() noexcept override;
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
}
unsigned long getArticleCount() noexcept override
{
@ -126,8 +117,10 @@ public:
return getArticleCount();
}
sptr< Dictionary::DataRequest >
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics ) override;
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
@ -140,7 +133,7 @@ ZipSoundsDictionary::ZipSoundsDictionary( string const & id,
string const & indexFile,
vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idx( indexFile, QIODevice::ReadOnly ),
idxHeader( idx.read< IdxHeader >() )
{
chunks = std::shared_ptr< ChunkedStorage::Reader >( new ChunkedStorage::Reader( idx, idxHeader.chunksOffset ) );
@ -165,9 +158,9 @@ string ZipSoundsDictionary::getName() noexcept
return result;
}
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word,
vector< wstring > const & alts,
wstring const &,
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( std::u32string const & word,
vector< std::u32string > const & alts,
std::u32string const &,
bool ignoreDiacritics )
{
@ -181,13 +174,13 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
multimap< wstring, uint32_t > mainArticles, alternateArticles;
multimap< std::u32string, uint32_t > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
if ( ignoreDiacritics ) {
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
}
@ -202,12 +195,12 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
// We do the case-folded comparison here.
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
if ( ignoreDiacritics ) {
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
}
multimap< wstring, uint32_t > & mapToUse =
multimap< std::u32string, uint32_t > & mapToUse =
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.articleOffset ) );
@ -221,7 +214,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
string result;
multimap< wstring, uint32_t >::const_iterator i;
multimap< std::u32string, uint32_t >::const_iterator i;
result += "<table class=\"lsa_play\">";
@ -252,7 +245,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz;
string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>";
@ -294,7 +287,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
nameBlock += sz;
string displayedName =
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
result += "<tr>";
@ -324,7 +317,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const &
{
// Remove extension for sound files (like in sound dirs)
wstring strippedName = stripExtension( name );
std::u32string strippedName = stripExtension( name );
vector< WordArticleLink > chain = findArticles( strippedName );
@ -403,9 +396,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
gdDebug( "Zips: Building the index for dictionary: %s\n", fileName.c_str() );
qDebug( "Zips: Building the index for dictionary: %s", fileName.c_str() );
File::Index idx( indexFile, "wb" );
File::Index idx( indexFile, QIODevice::WriteOnly );
IdxHeader idxHeader;
memset( &idxHeader, 0, sizeof( idxHeader ) );
@ -438,7 +431,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Remove extension for sound files (like in sound dirs)
wstring word = stripExtension( link.word );
std::u32string word = stripExtension( link.word );
if ( !word.empty() ) {
names.addWord( word, offset );
}
@ -477,7 +470,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
dictionaries.push_back( std::make_shared< ZipSoundsDictionary >( dictId, indexFile, dictFiles ) );
}
catch ( std::exception & e ) {
gdWarning( "Zipped sounds pack reading failed: %s, error: %s\n", fileName.c_str(), e.what() );
qWarning( "Zipped sounds pack reading failed: %s, error: %s", fileName.c_str(), e.what() );
}
}

View file

@ -4,7 +4,6 @@
#include <QDir>
#include <QTimer>
#include "externalviewer.hh"
#include "gddebug.hh"
ExternalViewer::ExternalViewer(
const char * data, int size, QString const & extension, QString const & viewerCmdLine_, QObject * parent ):
@ -26,7 +25,7 @@ ExternalViewer::ExternalViewer(
tempFile.close();
GD_DPRINTF( "%s\n", tempFile.fileName().toLocal8Bit().data() );
qDebug( "%s", tempFile.fileName().toLocal8Bit().data() );
}
void ExternalViewer::start()

View file

@ -5,9 +5,7 @@
#include <cstdlib>
#include "fulltextsearch.hh"
#include "ftshelpers.hh"
#include "wstring_qt.hh"
#include "dictfile.hh"
#include "gddebug.hh"
#include "folding.hh"
#include "utils.hh"
@ -253,7 +251,7 @@ void FTSResultsRequest::run()
qWarning() << e.get_description().c_str();
}
catch ( std::exception & ex ) {
gdWarning( "FTS: Failed full-text search for \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
qWarning( "FTS: Failed full-text search for \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
// Results not loaded -- we don't set the hasAnyData flag then
}

Some files were not shown because too many files have changed in this diff Show more