mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-12-05 00:24:06 +00:00
Merge pull request #2007 from xiaoyifang/staged
Some checks failed
Release All / build_macOS (macos-13, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-13, 6.7.2) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.7.2) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.6.3) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.7.2) (push) Has been cancelled
Release All / generate_other_staffs (push) Has been cancelled
Release All / publish (push) Has been cancelled
Some checks failed
Release All / build_macOS (macos-13, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-13, 6.7.2) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.6.3) (push) Has been cancelled
Release All / build_macOS (macos-14, 6.7.2) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.6.3) (push) Has been cancelled
Release All / build_Windows (windows-2022, 6.7.2) (push) Has been cancelled
Release All / generate_other_staffs (push) Has been cancelled
Release All / publish (push) Has been cancelled
daily merge,staged->dev
This commit is contained in:
commit
bed76f366b
|
@ -26,6 +26,7 @@ Checks: >
|
|||
-google-readability-casting,
|
||||
-hicpp-deprecated-headers,
|
||||
-hicpp-no-array-decay,
|
||||
-misc-confusable-identifiers,
|
||||
-misc-const-correctness,
|
||||
-misc-include-cleaner,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
|
@ -33,6 +34,7 @@ Checks: >
|
|||
-modernize-deprecated-headers,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-trailing-return-type,
|
||||
-performance-enum-size,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-identifier-length,
|
||||
-readability-magic-numbers,
|
||||
|
|
|
@ -17,4 +17,10 @@ a11c9e3aeca4329e1982d8fe26bacbb21ab50ddf
|
|||
d15081e723756eef053550dc9e06e31d7828dec3
|
||||
|
||||
# remove gd::toWString
|
||||
c8af0450f1f7f8188004db96e3f53e7e33e2ccad
|
||||
c8af0450f1f7f8188004db96e3f53e7e33e2ccad
|
||||
|
||||
# remove gddebug.hh and associated functions
|
||||
76aaed116bdc3aeb53fd61553aedb877baf9b510
|
||||
|
||||
# wstring & wchar -> std::u32string & char32_t
|
||||
f1e158578f62c96059bef1a616b75495adb6e2c6
|
||||
|
|
9
.github/workflows/PR-check-cmake.yml
vendored
9
.github/workflows/PR-check-cmake.yml
vendored
|
@ -51,6 +51,9 @@ jobs:
|
|||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: false
|
||||
- name: Update brew
|
||||
run: |
|
||||
brew update
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install \
|
||||
|
@ -66,11 +69,11 @@ jobs:
|
|||
xapian \
|
||||
libzim \
|
||||
qt
|
||||
|
||||
wget ftp://ftp.sra.co.jp/pub/misc/eb/eb-4.4.3.tar.bz2
|
||||
- name: Install eb
|
||||
run: |
|
||||
wget https://github.com/mistydemeo/eb/releases/download/v4.4.3/eb-4.4.3.tar.bz2
|
||||
tar xvjf eb-4.4.3.tar.bz2
|
||||
cd eb-4.4.3 && ./configure && make -j 8 && sudo make install && cd ..
|
||||
|
||||
- name: Run build
|
||||
run: |
|
||||
mkdir build_dir
|
||||
|
|
9
.github/workflows/Release-all.yml
vendored
9
.github/workflows/Release-all.yml
vendored
|
@ -26,6 +26,9 @@ jobs:
|
|||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Update brew
|
||||
run: |
|
||||
brew update
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install \
|
||||
|
@ -40,7 +43,8 @@ jobs:
|
|||
ninja \
|
||||
opencc \
|
||||
xapian
|
||||
|
||||
- name: Install eb
|
||||
run: |
|
||||
git clone https://github.com/xiaoyifang/eb.git
|
||||
cd eb && ./configure && make -j 8 && sudo make install && cd ..
|
||||
- uses: jurplel/install-qt-action@v4
|
||||
|
@ -61,6 +65,9 @@ jobs:
|
|||
- name: Package
|
||||
run: |
|
||||
cmake --install build_dir/
|
||||
- name: Print package content
|
||||
run: |
|
||||
ls -Rl ./build_dir/redist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: macOS-${{ matrix.os }}-Qt${{ matrix.qt_ver }}
|
||||
|
|
242
CMakeLists.txt
242
CMakeLists.txt
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.25) # ubuntu 23.04 Fedora 36
|
||||
cmake_minimum_required(VERSION 3.25) # Debian 11 Ubuntu 24.04 Fedora 36
|
||||
|
||||
option(WITH_FFMPEG_PLAYER "Enable support for FFMPEG player" ON)
|
||||
option(WITH_EPWING_SUPPORT "Enable epwing support" ON)
|
||||
|
@ -9,20 +9,12 @@ option(WITH_TTS "enable QTexttoSpeech support" OFF)
|
|||
option(USE_SYSTEM_FMT "use system fmt instead of bundled one" OFF)
|
||||
option(USE_SYSTEM_TOML "use system toml++ instead of bundled one" OFF)
|
||||
|
||||
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
|
||||
|
||||
## Change binary & resources folder to parallel install with original GD.
|
||||
## This flag should be avoided because it leads to small regressions:
|
||||
## 1. There are personal scripts assuming the binary name to be "goldendict" -> require everyone to change the name in their script
|
||||
## 2. There are icon themes that assuming the icon name to be "goldendict" -> invalidate the GD icon when using a icon theme
|
||||
## 3. There are dictionary packages that install files to "/usr/share/goldendict/content" -> nullify the auto dict discovery
|
||||
option(USE_ALTERNATIVE_NAME "Force the name goldendict-ng " OFF)
|
||||
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") # to put staff in the ./cmake folder
|
||||
|
||||
## This should be avoided because of small regressions, as some scripts and icon themes assume the binary name and resources folder to be `goldendict`
|
||||
option(USE_ALTERNATIVE_NAME "For Linux, change the binary name and resource folder to goldendict-ng to parallel install with the original GD" OFF)
|
||||
|
||||
# vcpkg handling code, must be placed before project()
|
||||
if (WIN32)
|
||||
option(WITH_VCPKG_BREAKPAD "build with Breakpad support for VCPKG build only" OFF)
|
||||
if (DEFINED CMAKE_TOOLCHAIN_FILE)
|
||||
message(STATUS "Using toolchain file: ${CMAKE_TOOLCHAIN_FILE}")
|
||||
else ()
|
||||
|
@ -37,11 +29,9 @@ if (WIN32)
|
|||
set(VCPKG_MANIFEST_MODE OFF CACHE BOOL "disable existing manifest mode caused by the existrance of vcpkg.json" FORCE)
|
||||
set(CMAKE_TOOLCHAIN_FILE "${CMAKE_BINARY_DIR}/_deps/vcpkg-export-src/scripts/buildsystems/vcpkg.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
if (WITH_VCPKG_BREAKPAD)
|
||||
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
|
||||
if (WITH_VCPKG_BREAKPAD)
|
||||
list(APPEND VCPKG_MANIFEST_FEATURES "breakpad")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include(FeatureSummary)
|
||||
|
@ -49,7 +39,7 @@ include(FeatureSummary)
|
|||
project(goldendict-ng
|
||||
VERSION 24.11.0
|
||||
LANGUAGES CXX C)
|
||||
|
||||
|
||||
if (APPLE)
|
||||
enable_language(OBJCXX)
|
||||
set(CMAKE_OBJCXX_STANDARD 17)
|
||||
|
@ -60,13 +50,12 @@ set(CMAKE_CXX_STANDARD 17)
|
|||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(GOLDENDICT "goldendict") # binary/executable name
|
||||
if (USE_ALTERNATIVE_NAME )
|
||||
if (USE_ALTERNATIVE_NAME)
|
||||
set(GOLDENDICT "goldendict-ng")
|
||||
endif ()
|
||||
if (APPLE)
|
||||
set(GOLDENDICT "GoldenDict-ng")
|
||||
endif()
|
||||
|
||||
endif ()
|
||||
|
||||
#### Qt
|
||||
|
||||
|
@ -78,11 +67,10 @@ endif ()
|
|||
|
||||
find_package(Qt6 REQUIRED COMPONENTS ${GD_QT_COMPONENTS})
|
||||
|
||||
qt_standard_project_setup() # availiable after find_package(Qt6 .... Core
|
||||
qt_standard_project_setup()
|
||||
set(CMAKE_AUTORCC ON) # not included in the qt_standard_project_setup
|
||||
|
||||
#### Things required during configuration
|
||||
|
||||
block() # generate version.txt
|
||||
string(TIMESTAMP build_time UTC)
|
||||
find_package(Git)
|
||||
|
@ -163,11 +151,8 @@ target_link_libraries(${GOLDENDICT} PRIVATE
|
|||
Qt6::WebEngineWidgets
|
||||
Qt6::Widgets
|
||||
Qt6::Svg
|
||||
)
|
||||
|
||||
if (WITH_TTS)
|
||||
target_link_libraries(${GOLDENDICT} PRIVATE Qt6::TextToSpeech)
|
||||
endif ()
|
||||
$<$<BOOL:${WITH_TTS}>:Qt6::TextToSpeech>
|
||||
)
|
||||
|
||||
target_include_directories(${GOLDENDICT} PRIVATE
|
||||
${PROJECT_SOURCE_DIR}/thirdparty/qtsingleapplication/src
|
||||
|
@ -176,11 +161,7 @@ target_include_directories(${GOLDENDICT} PRIVATE
|
|||
${PROJECT_SOURCE_DIR}/src/dict
|
||||
${PROJECT_SOURCE_DIR}/src/dict/utils
|
||||
${PROJECT_SOURCE_DIR}/src/ui
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/src/windows)
|
||||
endif ()
|
||||
)
|
||||
|
||||
if (NOT USE_SYSTEM_TOML)
|
||||
target_include_directories(${GOLDENDICT} PRIVATE ${PROJECT_SOURCE_DIR}/thirdparty/tomlplusplus)
|
||||
|
@ -199,45 +180,22 @@ target_compile_definitions(${GOLDENDICT} PRIVATE
|
|||
)
|
||||
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC
|
||||
CMAKE_USED_HACK # temporal hack to avoid breaking qmake build
|
||||
MAKE_QTMULTIMEDIA_PLAYER
|
||||
MAKE_CHINESE_CONVERSION_SUPPORT
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC
|
||||
__WIN32
|
||||
INCLUDE_LIBRARY_PATH
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (WITH_FFMPEG_PLAYER)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_FFMPEG_PLAYER)
|
||||
endif ()
|
||||
|
||||
if(NOT WITH_TTS)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC NO_TTS_SUPPORT)
|
||||
endif()
|
||||
|
||||
|
||||
if (NOT WITH_EPWING_SUPPORT)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC NO_EPWING_SUPPORT)
|
||||
endif ()
|
||||
|
||||
if (WITH_ZIM)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC MAKE_ZIM_SUPPORT)
|
||||
endif ()
|
||||
|
||||
if (WITH_VCPKG_BREAKPAD)
|
||||
target_compile_definitions(${GOLDENDICT} PUBLIC USE_BREAKPAD)
|
||||
endif ()
|
||||
$<$<BOOL:${WIN32}>:__WIN32>
|
||||
$<$<BOOL:${WITH_FFMPEG_PLAYER}>:MAKE_FFMPEG_PLAYER>
|
||||
$<$<BOOL:${WITH_TTS}>:TTS_SUPPORT>
|
||||
$<$<BOOL:${WITH_EPWING_SUPPORT}>:EPWING_SUPPORT>
|
||||
$<$<BOOL:${WITH_ZIM}>:MAKE_ZIM_SUPPORT>
|
||||
$<$<BOOL:${WITH_VCPKG_BREAKPAD}>:USE_BREAKPAD>
|
||||
)
|
||||
|
||||
#### libraries linking && includes for Win or Unix
|
||||
|
||||
if (WIN32)
|
||||
include(Deps_Vcpkg)
|
||||
include(cmake/Deps_Vcpkg.cmake)
|
||||
else ()
|
||||
include(Deps_Unix)
|
||||
include(cmake/Deps_Unix.cmake)
|
||||
endif ()
|
||||
|
||||
#### add translations
|
||||
|
@ -261,155 +219,11 @@ add_dependencies(${GOLDENDICT} "release_translations")
|
|||
#### installation or assemble redistribution
|
||||
|
||||
if (APPLE)
|
||||
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
|
||||
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)
|
||||
|
||||
set_target_properties(${GOLDENDICT} PROPERTIES
|
||||
MACOSX_BUNDLE TRUE
|
||||
MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
|
||||
)
|
||||
|
||||
set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
|
||||
set(App_Name "${GOLDENDICT}.app")
|
||||
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")
|
||||
|
||||
# if anything wrong, delete this and affect lines, and see what's Qt will generate by default.
|
||||
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")
|
||||
|
||||
qt_generate_deploy_script(
|
||||
TARGET ${GOLDENDICT}
|
||||
OUTPUT_SCRIPT deploy_script
|
||||
CONTENT "
|
||||
set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
|
||||
set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
|
||||
qt_deploy_runtime_dependencies(
|
||||
EXECUTABLE \"${Redistributable_APP}\"
|
||||
GENERATE_QT_CONF
|
||||
NO_APP_STORE_COMPLIANCE)
|
||||
qt_deploy_translations()
|
||||
qt_deploy_qt_conf(\"${QtConfPath}\"
|
||||
PLUGINS_DIR PlugIns
|
||||
TRANSLATIONS_DIR Resources/translations)
|
||||
"
|
||||
)
|
||||
|
||||
install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
|
||||
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")
|
||||
|
||||
if (IS_READABLE "/opt/homebrew/share/opencc/")
|
||||
set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
|
||||
elseif (IS_READABLE "/usr/local/share/opencc/")
|
||||
set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find opencc's data folder!")
|
||||
endif ()
|
||||
|
||||
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
|
||||
|
||||
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
|
||||
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")
|
||||
|
||||
install(SCRIPT ${deploy_script})
|
||||
|
||||
install(CODE "execute_process(COMMAND codesign --force --deep -s - ${Redistributable_APP})")
|
||||
|
||||
find_program(CREATE-DMG "create-dmg")
|
||||
if (CREATE-DMG)
|
||||
install(CODE "
|
||||
execute_process(COMMAND ${CREATE-DMG} \
|
||||
--skip-jenkins \
|
||||
--format \"ULMO\"
|
||||
--volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
|
||||
--volicon ${CMAKE_SOURCE_DIR}/icons/macicon.icns \
|
||||
--icon \"${App_Name}\" 100 100
|
||||
--app-drop-link 300 100 \
|
||||
\"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
|
||||
\"${Assembling_Dir}\")"
|
||||
)
|
||||
else ()
|
||||
message(WARNING "create-dmg not found. No .dmg will be created")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (LINUX OR BSD)
|
||||
install(TARGETS ${GOLDENDICT})
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)
|
||||
|
||||
if (NOT USE_ALTERNATIVE_NAME)
|
||||
# see: config.cc -> getProgramDataDir
|
||||
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
|
||||
install(FILES ${qm_files} DESTINATION share/goldendict/locale)
|
||||
else ()
|
||||
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
|
||||
RENAME goldendict-ng.png)
|
||||
install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)
|
||||
|
||||
block() # patch the desktop file to adapt the binary & icon file's name change
|
||||
file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
|
||||
string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
|
||||
string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
|
||||
file(WRITE "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" "${DESKTOP_FILE_CONTENT}")
|
||||
endblock()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (WIN32)
|
||||
|
||||
set_target_properties(${GOLDENDICT}
|
||||
PROPERTIES
|
||||
WIN32_EXECUTABLE TRUE
|
||||
RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
|
||||
)
|
||||
|
||||
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)
|
||||
|
||||
qt_generate_deploy_script(
|
||||
TARGET ${GOLDENDICT}
|
||||
OUTPUT_SCRIPT deploy_script
|
||||
CONTENT "qt_deploy_runtime_dependencies(
|
||||
EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/goldendict.exe\"
|
||||
BIN_DIR .
|
||||
LIB_DIR .
|
||||
)"
|
||||
)
|
||||
|
||||
install(SCRIPT ${deploy_script})
|
||||
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)
|
||||
# TODO: do we really need to carry a copy of openSSL?
|
||||
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
|
||||
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)
|
||||
|
||||
# trick CPack to make the output folder as NSIS installer
|
||||
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
|
||||
DESTINATION .
|
||||
FILES_MATCHING
|
||||
PATTERN "*"
|
||||
PATTERN "*.pdb" EXCLUDE
|
||||
PATTERN "*.ilk" EXCLUDE)
|
||||
|
||||
|
||||
set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
|
||||
set(CPACK_GENERATOR "7Z;NSIS64")
|
||||
|
||||
# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
|
||||
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")
|
||||
|
||||
# NSIS specificS
|
||||
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
|
||||
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
|
||||
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
|
||||
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
|
||||
set(CPACK_NSIS_URL_INFO_ABOUT [=[https://xiaoyifang.github.io/goldendict-ng/]=])
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
|
||||
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
|
||||
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")
|
||||
|
||||
include(CPack)
|
||||
include(cmake/Package_macOS.cmake)
|
||||
elseif (LINUX OR BSD)
|
||||
include(cmake/Package_Linux.cmake)
|
||||
elseif (WIN32)
|
||||
include(cmake/Package_Windows.cmake)
|
||||
endif ()
|
||||
|
||||
feature_summary(WHAT ALL DESCRIPTION "Build configuration:")
|
||||
|
|
|
@ -1,13 +1,7 @@
|
|||
#### Various workarounds
|
||||
|
||||
if (APPLE)
|
||||
# old & new homebrew's include paths
|
||||
target_include_directories(${GOLDENDICT} PRIVATE /usr/local/include /opt/homebrew/include)
|
||||
|
||||
# libzim depends on ICU, but the ICU from homebrew is "key-only", we need to manually prioritize it
|
||||
# See `brew info icu4c` if this no longer works
|
||||
# Note: Remove icu4c@75 if it fails again
|
||||
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:/usr/local/opt/icu4c@75/lib/pkgconfig:/opt/homebrew/opt/icu4c@75/lib/pkgconfig:/usr/local/opt/icu4c/lib/pkgconfig:/opt/homebrew/opt/icu4c/lib/pkgconfig")
|
||||
endif ()
|
||||
|
||||
target_include_directories(${GOLDENDICT} PRIVATE
|
||||
|
@ -34,25 +28,20 @@ endif ()
|
|||
##### Finding packages from package manager
|
||||
|
||||
find_package(PkgConfig REQUIRED)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(BZip2 REQUIRED)
|
||||
|
||||
# Consider all PkgConfig dependencies as one
|
||||
pkg_check_modules(PKGCONFIG_DEPS IMPORTED_TARGET
|
||||
# Import all PkgConfig dependencies as one
|
||||
pkg_check_modules(DEPS REQUIRED IMPORTED_TARGET
|
||||
hunspell
|
||||
liblzma
|
||||
lzo2
|
||||
opencc
|
||||
vorbis # .ogg
|
||||
vorbisfile
|
||||
liblzma
|
||||
xapian-core
|
||||
zlib
|
||||
)
|
||||
|
||||
target_link_libraries(${GOLDENDICT} PRIVATE
|
||||
PkgConfig::PKGCONFIG_DEPS
|
||||
BZip2::BZip2
|
||||
ZLIB::ZLIB
|
||||
)
|
||||
target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::DEPS BZip2::BZip2)
|
||||
|
||||
# On FreeBSD, there are two iconv, libc iconv & GNU libiconv.
|
||||
# The system one is good enough, the following is a workaround to use libc iconv on freeBSD.
|
||||
|
@ -82,14 +71,27 @@ if (WITH_EPWING_SUPPORT)
|
|||
endif ()
|
||||
|
||||
if (WITH_ZIM)
|
||||
if (APPLE)
|
||||
# ICU from homebrew is "keg-only", we need to manually prioritize it -> see `brew info icu4c`
|
||||
# And we need to find the correct one if multiple versions coexist.
|
||||
set(ENV{PATH} "$ENV{PATH}:/usr/local/bin/:/opt/homebrew/bin") # add brew command into PATH
|
||||
execute_process(
|
||||
COMMAND sh -c [=[brew --prefix $(brew deps libzim | grep icu4c)]=]
|
||||
OUTPUT_VARIABLE ICU_REQUIRED_BY_ZIM_PREFIX
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
COMMAND_ERROR_IS_FATAL ANY)
|
||||
message(STATUS "Found correct homebrew icu path -> ${ICU_REQUIRED_BY_ZIM_PREFIX}")
|
||||
set(ENV{PKG_CONFIG_PATH} "$ENV{PKG_CONFIG_PATH}:${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/pkgconfig")
|
||||
message(STATUS "Updated pkg_config_path -> $ENV{PKG_CONFIG_PATH}")
|
||||
|
||||
# icu4c as transitive dependency of libzim may not be automatically copied into app bundle
|
||||
# so we manually discover the icu4c from homebrew, then find the relevant dylibs
|
||||
set(BREW_ICU_ADDITIONAL_DYLIBS "${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicudata.dylib ${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicui18n.dylib ${ICU_REQUIRED_BY_ZIM_PREFIX}/lib/libicuuc.dylib")
|
||||
message(STATUS "Additional ICU `.dylib`s -> ${BREW_ICU_ADDITIONAL_DYLIBS}")
|
||||
endif ()
|
||||
|
||||
pkg_check_modules(ZIM REQUIRED IMPORTED_TARGET libzim)
|
||||
target_link_libraries(${GOLDENDICT} PRIVATE PkgConfig::ZIM)
|
||||
if (APPLE)
|
||||
# For some reason, icu4c as transitive dependency of libzim may not be copied into app bundle,
|
||||
# so we directly depends on it to help macdeployqt or whatever
|
||||
pkg_check_modules(BREW_ICU_FOR_LIBZIM_FORCE_LINK REQUIRED IMPORTED_TARGET icu-i18n icu-uc)
|
||||
target_link_libraries(${GOLDENDICT} PUBLIC PkgConfig::BREW_ICU_FOR_LIBZIM_FORCE_LINK)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_SYSTEM_FMT)
|
||||
|
|
22
cmake/Package_Linux.cmake
Normal file
22
cmake/Package_Linux.cmake
Normal file
|
@ -0,0 +1,22 @@
|
|||
install(TARGETS ${GOLDENDICT})
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop DESTINATION share/applications)
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.metainfo.xml DESTINATION share/metainfo)
|
||||
|
||||
if (NOT USE_ALTERNATIVE_NAME)
|
||||
# see: config.cc -> getProgramDataDir
|
||||
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict")
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps)
|
||||
install(FILES ${qm_files} DESTINATION share/goldendict/locale)
|
||||
else ()
|
||||
add_compile_definitions(PROGRAM_DATA_DIR="${CMAKE_INSTALL_PREFIX}/share/goldendict-ng")
|
||||
install(FILES ${CMAKE_SOURCE_DIR}/redist/icons/goldendict.png DESTINATION share/pixmaps
|
||||
RENAME goldendict-ng.png)
|
||||
install(FILES ${qm_files} DESTINATION share/goldendict-ng/locale)
|
||||
|
||||
block() # patch the desktop file to adapt the binary & icon file's name change
|
||||
file(READ "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" DESKTOP_FILE_CONTENT)
|
||||
string(REGEX REPLACE "\nIcon=goldendict\n" "\nIcon=goldendict-ng\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
|
||||
string(REGEX REPLACE "\nExec=goldendict %u\n" "\nExec=goldendict-ng %u\n" DESKTOP_FILE_CONTENT "${DESKTOP_FILE_CONTENT}")
|
||||
file(WRITE "${CMAKE_SOURCE_DIR}/redist/io.github.xiaoyifang.goldendict_ng.desktop" "${DESKTOP_FILE_CONTENT}")
|
||||
endblock()
|
||||
endif ()
|
55
cmake/Package_Windows.cmake
Normal file
55
cmake/Package_Windows.cmake
Normal file
|
@ -0,0 +1,55 @@
|
|||
set_target_properties(${GOLDENDICT}
|
||||
PROPERTIES
|
||||
WIN32_EXECUTABLE TRUE
|
||||
RUNTIME_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${GD_WIN_OUTPUT_DIR}"
|
||||
)
|
||||
|
||||
# TODO: this breaks "Multi-Config" build systems like VisualStudio.
|
||||
set(CMAKE_INSTALL_PREFIX "${GD_WIN_OUTPUT_DIR}" CACHE PATH "If you see this message, don't change this unless you want look into CMake build script. If you are an expert, yes, this is wrong. Help welcomed." FORCE)
|
||||
|
||||
qt_generate_deploy_script(
|
||||
TARGET ${GOLDENDICT}
|
||||
OUTPUT_SCRIPT deploy_script
|
||||
CONTENT "qt_deploy_runtime_dependencies(
|
||||
EXECUTABLE \"${CMAKE_INSTALL_PREFIX}/goldendict.exe\"
|
||||
BIN_DIR .
|
||||
LIB_DIR .
|
||||
)"
|
||||
)
|
||||
|
||||
install(SCRIPT ${deploy_script})
|
||||
install(DIRECTORY "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/share/opencc" DESTINATION .)
|
||||
# Note: These are runtime dependencies that aren't copied automatically
|
||||
# See Qt's network -> SSL documentation https://doc.qt.io/qt-6/ssl.html#considerations-while-packaging-your-application
|
||||
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libssl-3-x64.dll" DESTINATION .)
|
||||
install(FILES "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/bin/libcrypto-3-x64.dll" DESTINATION .)
|
||||
|
||||
# trick CPack to make the output folder as NSIS installer
|
||||
install(DIRECTORY "${GD_WIN_OUTPUT_DIR}/"
|
||||
DESTINATION .
|
||||
FILES_MATCHING
|
||||
PATTERN "*"
|
||||
PATTERN "*.pdb" EXCLUDE
|
||||
PATTERN "*.ilk" EXCLUDE)
|
||||
|
||||
|
||||
set(CPACK_PACKAGE_FILE_NAME "GoldenDict-ng-${PROJECT_VERSION}-Qt${Qt6Widgets_VERSION}")
|
||||
set(CPACK_GENERATOR "7Z;NSIS64")
|
||||
|
||||
# override the default install path, which is $PROGRAMFILES64\${project-name} ${project-version} in NSIS
|
||||
set(CPACK_PACKAGE_INSTALL_DIRECTORY "GoldenDict-ng")
|
||||
|
||||
# NSIS specifics
|
||||
set(CPACK_NSIS_MANIFEST_DPI_AWARE ON)
|
||||
set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/icons/programicon.ico")
|
||||
set(CPACK_NSIS_PACKAGE_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
|
||||
set(CPACK_NSIS_DISPLAY_NAME "${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}")
|
||||
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt")
|
||||
|
||||
# Copied from https://crascit.com/2015/08/07/cmake_cpack_nsis_shortcuts_with_parameters/
|
||||
set(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\GoldenDict-ng.lnk' '$INSTDIR\\\\${GOLDENDICT}.exe'")
|
||||
set(CPACK_NSIS_DELETE_ICONS_EXTRA "Delete '$SMPROGRAMS\\\\$START_MENU\\\\GoldenDict-ng.lnk'")
|
||||
|
||||
include(CPack)
|
69
cmake/Package_macOS.cmake
Normal file
69
cmake/Package_macOS.cmake
Normal file
|
@ -0,0 +1,69 @@
|
|||
set(PLIST_FILE "${CMAKE_BINARY_DIR}/info_generated.plist")
|
||||
configure_file("${CMAKE_SOURCE_DIR}/redist/mac_info_plist_template_cmake.plist" "${PLIST_FILE}" @ONLY)
|
||||
|
||||
set_target_properties(${GOLDENDICT} PROPERTIES
|
||||
MACOSX_BUNDLE TRUE
|
||||
MACOSX_BUNDLE_INFO_PLIST "${PLIST_FILE}"
|
||||
)
|
||||
|
||||
set(Assembling_Dir "${CMAKE_BINARY_DIR}/redist")
|
||||
set(App_Name "${GOLDENDICT}.app")
|
||||
set(Redistributable_APP "${Assembling_Dir}/${App_Name}")
|
||||
|
||||
# If anything goes wrong, delete this and the affected lines, and see what Qt generates by default.
|
||||
set(QtConfPath "${Redistributable_APP}/Contents/Resources/qt.conf")
|
||||
|
||||
qt_generate_deploy_script(
|
||||
TARGET ${GOLDENDICT}
|
||||
OUTPUT_SCRIPT deploy_script
|
||||
CONTENT "
|
||||
set(QT_DEPLOY_PREFIX \"${Redistributable_APP}\")
|
||||
set(QT_DEPLOY_TRANSLATIONS_DIR \"Contents/Resources/translations\")
|
||||
qt_deploy_runtime_dependencies(
|
||||
EXECUTABLE \"${Redistributable_APP}\"
|
||||
ADDITIONAL_LIBRARIES ${BREW_ICU_ADDITIONAL_DYLIBS}
|
||||
GENERATE_QT_CONF
|
||||
NO_APP_STORE_COMPLIANCE)
|
||||
qt_deploy_translations()
|
||||
qt_deploy_qt_conf(\"${QtConfPath}\"
|
||||
PLUGINS_DIR PlugIns
|
||||
TRANSLATIONS_DIR Resources/translations)
|
||||
"
|
||||
)
|
||||
|
||||
install(TARGETS ${GOLDENDICT} BUNDLE DESTINATION "${Assembling_Dir}")
|
||||
install(FILES ${qm_files} DESTINATION "${Redistributable_APP}/Contents/MacOS/locale")
|
||||
|
||||
if (IS_READABLE "/opt/homebrew/share/opencc/")
|
||||
set(OPENCC_DATA_PATH "/opt/homebrew/share/opencc/" CACHE PATH "opencc's data path")
|
||||
elseif (IS_READABLE "/usr/local/share/opencc/")
|
||||
set(OPENCC_DATA_PATH "/usr/local/share/opencc/" CACHE PATH "opencc's data path")
|
||||
else ()
|
||||
message(FATAL_ERROR "Cannot find opencc's data folder!")
|
||||
endif ()
|
||||
|
||||
file(REAL_PATH "${OPENCC_DATA_PATH}" OPENCC_DATA_PATH_FOR_REAL)
|
||||
|
||||
message(STATUS "OPENCC data is found -> ${OPENCC_DATA_PATH_FOR_REAL}")
|
||||
install(DIRECTORY "${OPENCC_DATA_PATH_FOR_REAL}" DESTINATION "${Redistributable_APP}/Contents/MacOS")
|
||||
|
||||
install(SCRIPT ${deploy_script})
|
||||
|
||||
install(CODE "execute_process(COMMAND codesign --force --deep -s - ${Redistributable_APP})")
|
||||
|
||||
find_program(CREATE-DMG "create-dmg")
|
||||
if (CREATE-DMG)
|
||||
install(CODE "
|
||||
execute_process(COMMAND ${CREATE-DMG} \
|
||||
--skip-jenkins \
|
||||
--format \"ULMO\"
|
||||
--volname ${CMAKE_PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${CMAKE_SYSTEM_PROCESSOR} \
|
||||
--volicon ${CMAKE_SOURCE_DIR}/icons/macicon.icns \
|
||||
--icon \"${App_Name}\" 100 100
|
||||
--app-drop-link 300 100 \
|
||||
\"GoldenDict-ng-${CMAKE_PROJECT_VERSION}-Qt${Qt6_VERSION}-macOS-${CMAKE_SYSTEM_PROCESSOR}.dmg\" \
|
||||
\"${Assembling_Dir}\")"
|
||||
)
|
||||
else ()
|
||||
message(WARNING "create-dmg not found. No .dmg will be created")
|
||||
endif ()
|
|
@ -4,13 +4,11 @@
|
|||
#include "article_maker.hh"
|
||||
#include "config.hh"
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "globalbroadcaster.hh"
|
||||
#include "globalregex.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "utils.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include <QDir>
|
||||
#include <QFile>
|
||||
#include <QTextDocumentFragment>
|
||||
|
@ -22,7 +20,6 @@
|
|||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::set;
|
||||
using std::list;
|
||||
|
||||
|
@ -136,19 +133,6 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
|
|||
R"(<link rel="icon" type="image/png" href="qrc:///flags/)" + Html::escape( icon.toUtf8().data() ) + "\" >\n";
|
||||
}
|
||||
|
||||
result += QString::fromUtf8( R"(
|
||||
<script>
|
||||
function tr(key) {
|
||||
var tr_map = {
|
||||
"Expand article": "%1", "Collapse article": "%2"
|
||||
};
|
||||
return tr_map[key] || '';
|
||||
}
|
||||
</script>
|
||||
)" )
|
||||
.arg( tr( "Expand article" ), tr( "Collapse article" ) )
|
||||
.toStdString();
|
||||
|
||||
result += R"(<script src="qrc:///scripts/gd-builtin.js"></script>)";
|
||||
result += R"(<script src="qrc:///scripts/mark.min.js"></script>)";
|
||||
|
||||
|
@ -162,7 +146,10 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word, QString const &
|
|||
|
||||
#if QT_VERSION >= QT_VERSION_CHECK( 6, 5, 0 )
|
||||
if ( GlobalBroadcaster::instance()->getPreference()->darkReaderMode == Config::Dark::Auto
|
||||
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark ) {
|
||||
#if !defined( Q_OS_WINDOWS ) // not properly works on Windows.
|
||||
&& QGuiApplication::styleHints()->colorScheme() == Qt::ColorScheme::Dark
|
||||
#endif
|
||||
&& GlobalBroadcaster::instance()->getPreference()->darkMode == Config::Dark::On ) {
|
||||
darkReaderModeEnabled = true;
|
||||
}
|
||||
#endif
|
||||
|
@ -482,7 +469,7 @@ ArticleRequest::ArticleRequest( QString const & word,
|
|||
|
||||
// Accumulate main forms
|
||||
for ( const auto & activeDict : activeDicts ) {
|
||||
auto const s = activeDict->findHeadwordsForSynonym( gd::removeTrailingZero( word ) );
|
||||
auto const s = activeDict->findHeadwordsForSynonym( Text::removeTrailingZero( word ) );
|
||||
|
||||
connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );
|
||||
|
||||
|
@ -519,9 +506,9 @@ void ArticleRequest::altSearchFinished()
|
|||
|
||||
altsDone = true; // So any pending signals in queued mode won't mess us up
|
||||
|
||||
vector< wstring > altsVector( alts.begin(), alts.end() );
|
||||
vector< std::u32string > altsVector( alts.begin(), alts.end() );
|
||||
|
||||
wstring wordStd = word.toStdU32String();
|
||||
std::u32string wordStd = word.toStdU32String();
|
||||
|
||||
if ( activeDicts.size() <= 1 ) {
|
||||
articleSizeLimit = -1; // Don't collapse article if only one dictionary presented
|
||||
|
@ -532,7 +519,7 @@ void ArticleRequest::altSearchFinished()
|
|||
sptr< Dictionary::DataRequest > r = activeDict->getArticle(
|
||||
wordStd,
|
||||
altsVector,
|
||||
gd::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
|
||||
Text::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
|
||||
ignoreDiacritics );
|
||||
|
||||
connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );
|
||||
|
@ -540,7 +527,7 @@ void ArticleRequest::altSearchFinished()
|
|||
bodyRequests.push_back( r );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "getArticle request error (%s) in \"%s\"\n", e.what(), activeDict->getName().c_str() );
|
||||
qWarning( "getArticle request error (%s) in \"%s\"", e.what(), activeDict->getName().c_str() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -618,7 +605,7 @@ void ArticleRequest::bodyFinished()
|
|||
return;
|
||||
}
|
||||
|
||||
GD_DPRINTF( "some body finished" );
|
||||
qDebug( "some body finished" );
|
||||
|
||||
bool wasUpdated = false;
|
||||
|
||||
|
@ -628,7 +615,7 @@ void ArticleRequest::bodyFinished()
|
|||
if ( bodyRequests.front()->isFinished() ) {
|
||||
// Good
|
||||
|
||||
GD_DPRINTF( "one finished." );
|
||||
qDebug( "one finished." );
|
||||
|
||||
Dictionary::DataRequest & req = *bodyRequests.front();
|
||||
|
||||
|
@ -676,11 +663,11 @@ void ArticleRequest::bodyFinished()
|
|||
</div>)" ),
|
||||
dictId,
|
||||
collapse ? R"(style="cursor:pointer;")" : "",
|
||||
collapse ? tr( "Expand article" ).toStdString() : "",
|
||||
"",
|
||||
Html::escape( tr( "From " ).toStdString() ),
|
||||
Html::escape( activeDict->getName() ),
|
||||
collapse ? "gdexpandicon" : "gdcollapseicon",
|
||||
collapse ? "" : tr( "Collapse article" ).toStdString() );
|
||||
"" );
|
||||
|
||||
head += R"(<div class="gddictnamebodyseparator"></div>)";
|
||||
|
||||
|
@ -718,7 +705,7 @@ void ArticleRequest::bodyFinished()
|
|||
}
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "getDataSlice error: %s\n", e.what() );
|
||||
qWarning( "getDataSlice error: %s", e.what() );
|
||||
}
|
||||
|
||||
wasUpdated = true;
|
||||
|
@ -728,12 +715,12 @@ void ArticleRequest::bodyFinished()
|
|||
// Signal that this dictionary has finished, for pronunciation handling
|
||||
GlobalBroadcaster::instance()->pronounce_engine.finishDictionary( dictId );
|
||||
}
|
||||
GD_DPRINTF( "erasing.." );
|
||||
qDebug( "erasing.." );
|
||||
bodyRequests.pop_front();
|
||||
GD_DPRINTF( "erase done.." );
|
||||
qDebug( "erase done.." );
|
||||
}
|
||||
else {
|
||||
GD_DPRINTF( "one not finished." );
|
||||
qDebug( "one not finished." );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -976,7 +963,7 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
|||
|
||||
// Look it up
|
||||
|
||||
// GD_DPRINTF( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );
|
||||
// qDebug( "Looking up %s", qPrintable( currentSplittedWordCompound ) );
|
||||
|
||||
stemmedWordFinder->expressionMatch( currentSplittedWordCompound,
|
||||
activeDicts,
|
||||
|
@ -1006,7 +993,7 @@ void ArticleRequest::individualWordFinished()
|
|||
WordFinder::SearchResults const & results = stemmedWordFinder->getResults();
|
||||
|
||||
if ( results.size() ) {
|
||||
wstring source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
|
||||
std::u32string source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
|
||||
|
||||
bool hadSomething = false;
|
||||
|
||||
|
@ -1020,7 +1007,7 @@ void ArticleRequest::individualWordFinished()
|
|||
|
||||
// Prefix match found. Check if the aliases are acceptable.
|
||||
|
||||
wstring result( Folding::applySimpleCaseOnly( results[ x ].first ) );
|
||||
std::u32string result( Folding::applySimpleCaseOnly( results[ x ].first ) );
|
||||
|
||||
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) {
|
||||
// The resulting string begins with the source one
|
||||
|
|
|
@ -88,7 +88,7 @@ class ArticleRequest: public Dictionary::DataRequest
|
|||
QMap< QString, QString > contexts;
|
||||
std::vector< sptr< Dictionary::Class > > activeDicts;
|
||||
|
||||
std::set< gd::wstring, std::less<> > alts; // Accumulated main forms
|
||||
std::set< std::u32string, std::less<> > alts; // Accumulated main forms
|
||||
std::list< sptr< Dictionary::WordSearchRequest > > altSearches;
|
||||
std::list< sptr< Dictionary::DataRequest > > bodyRequests;
|
||||
bool altsDone{ false };
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include <stdint.h>
|
||||
#include <QUrl>
|
||||
#include "article_netmgr.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "utils.hh"
|
||||
#include <QNetworkAccessManager>
|
||||
#include "globalbroadcaster.hh"
|
||||
|
@ -93,7 +92,7 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
|
|||
// If this is not an external URL, it can be blocked right here; there is no need to execute the code below.
|
||||
//such as bres://upload.wikimedia.... etc .
|
||||
if ( !Utils::isExternalLink( url ) ) {
|
||||
gdWarning( R"(Blocking element "%s" as built-in link )", req.url().toEncoded().data() );
|
||||
qWarning( R"(Blocking element "%s" as built-in link )", req.url().toEncoded().data() );
|
||||
return new BlockedNetworkReply( this );
|
||||
}
|
||||
|
||||
|
@ -108,7 +107,7 @@ QNetworkReply * ArticleNetworkAccessManager::getArticleReply( QNetworkRequest co
|
|||
if ( !url.host().endsWith( refererUrl.host() )
|
||||
&& Utils::Url::getHostBaseFromUrl( url ) != Utils::Url::getHostBaseFromUrl( refererUrl )
|
||||
&& !url.scheme().startsWith( "data" ) ) {
|
||||
gdWarning( R"(Blocking element "%s" due to not same domain)", url.toEncoded().data() );
|
||||
qWarning( R"(Blocking element "%s" due to not same domain)", url.toEncoded().data() );
|
||||
|
||||
return new BlockedNetworkReply( this );
|
||||
}
|
||||
|
@ -238,7 +237,7 @@ sptr< Dictionary::DataRequest > ArticleNetworkAccessManager::getResource( QUrl c
|
|||
return dictionary->getResource( Utils::Url::path( url ).mid( 1 ).toUtf8().data() );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "getResource request error (%s) in \"%s\"\n", e.what(), dictionary->getName().c_str() );
|
||||
qWarning( "getResource request error (%s) in \"%s\"", e.what(), dictionary->getName().c_str() );
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
@ -284,7 +283,7 @@ ArticleResourceReply::ArticleResourceReply( QObject * parent,
|
|||
|
||||
if ( req->isFinished() ) {
|
||||
emit finishedSignal();
|
||||
GD_DPRINTF( "In-place finish.\n" );
|
||||
qDebug( "In-place finish." );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -353,11 +352,11 @@ qint64 ArticleResourceReply::readData( char * out, qint64 maxSize )
|
|||
return 0;
|
||||
}
|
||||
|
||||
GD_DPRINTF( "====reading %lld of (%lld) bytes, %lld bytes readed . Finish status: %d",
|
||||
toRead,
|
||||
avail,
|
||||
alreadyRead,
|
||||
finished );
|
||||
qDebug( "====reading %lld of (%lld) bytes, %lld bytes readed . Finish status: %d",
|
||||
toRead,
|
||||
avail,
|
||||
alreadyRead,
|
||||
finished );
|
||||
|
||||
try {
|
||||
req->getDataSlice( alreadyRead, toRead, out );
|
||||
|
|
|
@ -1,3 +1 @@
|
|||
Code to support GD's internal/external audio players.
|
||||
|
||||
Only `audioplayerinterface.hh` is supposed to be used outside this folder.
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#include "audiooutput.hh"
|
||||
|
||||
#include <QAudioFormat>
|
||||
#include <QtConcurrent/qtconcurrentrun.h>
|
||||
#include <QtConcurrentRun>
|
||||
#include <QFuture>
|
||||
#include <QWaitCondition>
|
||||
#include <QCoreApplication>
|
||||
|
|
|
@ -3,40 +3,42 @@
|
|||
|
||||
#include <QScopedPointer>
|
||||
#include <QObject>
|
||||
#include <utility>
|
||||
#include "audioplayerfactory.hh"
|
||||
#include "ffmpegaudioplayer.hh"
|
||||
#include "multimediaaudioplayer.hh"
|
||||
#include "externalaudioplayer.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
AudioPlayerFactory::AudioPlayerFactory( Config::Preferences const & p ):
|
||||
useInternalPlayer( p.useInternalPlayer ),
|
||||
internalPlayerBackend( p.internalPlayerBackend ),
|
||||
audioPlaybackProgram( p.audioPlaybackProgram )
|
||||
AudioPlayerFactory::AudioPlayerFactory( bool useInternalPlayer,
|
||||
InternalPlayerBackend internalPlayerBackend,
|
||||
QString audioPlaybackProgram ):
|
||||
useInternalPlayer( useInternalPlayer ),
|
||||
internalPlayerBackend( std::move( internalPlayerBackend ) ),
|
||||
audioPlaybackProgram( std::move( audioPlaybackProgram ) )
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
void AudioPlayerFactory::setPreferences( Config::Preferences const & p )
|
||||
void AudioPlayerFactory::setPreferences( bool new_useInternalPlayer,
|
||||
const InternalPlayerBackend & new_internalPlayerBackend,
|
||||
const QString & new_audioPlaybackProgram )
|
||||
{
|
||||
if ( p.useInternalPlayer != useInternalPlayer ) {
|
||||
useInternalPlayer = p.useInternalPlayer;
|
||||
internalPlayerBackend = p.internalPlayerBackend;
|
||||
audioPlaybackProgram = p.audioPlaybackProgram;
|
||||
if ( useInternalPlayer != new_useInternalPlayer ) {
|
||||
useInternalPlayer = new_useInternalPlayer;
|
||||
internalPlayerBackend = new_internalPlayerBackend;
|
||||
audioPlaybackProgram = new_audioPlaybackProgram;
|
||||
reset();
|
||||
}
|
||||
else if ( useInternalPlayer && p.internalPlayerBackend != internalPlayerBackend ) {
|
||||
internalPlayerBackend = p.internalPlayerBackend;
|
||||
else if ( useInternalPlayer && internalPlayerBackend != new_internalPlayerBackend ) {
|
||||
internalPlayerBackend = new_internalPlayerBackend;
|
||||
reset();
|
||||
}
|
||||
else if ( !useInternalPlayer && p.audioPlaybackProgram != audioPlaybackProgram ) {
|
||||
audioPlaybackProgram = p.audioPlaybackProgram;
|
||||
else if ( !useInternalPlayer && new_audioPlaybackProgram != audioPlaybackProgram ) {
|
||||
audioPlaybackProgram = new_audioPlaybackProgram;
|
||||
ExternalAudioPlayer * const externalPlayer = qobject_cast< ExternalAudioPlayer * >( playerPtr.data() );
|
||||
if ( externalPlayer ) {
|
||||
setAudioPlaybackProgram( *externalPlayer );
|
||||
}
|
||||
else {
|
||||
gdWarning( "External player was expected, but it does not exist.\n" );
|
||||
qWarning( "External player was expected, but it does not exist." );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -44,29 +46,8 @@ void AudioPlayerFactory::setPreferences( Config::Preferences const & p )
|
|||
void AudioPlayerFactory::reset()
|
||||
{
|
||||
if ( useInternalPlayer ) {
|
||||
// qobject_cast checks below account for the case when an unsupported backend
|
||||
// is stored in config. After this backend is replaced with the default one
|
||||
// upon preferences saving, the code below does not reset playerPtr with
|
||||
// another object of the same type.
|
||||
|
||||
#ifdef MAKE_FFMPEG_PLAYER
|
||||
Q_ASSERT( Config::InternalPlayerBackend::defaultBackend().isFfmpeg()
|
||||
&& "Adjust the code below after changing the default backend." );
|
||||
|
||||
if ( !internalPlayerBackend.isQtmultimedia() ) {
|
||||
if ( !playerPtr || !qobject_cast< Ffmpeg::AudioPlayer * >( playerPtr.data() ) ) {
|
||||
playerPtr.reset( new Ffmpeg::AudioPlayer );
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MAKE_QTMULTIMEDIA_PLAYER
|
||||
if ( !playerPtr || !qobject_cast< MultimediaAudioPlayer * >( playerPtr.data() ) ) {
|
||||
playerPtr.reset( new MultimediaAudioPlayer );
|
||||
}
|
||||
playerPtr.reset( internalPlayerBackend.getActualPlayer() );
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::unique_ptr< ExternalAudioPlayer > externalPlayer( new ExternalAudioPlayer );
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "audioplayerinterface.hh"
|
||||
#include "config.hh"
|
||||
#include "internalplayerbackend.hh"
|
||||
|
||||
class ExternalAudioPlayer;
|
||||
|
||||
|
@ -13,8 +13,12 @@ class AudioPlayerFactory
|
|||
Q_DISABLE_COPY( AudioPlayerFactory )
|
||||
|
||||
public:
|
||||
explicit AudioPlayerFactory( Config::Preferences const & );
|
||||
void setPreferences( Config::Preferences const & );
|
||||
explicit AudioPlayerFactory( bool useInternalPlayer,
|
||||
InternalPlayerBackend internalPlayerBackend,
|
||||
QString audioPlaybackProgram );
|
||||
void setPreferences( bool new_useInternalPlayer,
|
||||
const InternalPlayerBackend & new_internalPlayerBackend,
|
||||
const QString & new_audioPlaybackProgram );
|
||||
/// The returned reference to a smart pointer is valid as long as this object
|
||||
/// exists. The pointer to the owned AudioPlayerInterface may change after the
|
||||
/// call to setPreferences(), but it is guaranteed to never be null.
|
||||
|
@ -28,7 +32,7 @@ private:
|
|||
void setAudioPlaybackProgram( ExternalAudioPlayer & externalPlayer );
|
||||
|
||||
bool useInternalPlayer;
|
||||
Config::InternalPlayerBackend internalPlayerBackend;
|
||||
InternalPlayerBackend internalPlayerBackend;
|
||||
QString audioPlaybackProgram;
|
||||
AudioPlayerPtr playerPtr;
|
||||
};
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
#include "audiooutput.hh"
|
||||
#include "ffmpegaudio.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "utils.hh"
|
||||
#include <QAudioDevice>
|
||||
#include <QDataStream>
|
||||
|
@ -88,7 +87,7 @@ static int readAudioData( void * opaque, unsigned char * buffer, int bufferSize
|
|||
// QDataStream::readRawData() returns 0 at EOF => return AVERROR_EOF in this case.
|
||||
// An error is unlikely here, so just print a warning and return AVERROR_EOF too.
|
||||
if ( bytesRead < 0 ) {
|
||||
gdWarning( "readAudioData: error while reading raw data." );
|
||||
qWarning( "readAudioData: error while reading raw data." );
|
||||
}
|
||||
return bytesRead > 0 ? bytesRead : AVERROR_EOF;
|
||||
}
|
||||
|
@ -170,11 +169,11 @@ bool DecoderContext::openCodec( QString & errorString )
|
|||
|
||||
// 61 = FFmpeg 7.0 -> https://github.com/FFmpeg/FFmpeg/blob/release/7.0/libavcodec/version_major.h
|
||||
#if LIBAVCODEC_VERSION_MAJOR >= 61
|
||||
gdDebug( "Codec open: %s: channels: %d, rate: %d, format: %s\n",
|
||||
codec_->long_name,
|
||||
codecContext_->ch_layout.nb_channels,
|
||||
codecContext_->sample_rate,
|
||||
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
|
||||
qDebug( "Codec open: %s: channels: %d, rate: %d, format: %s",
|
||||
codec_->long_name,
|
||||
codecContext_->ch_layout.nb_channels,
|
||||
codecContext_->sample_rate,
|
||||
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
|
||||
|
||||
if ( !av_channel_layout_check( &codecContext_->ch_layout ) ) {
|
||||
av_channel_layout_default( &codecContext_->ch_layout, codecContext_->ch_layout.nb_channels );
|
||||
|
@ -193,11 +192,11 @@ bool DecoderContext::openCodec( QString & errorString )
|
|||
qDebug() << "swr_alloc_set_opts2 failed.";
|
||||
}
|
||||
#else
|
||||
gdDebug( "Codec open: %s: channels: %d, rate: %d, format: %s\n",
|
||||
codec_->long_name,
|
||||
codecContext_->channels,
|
||||
codecContext_->sample_rate,
|
||||
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
|
||||
qDebug( "Codec open: %s: channels: %d, rate: %d, format: %s",
|
||||
codec_->long_name,
|
||||
codecContext_->channels,
|
||||
codecContext_->sample_rate,
|
||||
av_get_sample_fmt_name( codecContext_->sample_fmt ) );
|
||||
|
||||
auto layout = codecContext_->channel_layout;
|
||||
if ( !layout ) {
|
||||
|
@ -367,7 +366,7 @@ bool DecoderContext::normalizeAudio( AVFrame * frame, vector< uint8_t > & sample
|
|||
return false;
|
||||
}
|
||||
else {
|
||||
// qDebug( "out_count:%d, out_nb_samples:%d, frame->nb_samples:%d \n", out_count, out_nb_samples, frame->nb_samples );
|
||||
// qDebug( "out_count:%d, out_nb_samples:%d, frame->nb_samples:%d ", out_count, out_nb_samples, frame->nb_samples );
|
||||
}
|
||||
|
||||
int actual_size = av_samples_get_buffer_size( nullptr, dst_channels, out_nb_samples, AV_SAMPLE_FMT_S16, 1 );
|
||||
|
|
54
src/audio/internalplayerbackend.cc
Normal file
54
src/audio/internalplayerbackend.cc
Normal file
|
@ -0,0 +1,54 @@
|
|||
#include "internalplayerbackend.hh"
|
||||
#include "ffmpegaudioplayer.hh"
|
||||
#include "multimediaaudioplayer.hh"
|
||||
|
||||
// Names used to identify the internal player backends. Each constant is only
// defined when the corresponding backend was enabled for this build.
#ifdef MAKE_FFMPEG_PLAYER
constexpr const char * ffmpeg = "FFmpeg";
#endif

#ifdef MAKE_QTMULTIMEDIA_PLAYER
constexpr const char * qtmultimedia = "Qt Multimedia";
#endif
|
||||
|
||||
bool InternalPlayerBackend::anyAvailable()
|
||||
{
|
||||
#if defined( MAKE_FFMPEG_PLAYER ) || defined( MAKE_QTMULTIMEDIA_PLAYER )
|
||||
return true;
|
||||
#else
|
||||
static_assert( false, "No audio player backend. Please enable one." );
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Builds the list of backend names compiled into this binary.
/// Qt Multimedia, when enabled, is listed before FFmpeg.
QStringList InternalPlayerBackend::availableBackends()
{
  QStringList backends;
#ifdef MAKE_QTMULTIMEDIA_PLAYER
  backends.append( qtmultimedia );
#endif
#ifdef MAKE_FFMPEG_PLAYER
  backends.append( ffmpeg );
#endif
  return backends;
}
|
||||
/// Creates a player object for the backend selected by `name`.
///
/// If `name` is empty or does not match any compiled-in backend, it is
/// overwritten with the first entry of availableBackends() before the player
/// is created. The returned object is allocated with `new`; nullptr is
/// returned only when none of the branches below matches.
AudioPlayerInterface * InternalPlayerBackend::getActualPlayer()
{
  const QStringList backends = availableBackends();

  // The backend stored in the user's config is not available in this build:
  // fall back to the default (first listed) one.
  const bool configuredBackendUsable = !name.isEmpty() && backends.contains( name );
  if ( !configuredBackendUsable ) {
    name = backends.constFirst();
  }

#ifdef MAKE_FFMPEG_PLAYER
  if ( name == ffmpeg ) {
    return new Ffmpeg::AudioPlayer();
  }
#endif
#ifdef MAKE_QTMULTIMEDIA_PLAYER
  if ( name == qtmultimedia ) {
    return new MultimediaAudioPlayer();
  }
#endif

  qCritical( "Impossible situation. If ever reached, fix elsewhere. " );
  return nullptr;
}
|
41
src/audio/internalplayerbackend.hh
Normal file
41
src/audio/internalplayerbackend.hh
Normal file
|
@ -0,0 +1,41 @@
|
|||
#pragma once
|
||||
#include "audioplayerinterface.hh"
|
||||
#include "ffmpegaudioplayer.hh"
|
||||
#include "multimediaaudioplayer.hh"
|
||||
#include <QScopedPointer>
|
||||
#include <QStringList>
|
||||
|
||||
/// Overly engineered dummy/helper/wrapper "backend", which is not, to manage backends.
|
||||
class InternalPlayerBackend
|
||||
{
|
||||
public:
|
||||
/// Returns true if at least one backend is available.
|
||||
static bool anyAvailable();
|
||||
AudioPlayerInterface * getActualPlayer();
|
||||
/// Returns the name list of supported backends.
|
||||
/// The first one willl be the default one
|
||||
static QStringList availableBackends();
|
||||
|
||||
QString const & getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
void setName( QString const & name_ )
|
||||
{
|
||||
name = name_;
|
||||
}
|
||||
|
||||
bool operator==( InternalPlayerBackend const & other ) const
|
||||
{
|
||||
return name == other.name;
|
||||
}
|
||||
|
||||
bool operator!=( InternalPlayerBackend const & other ) const
|
||||
{
|
||||
return !operator==( other );
|
||||
}
|
||||
|
||||
private:
|
||||
QString name;
|
||||
};
|
|
@ -2,7 +2,7 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "filetype.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include <ctype.h>
|
||||
|
||||
namespace Filetype {
|
||||
|
@ -26,13 +26,13 @@ string simplifyString( string const & str, bool lowercase )
|
|||
|
||||
size_t beginPos = 0;
|
||||
|
||||
while ( beginPos < str.size() && Utf8::isspace( str[ beginPos ] ) ) {
|
||||
while ( beginPos < str.size() && Text::isspace( str[ beginPos ] ) ) {
|
||||
++beginPos;
|
||||
}
|
||||
|
||||
size_t endPos = str.size();
|
||||
|
||||
while ( endPos && Utf8::isspace( str[ endPos - 1 ] ) ) {
|
||||
while ( endPos && Text::isspace( str[ endPos - 1 ] ) ) {
|
||||
--endPos;
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "folding.hh"
|
||||
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "globalregex.hh"
|
||||
#include "inc_case_folding.hh"
|
||||
|
||||
|
@ -13,12 +13,12 @@ namespace Folding {
|
|||
/// caught by the diacritics folding table, but they are only handled there
|
||||
/// when they come with their main characters, not by themselves. The rest
|
||||
/// are caught here.
|
||||
bool isCombiningMark( wchar ch )
|
||||
bool isCombiningMark( char32_t ch )
|
||||
{
|
||||
return QChar::isMark( ch );
|
||||
}
|
||||
|
||||
wstring apply( wstring const & in, bool preserveWildcards )
|
||||
std::u32string apply( std::u32string const & in, bool preserveWildcards )
|
||||
{
|
||||
// remove diacritics (normalization), white space, punctuation,
|
||||
auto temp = QString::fromStdU32String( in )
|
||||
|
@ -32,7 +32,7 @@ wstring apply( wstring const & in, bool preserveWildcards )
|
|||
// case folding
|
||||
std::u32string caseFolded;
|
||||
caseFolded.reserve( temp.size() );
|
||||
wchar buf[ foldCaseMaxOut ];
|
||||
char32_t buf[ foldCaseMaxOut ];
|
||||
for ( const char32_t ch : temp ) {
|
||||
auto n = foldCase( ch, buf );
|
||||
caseFolded.append( buf, n );
|
||||
|
@ -40,11 +40,11 @@ wstring apply( wstring const & in, bool preserveWildcards )
|
|||
return caseFolded;
|
||||
}
|
||||
|
||||
wstring applySimpleCaseOnly( wstring const & in )
|
||||
std::u32string applySimpleCaseOnly( std::u32string const & in )
|
||||
{
|
||||
wchar const * nextChar = in.data();
|
||||
char32_t const * nextChar = in.data();
|
||||
|
||||
wstring out;
|
||||
std::u32string out;
|
||||
|
||||
out.reserve( in.size() );
|
||||
|
||||
|
@ -55,27 +55,27 @@ wstring applySimpleCaseOnly( wstring const & in )
|
|||
return out;
|
||||
}
|
||||
|
||||
wstring applySimpleCaseOnly( QString const & in )
|
||||
std::u32string applySimpleCaseOnly( QString const & in )
|
||||
{
|
||||
// Qt only supports simple case folding.
|
||||
return in.toCaseFolded().toStdU32String();
|
||||
}
|
||||
|
||||
wstring applySimpleCaseOnly( std::string const & in )
|
||||
std::u32string applySimpleCaseOnly( std::string const & in )
|
||||
{
|
||||
return applySimpleCaseOnly( Utf8::decode( in ) );
|
||||
return applySimpleCaseOnly( Text::toUtf32( in ) );
|
||||
// return QString::fromStdString( in ).toCaseFolded().toStdU32String();
|
||||
}
|
||||
|
||||
wstring applyFullCaseOnly( wstring const & in )
|
||||
std::u32string applyFullCaseOnly( std::u32string const & in )
|
||||
{
|
||||
wstring caseFolded;
|
||||
std::u32string caseFolded;
|
||||
|
||||
caseFolded.reserve( in.size() * foldCaseMaxOut );
|
||||
|
||||
wchar const * nextChar = in.data();
|
||||
char32_t const * nextChar = in.data();
|
||||
|
||||
wchar buf[ foldCaseMaxOut ];
|
||||
char32_t buf[ foldCaseMaxOut ];
|
||||
|
||||
for ( size_t left = in.size(); left--; ) {
|
||||
caseFolded.append( buf, foldCase( *nextChar++, buf ) );
|
||||
|
@ -84,17 +84,17 @@ wstring applyFullCaseOnly( wstring const & in )
|
|||
return caseFolded;
|
||||
}
|
||||
|
||||
wstring applyDiacriticsOnly( wstring const & in )
|
||||
std::u32string applyDiacriticsOnly( std::u32string const & in )
|
||||
{
|
||||
auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark );
|
||||
return noAccent.toStdU32String();
|
||||
}
|
||||
|
||||
wstring applyPunctOnly( wstring const & in )
|
||||
std::u32string applyPunctOnly( std::u32string const & in )
|
||||
{
|
||||
wchar const * nextChar = in.data();
|
||||
char32_t const * nextChar = in.data();
|
||||
|
||||
wstring out;
|
||||
std::u32string out;
|
||||
|
||||
out.reserve( in.size() );
|
||||
|
||||
|
@ -119,11 +119,11 @@ QString applyPunctOnly( QString const & in )
|
|||
return out;
|
||||
}
|
||||
|
||||
wstring applyWhitespaceOnly( wstring const & in )
|
||||
std::u32string applyWhitespaceOnly( std::u32string const & in )
|
||||
{
|
||||
wchar const * nextChar = in.data();
|
||||
char32_t const * nextChar = in.data();
|
||||
|
||||
wstring out;
|
||||
std::u32string out;
|
||||
|
||||
out.reserve( in.size() );
|
||||
|
||||
|
@ -136,11 +136,11 @@ wstring applyWhitespaceOnly( wstring const & in )
|
|||
return out;
|
||||
}
|
||||
|
||||
wstring applyWhitespaceAndPunctOnly( wstring const & in )
|
||||
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
|
||||
{
|
||||
wchar const * nextChar = in.data();
|
||||
char32_t const * nextChar = in.data();
|
||||
|
||||
wstring out;
|
||||
std::u32string out;
|
||||
|
||||
out.reserve( in.size() );
|
||||
|
||||
|
@ -153,26 +153,26 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
|
|||
return out;
|
||||
}
|
||||
|
||||
bool isWhitespace( wchar ch )
|
||||
bool isWhitespace( char32_t ch )
|
||||
{
|
||||
//invisible character should be treated as whitespace as well.
|
||||
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
|
||||
}
|
||||
|
||||
bool isWhitespaceOrPunct( wchar ch )
|
||||
bool isWhitespaceOrPunct( char32_t ch )
|
||||
{
|
||||
return isWhitespace( ch ) || QChar::isPunct( ch );
|
||||
}
|
||||
|
||||
bool isPunct( wchar ch )
|
||||
bool isPunct( char32_t ch )
|
||||
{
|
||||
return QChar::isPunct( ch );
|
||||
}
|
||||
|
||||
wstring trimWhitespaceOrPunct( wstring const & in )
|
||||
std::u32string trimWhitespaceOrPunct( std::u32string const & in )
|
||||
{
|
||||
wchar const * wordBegin = in.c_str();
|
||||
wstring::size_type wordSize = in.size();
|
||||
char32_t const * wordBegin = in.c_str();
|
||||
std::u32string::size_type wordSize = in.size();
|
||||
|
||||
// Skip any leading whitespace
|
||||
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
|
||||
|
@ -185,7 +185,7 @@ wstring trimWhitespaceOrPunct( wstring const & in )
|
|||
--wordSize;
|
||||
}
|
||||
|
||||
return wstring( wordBegin, wordSize );
|
||||
return std::u32string( wordBegin, wordSize );
|
||||
}
|
||||
|
||||
QString trimWhitespaceOrPunct( QString const & in )
|
||||
|
@ -209,13 +209,13 @@ QString trimWhitespaceOrPunct( QString const & in )
|
|||
return in.mid( wordBegin, wordSize );
|
||||
}
|
||||
|
||||
wstring trimWhitespace( wstring const & in )
|
||||
std::u32string trimWhitespace( std::u32string const & in )
|
||||
{
|
||||
if ( in.empty() ) {
|
||||
return in;
|
||||
}
|
||||
wchar const * wordBegin = in.c_str();
|
||||
wstring::size_type wordSize = in.size();
|
||||
char32_t const * wordBegin = in.c_str();
|
||||
std::u32string::size_type wordSize = in.size();
|
||||
|
||||
// Skip any leading whitespace
|
||||
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
|
||||
|
@ -228,7 +228,7 @@ wstring trimWhitespace( wstring const & in )
|
|||
--wordSize;
|
||||
}
|
||||
|
||||
return wstring( wordBegin, wordSize );
|
||||
return std::u32string( wordBegin, wordSize );
|
||||
}
|
||||
|
||||
QString trimWhitespace( QString const & in )
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "wstring.hh"
|
||||
#include "text.hh"
|
||||
#include <QString>
|
||||
|
||||
/// Folding provides means to translate several possible ways to write a
|
||||
|
@ -17,8 +17,6 @@
|
|||
|
||||
namespace Folding {
|
||||
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
|
||||
/// The algorithm's version.
|
||||
enum {
|
||||
|
@ -27,48 +25,48 @@ enum {
|
|||
|
||||
/// Applies the folding algorithm to each character in the given string,
|
||||
/// making another one as a result.
|
||||
wstring apply( wstring const &, bool preserveWildcards = false );
|
||||
std::u32string apply( std::u32string const &, bool preserveWildcards = false );
|
||||
|
||||
/// Applies only simple case folding algorithm. Since many dictionaries have
|
||||
/// different case style, we interpret words differing only by case as synonyms.
|
||||
wstring applySimpleCaseOnly( wstring const & );
|
||||
wstring applySimpleCaseOnly( QString const & in );
|
||||
wstring applySimpleCaseOnly( std::string const & in );
|
||||
std::u32string applySimpleCaseOnly( std::u32string const & );
|
||||
std::u32string applySimpleCaseOnly( QString const & in );
|
||||
std::u32string applySimpleCaseOnly( std::string const & in );
|
||||
|
||||
/// Applies only full case folding algorithm. This includes simple case, but also
|
||||
/// decomposing ligatures and complex letters.
|
||||
wstring applyFullCaseOnly( wstring const & );
|
||||
std::u32string applyFullCaseOnly( std::u32string const & );
|
||||
|
||||
/// Applies only diacritics folding algorithm.
|
||||
wstring applyDiacriticsOnly( wstring const & );
|
||||
std::u32string applyDiacriticsOnly( std::u32string const & );
|
||||
|
||||
/// Applies only punctuation folding algorithm.
|
||||
wstring applyPunctOnly( wstring const & );
|
||||
std::u32string applyPunctOnly( std::u32string const & );
|
||||
QString applyPunctOnly( QString const & in );
|
||||
/// Applies only whitespace folding algorithm.
|
||||
wstring applyWhitespaceOnly( wstring const & );
|
||||
std::u32string applyWhitespaceOnly( std::u32string const & );
|
||||
|
||||
/// Applies only whitespace&punctuation folding algorithm.
|
||||
wstring applyWhitespaceAndPunctOnly( wstring const & );
|
||||
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & );
|
||||
|
||||
/// Returns true if the given character is any form of whitespace, false
|
||||
/// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also
|
||||
/// includes \n, \r and \t.
|
||||
bool isWhitespace( wchar ch );
|
||||
bool isWhitespaceOrPunct( wchar ch );
|
||||
bool isWhitespace( char32_t ch );
|
||||
bool isWhitespaceOrPunct( char32_t ch );
|
||||
|
||||
/// Returns true if the given character is any form of punctuation, false
|
||||
/// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes.
|
||||
bool isPunct( wchar ch );
|
||||
bool isPunct( char32_t ch );
|
||||
|
||||
/// Removes any whitespace or punctuation from the beginning and the end of
|
||||
/// the word.
|
||||
wstring trimWhitespaceOrPunct( wstring const & );
|
||||
std::u32string trimWhitespaceOrPunct( std::u32string const & );
|
||||
QString trimWhitespaceOrPunct( QString const & in );
|
||||
|
||||
/// Removes any whitespace from the beginning and the end of
|
||||
/// the word.
|
||||
wstring trimWhitespace( wstring const & );
|
||||
std::u32string trimWhitespace( std::u32string const & );
|
||||
QString trimWhitespace( QString const & in );
|
||||
|
||||
/// Same as apply( std::u32string ), but without any heap operations, therefore
|
||||
|
@ -86,6 +84,6 @@ QString unescapeWildcardSymbols( QString const & );
|
|||
QString escapeWildcardSymbols( QString const & );
|
||||
|
||||
/// Tests if the given char is one of the Unicode combining marks.
|
||||
bool isCombiningMark( wchar ch );
|
||||
bool isCombiningMark( char32_t ch );
|
||||
|
||||
} // namespace Folding
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
/* This file is (c) 2013 Abs62
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "gddebug.hh"
|
||||
#include <QDebug>
|
||||
#include <QString>
|
||||
#include <QtCore5Compat/QTextCodec>
|
||||
|
||||
QFile * logFilePtr;
|
||||
|
||||
void gdWarning( const char * msg, ... )
|
||||
{
|
||||
va_list ap;
|
||||
va_start( ap, msg );
|
||||
|
||||
qWarning() << QString().vasprintf( msg, ap );
|
||||
|
||||
va_end( ap );
|
||||
}
|
||||
|
||||
void gdDebug( const char * msg, ... )
|
||||
{
|
||||
va_list ap;
|
||||
va_start( ap, msg );
|
||||
|
||||
qDebug().noquote() << QString().vasprintf( msg, ap );
|
||||
|
||||
va_end( ap );
|
||||
}
|
|
@ -1,29 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <QFile>
|
||||
|
||||
#ifdef NO_CONSOLE
|
||||
#define GD_DPRINTF( ... ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#define GD_FDPRINTF( ... ) \
|
||||
do { \
|
||||
} while ( 0 )
|
||||
#else
|
||||
#define GD_DPRINTF( ... ) gdDebug( __VA_ARGS__ )
|
||||
#define GD_FDPRINTF( ... ) fprintf( __VA_ARGS__ )
|
||||
#endif
|
||||
|
||||
void gdWarning( const char *, ... ) /* print warning message */
|
||||
#if defined( Q_CC_GNU ) && !defined( __INSURE__ )
|
||||
__attribute__( ( format( printf, 1, 2 ) ) )
|
||||
#endif
|
||||
;
|
||||
|
||||
void gdDebug( const char *, ... )
|
||||
#if defined( Q_CC_GNU ) && !defined( __INSURE__ )
|
||||
__attribute__( ( format( printf, 1, 2 ) ) )
|
||||
#endif
|
||||
;
|
||||
|
||||
extern QFile * logFilePtr;
|
|
@ -5,14 +5,9 @@
|
|||
#include <vector>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "wstring_qt.hh"
|
||||
|
||||
char const * const Iconv::GdWchar = "UTF-32LE";
|
||||
char const * const Iconv::Utf16Le = "UTF-16LE";
|
||||
char const * const Iconv::Utf8 = "UTF-8";
|
||||
|
||||
Iconv::Iconv( char const * from ):
|
||||
state( iconv_open( Utf8, from ) )
|
||||
state( iconv_open( Text::utf8, from ) )
|
||||
{
|
||||
if ( state == (iconv_t)-1 ) {
|
||||
throw exCantInit( strerror( errno ) );
|
||||
|
@ -80,7 +75,7 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
|
|||
return QString::fromUtf8( &outBuf.front(), datasize );
|
||||
}
|
||||
|
||||
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
|
||||
std::u32string Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
|
||||
|
||||
{
|
||||
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific
|
||||
|
@ -111,6 +106,12 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz
|
|||
return outStr.toStdString();
|
||||
}
|
||||
|
||||
std::string Iconv::toUtf8( char const * fromEncoding, std::u32string_view str )
|
||||
{
|
||||
// u32string::size -> returns the number of char32_t instead of the length of bytes
|
||||
return toUtf8( fromEncoding, str.data(), str.size() * sizeof( char32_t ) );
|
||||
}
|
||||
|
||||
QString Iconv::toQString( char const * fromEncoding, void const * fromData, size_t dataSize )
|
||||
{
|
||||
if ( dataSize == 0 ) {
|
||||
|
|
|
@ -3,14 +3,11 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <QString>
|
||||
|
||||
#include "wstring.hh"
|
||||
#include "ex.hh"
|
||||
|
||||
#include "text.hh"
|
||||
#include <QString>
|
||||
#include <iconv.h>
|
||||
|
||||
|
||||
/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv()
|
||||
/// Only supports converting from a known "from" to UTF8
|
||||
class Iconv
|
||||
|
@ -22,12 +19,6 @@ public:
|
|||
DEF_EX( Ex, "Iconv exception", std::exception )
|
||||
DEF_EX_STR( exCantInit, "Can't initialize iconv conversion:", Ex )
|
||||
|
||||
// Some predefined character sets' names
|
||||
|
||||
static char const * const GdWchar;
|
||||
static char const * const Utf16Le;
|
||||
static char const * const Utf8;
|
||||
|
||||
explicit Iconv( char const * from );
|
||||
|
||||
~Iconv();
|
||||
|
@ -35,11 +26,12 @@ public:
|
|||
QString convert( void const *& inBuf, size_t & inBytesLeft );
|
||||
|
||||
// Converts a given block of data from the given encoding to a wide string.
|
||||
static gd::wstring toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
|
||||
static std::u32string toWstring( char const * fromEncoding, void const * fromData, size_t dataSize );
|
||||
|
||||
// Converts a given block of data from the given encoding to an utf8-encoded
|
||||
// string.
|
||||
static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize );
|
||||
static std::string toUtf8( char const * fromEncoding, std::u32string_view str );
|
||||
|
||||
static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize );
|
||||
|
||||
|
|
2
src/common/logfileptr.cc
Normal file
2
src/common/logfileptr.cc
Normal file
|
@ -0,0 +1,2 @@
|
|||
#include "logfileptr.hh"
|
||||
QFile * logFilePtr;
|
3
src/common/logfileptr.hh
Normal file
3
src/common/logfileptr.hh
Normal file
|
@ -0,0 +1,3 @@
|
|||
#pragma once
|
||||
#include <QFile>
|
||||
extern QFile * logFilePtr;
|
321
src/common/text.cc
Normal file
321
src/common/text.cc
Normal file
|
@ -0,0 +1,321 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "text.hh"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <QByteArray>
|
||||
#include <QString>
|
||||
#include <QList>
|
||||
|
||||
namespace Text {
|
||||
|
||||
/// Maps an Encoding value to the encoding-name constant declared for it.
/// Encoding::Utf8 and every value without a dedicated name resolve to utf8.
const char * getEncodingNameFor( Encoding e )
{
  if ( e == Encoding::Utf32LE ) {
    return utf32_le;
  }
  if ( e == Encoding::Utf32BE ) {
    return utf32_be;
  }
  if ( e == Encoding::Utf32 ) {
    return utf32;
  }
  if ( e == Encoding::Utf16LE ) {
    return utf16_le;
  }
  if ( e == Encoding::Utf16BE ) {
    return utf16_be;
  }
  if ( e == Encoding::Windows1252 ) {
    return windows_1252;
  }
  if ( e == Encoding::Windows1251 ) {
    return windows_1251;
  }
  if ( e == Encoding::Windows1250 ) {
    return windows_1250;
  }

  // Encoding::Utf8 and anything unrecognised.
  return utf8;
}
|
||||
|
||||
/// Resolves an encoding name to its Encoding value. The name is upper-cased before it is
/// compared against the known names; anything that matches none of them yields Encoding::Utf8.
Encoding getEncodingForName( const QByteArray & name )
{
  struct NamedEncoding
  {
    const char * label;
    Encoding value;
  };

  static constexpr NamedEncoding known[] = {
    { utf32_le, Encoding::Utf32LE },
    { utf32_be, Encoding::Utf32BE },
    { utf32, Encoding::Utf32 },
    { utf16_le, Encoding::Utf16LE },
    { utf16_be, Encoding::Utf16BE },
    { windows_1252, Encoding::Windows1252 },
    { windows_1251, Encoding::Windows1251 },
    { windows_1250, Encoding::Windows1250 },
  };

  const QByteArray upper = name.toUpper();
  for ( const NamedEncoding & candidate : known ) {
    if ( upper == candidate.label ) {
      return candidate.value;
    }
  }

  return Encoding::Utf8;
}
|
||||
|
||||
/// Encodes UTF-32 input as UTF-8.
/// @param in     pointer to the first code point
/// @param inSize number of code points to read from 'in'
/// @param out_   destination buffer; it must provide at least inSize * 4 bytes
/// @return the number of bytes written to 'out_'. No terminating 0 is appended.
size_t encode( char32_t const * in, size_t inSize, char * out_ )
{
  auto * const first     = reinterpret_cast< unsigned char * >( out_ );
  unsigned char * cursor = first;

  for ( size_t i = 0; i < inSize; ++i ) {
    char32_t const cp = in[ i ];

    if ( cp < 0x80 ) {
      // One byte: 0xxxxxxx
      *cursor++ = static_cast< unsigned char >( cp );
    }
    else if ( cp < 0x800 ) {
      // Two bytes: 110xxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xC0 | ( cp >> 6 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else if ( cp < 0x10000 ) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xE0 | ( cp >> 12 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xF0 | ( cp >> 18 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 12 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
  }

  return static_cast< size_t >( cursor - first );
}
|
||||
|
||||
/// Decodes UTF-8 input into UTF-32.
/// @param in_    pointer to the first input byte
/// @param inSize number of bytes to read from 'in_'
/// @param out_   destination buffer; it must hold at least inSize code points
/// @return the number of code points written to 'out_', or -1 when a lead byte is not a
///         valid sequence start, a sequence is cut short, or a continuation byte is
///         malformed. No terminating 0 is appended.
long decode( char const * in_, size_t inSize, char32_t * out_ )
{
  auto const * src = reinterpret_cast< unsigned char const * >( in_ );
  char32_t * dst   = out_;

  while ( inSize > 0 ) {
    unsigned char const lead = *src++;
    --inSize;

    if ( lead < 0x80 ) {
      // One-byte encoding
      *dst++ = lead;
      continue;
    }

    size_t trailing;
    char32_t codePoint;

    if ( ( lead & 0xE0 ) == 0xC0 ) {
      // Two-byte sequence
      trailing  = 1;
      codePoint = lead & 0x1F;
    }
    else if ( ( lead & 0xF0 ) == 0xE0 ) {
      // Three-byte sequence
      trailing  = 2;
      codePoint = lead & 0x0F;
    }
    else if ( ( lead & 0xF8 ) == 0xF0 ) {
      // Four-byte sequence
      trailing  = 3;
      codePoint = lead & 0x07;
    }
    else {
      // Either a continuation byte in lead position (10xxxxxx) or a byte of the
      // form 11111xxx, neither of which can start a sequence.
      return -1;
    }

    if ( inSize < trailing ) {
      return -1;
    }
    inSize -= trailing;

    for ( ; trailing > 0; --trailing ) {
      unsigned char const next = *src++;
      if ( ( next & 0xC0 ) != 0x80 ) {
        return -1;
      }
      codePoint = ( codePoint << 6 ) | ( next & 0x3F );
    }

    *dst++ = codePoint;
  }

  return dst - out_;
}
|
||||
|
||||
std::string toUtf8( std::u32string const & in ) noexcept
|
||||
{
|
||||
if ( in.empty() ) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector< char > buffer( in.size() * 4 );
|
||||
|
||||
return { &buffer.front(), encode( in.data(), in.size(), &buffer.front() ) };
|
||||
}
|
||||
|
||||
std::u32string toUtf32( std::string const & in )
|
||||
{
|
||||
if ( in.empty() ) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector< char32_t > buffer( in.size() );
|
||||
|
||||
long result = decode( in.data(), in.size(), &buffer.front() );
|
||||
|
||||
if ( result < 0 ) {
|
||||
throw exCantDecode( in );
|
||||
}
|
||||
|
||||
return std::u32string( &buffer.front(), result );
|
||||
}
|
||||
|
||||
/// Locale-independent whitespace test: true only for ' ', '\t', '\n', '\v', '\f' and '\r'.
bool isspace( int c )
{
  // '\t', '\n', '\v', '\f' and '\r' occupy the consecutive codes 9..13.
  return c == ' ' || ( c >= '\t' && c <= '\r' );
}
|
||||
|
||||
/// Searches the first s1length bytes of s1 for the byte sequence s2 (s2length bytes long).
/// @return the offset just past the first occurrence of s2, i.e. the size of the first
///         line with its terminator included. When s2 does not occur, the result is
///         s1length (never -1).
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length )
{
  char * const s1End = s1 + s1length;
  char * const hit   = std::search( s1, s1End, s2, s2 + s2length );

  const auto offset = hit - s1;

  // A miss leaves 'hit' at the end of s1; a match extends the offset over the terminator.
  return hit == s1End ? offset : offset + s2length;
}
|
||||
|
||||
|
||||
/// Builds the byte pattern of a line feed (0x0A) as it is laid out in encoding e.
/// The bytes are allocated with new[] and returned through LineFeed::lineFeed; this
/// function does not release them.
/// NOTE(review): Encoding::Utf32 has no dedicated branch and ends up with the single-byte
/// pattern -- confirm that this is intended.
LineFeed initLineFeed( const Encoding e )
{
  LineFeed lf{};

  if ( e == Encoding::Utf32LE ) {
    lf.length   = 4;
    lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
  }
  else if ( e == Encoding::Utf32BE ) {
    lf.length   = 4;
    lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
  }
  else if ( e == Encoding::Utf16LE ) {
    lf.length   = 2;
    lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
  }
  else if ( e == Encoding::Utf16BE ) {
    lf.length   = 2;
    lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
  }
  else {
    // Windows-125x, UTF-8 and every remaining value: the single byte 0x0A.
    lf.length   = 1;
    lf.lineFeed = new char[ 1 ]{ 0x0A };
  }

  return lf;
}
|
||||
|
||||
// Converting non-BMP characters to a UTF-32 string may leave \0 characters at the end.
// These helpers strip such trailing \0 characters, because \0 is significant in the index.
// They are meant for index-related operations such as store/query.

/// Returns a copy of v without its trailing U+0000 characters. Zeros that are followed
/// by a non-zero character are kept. An empty or all-zero input gives an empty string.
std::u32string removeTrailingZero( std::u32string const & v )
{
  // Let the string find the last character worth keeping; this stays in size_t
  // instead of narrowing the length into an int.
  auto const lastKept = v.find_last_not_of( U'\0' );
  if ( lastKept == std::u32string::npos ) {
    return {};
  }

  return v.substr( 0, lastKept + 1 );
}
|
||||
|
||||
std::u32string removeTrailingZero( QString const & in )
|
||||
{
|
||||
QList< unsigned int > v = in.toUcs4();
|
||||
|
||||
int n = v.size();
|
||||
while ( n > 0 && v[ n - 1 ] == 0 ) {
|
||||
n--;
|
||||
}
|
||||
if ( n != v.size() ) {
|
||||
v.resize( n );
|
||||
}
|
||||
|
||||
return std::u32string( (const char32_t *)v.constData(), v.size() );
|
||||
}
|
||||
|
||||
std::u32string normalize( const std::u32string & str )
|
||||
{
|
||||
return QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C ).toStdU32String();
|
||||
}
|
||||
|
||||
|
||||
} // namespace Text
|
64
src/common/text.hh
Normal file
64
src/common/text.hh
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#pragma once
|
||||
|
||||
#include "ex.hh"
|
||||
#include <QByteArray>
|
||||
#include <string>
|
||||
|
||||
/// Facilities to process Text, focusing on Unicode
|
||||
namespace Text {
|
||||
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
|
||||
|
||||
/// Encoding names. Ref -> IANA's encoding names https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||
/// Notice: The ordering must not be changed before Utf32LE. The current .dsl format index file depends on it.
|
||||
enum class Encoding {
|
||||
Utf16LE = 0,
|
||||
Utf16BE,
|
||||
Windows1252,
|
||||
Windows1251,
|
||||
Windows1250,
|
||||
Utf8,
|
||||
Utf32BE,
|
||||
Utf32LE,
|
||||
Utf32,
|
||||
};
|
||||
|
||||
inline constexpr auto utf16_be = "UTF-16BE";
|
||||
inline constexpr auto utf16_le = "UTF-16LE";
|
||||
inline constexpr auto utf32 = "UTF-32";
|
||||
inline constexpr auto utf32_be = "UTF-32BE";
|
||||
inline constexpr auto utf32_le = "UTF-32LE";
|
||||
inline constexpr auto utf8 = "UTF-8";
|
||||
inline constexpr auto windows_1250 = "WINDOWS-1250";
|
||||
inline constexpr auto windows_1251 = "WINDOWS-1251";
|
||||
inline constexpr auto windows_1252 = "WINDOWS-1252";
|
||||
|
||||
const char * getEncodingNameFor( Encoding e );
|
||||
Encoding getEncodingForName( const QByteArray & name );
|
||||
|
||||
/// utf32 -> utf8
|
||||
std::string toUtf8( std::u32string const & ) noexcept;
|
||||
/// utf8 -> utf32
|
||||
std::u32string toUtf32( std::string const & );
|
||||
|
||||
/// Since the standard isspace() is locale-specific, we need something
|
||||
/// that would never mess up our utf8 input. The stock one worked fine under
|
||||
/// Linux but was messing up strings under Windows.
|
||||
bool isspace( int c );
|
||||
|
||||
// Returns the size of the first line of s1, including the s2 terminator; returns s1length (not -1) if s2 is not found.
|
||||
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
|
||||
|
||||
struct LineFeed
|
||||
{
|
||||
int length;
|
||||
char * lineFeed;
|
||||
};
|
||||
|
||||
LineFeed initLineFeed( Encoding e );
|
||||
|
||||
std::u32string removeTrailingZero( std::u32string const & v );
|
||||
std::u32string removeTrailingZero( QString const & in );
|
||||
std::u32string normalize( std::u32string const & );
|
||||
} // namespace Text
|
|
@ -1,280 +0,0 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "utf8.hh"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <QByteArray>
|
||||
#include <QString>
|
||||
|
||||
namespace Utf8 {
|
||||
|
||||
/// Encodes 'inSize' wide characters from 'in' as UTF-8 into 'out_' and returns the number
/// of bytes written. Up to four bytes are produced per character; no terminating 0 is added.
size_t encode( wchar const * in, size_t inSize, char * out_ )
{
  auto * const first     = reinterpret_cast< unsigned char * >( out_ );
  unsigned char * cursor = first;

  for ( size_t i = 0; i < inSize; ++i ) {
    wchar const cp = in[ i ];

    if ( cp < 0x80 ) {
      // One byte: 0xxxxxxx
      *cursor++ = static_cast< unsigned char >( cp );
    }
    else if ( cp < 0x800 ) {
      // Two bytes: 110xxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xC0 | ( cp >> 6 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else if ( cp < 0x10000 ) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xE0 | ( cp >> 12 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
    else {
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *cursor++ = static_cast< unsigned char >( 0xF0 | ( cp >> 18 ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 12 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( ( cp >> 6 ) & 0x3F ) );
      *cursor++ = static_cast< unsigned char >( 0x80 | ( cp & 0x3F ) );
    }
  }

  return static_cast< size_t >( cursor - first );
}
|
||||
|
||||
long decode( char const * in_, size_t inSize, wchar * out_ )
|
||||
{
|
||||
unsigned char const * in = (unsigned char const *)in_;
|
||||
wchar * out = out_;
|
||||
|
||||
while ( inSize-- ) {
|
||||
wchar result;
|
||||
|
||||
if ( *in & 0x80 ) {
|
||||
if ( *in & 0x40 ) {
|
||||
if ( *in & 0x20 ) {
|
||||
if ( *in & 0x10 ) {
|
||||
// Four-byte sequence
|
||||
if ( *in & 8 ) {
|
||||
// This can't be
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( inSize < 3 ) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
inSize -= 3;
|
||||
|
||||
result = ( (wchar)*in++ & 7 ) << 18;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= ( (wchar)*in++ & 0x3F ) << 12;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= ( (wchar)*in++ & 0x3F ) << 6;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= (wchar)*in++ & 0x3F;
|
||||
}
|
||||
else {
|
||||
// Three-byte sequence
|
||||
|
||||
if ( inSize < 2 ) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
inSize -= 2;
|
||||
|
||||
result = ( (wchar)*in++ & 0xF ) << 12;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= ( (wchar)*in++ & 0x3F ) << 6;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= (wchar)*in++ & 0x3F;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Two-byte sequence
|
||||
if ( !inSize ) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
--inSize;
|
||||
|
||||
result = ( (wchar)*in++ & 0x1F ) << 6;
|
||||
|
||||
if ( ( *in & 0xC0 ) != 0x80 ) {
|
||||
return -1;
|
||||
}
|
||||
result |= (wchar)*in++ & 0x3F;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// This char is from the middle of encoding, it can't be leading
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// One-byte encoding
|
||||
result = *in++;
|
||||
}
|
||||
|
||||
*out++ = result;
|
||||
}
|
||||
|
||||
return out - out_;
|
||||
}
|
||||
|
||||
/// Converts a wide string to UTF-8 by running the buffer-based encode() over a scratch
/// buffer. An empty input gives an empty result.
string encode( wstring const & in ) noexcept
{
  string utf8Text;

  if ( !in.empty() ) {
    // The buffer-based encode() needs room for up to four bytes per character.
    std::vector< char > scratch( 4 * in.size() );
    const size_t written = encode( in.data(), in.size(), scratch.data() );
    utf8Text.assign( scratch.data(), written );
  }

  return utf8Text;
}
|
||||
|
||||
/// Converts a UTF-8 string to a wide string by running the buffer-based decode() over a
/// scratch buffer. An empty input gives an empty result.
/// @throws exCantDecode (carrying the input) when the buffer-based decode() fails.
wstring decode( string const & in )
{
  wstring wideText;

  if ( !in.empty() ) {
    // Decoding never produces more characters than there are input bytes.
    std::vector< wchar > scratch( in.size() );
    const long count = decode( in.data(), in.size(), scratch.data() );

    if ( count < 0 ) {
      throw exCantDecode( in );
    }

    wideText.assign( scratch.data(), count );
  }

  return wideText;
}
|
||||
|
||||
/// Locale-independent whitespace test: true only for ' ', '\t', '\n', '\v', '\f' and '\r'.
bool isspace( int c )
{
  // '\t', '\n', '\v', '\f' and '\r' occupy the consecutive codes 9..13.
  return c == ' ' || ( c >= '\t' && c <= '\r' );
}
|
||||
|
||||
/// Searches the first s1length bytes of s1 for the byte sequence s2 (s2length bytes long).
/// @return the offset just past the first occurrence of s2, i.e. the size of the first
///         line with its terminator included. When s2 does not occur, the result is
///         s1length (never -1).
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length )
{
  char * const s1End = s1 + s1length;
  char * const hit   = std::search( s1, s1End, s2, s2 + s2length );

  const auto offset = hit - s1;

  // A miss leaves 'hit' at the end of s1; a match extends the offset over the terminator.
  return hit == s1End ? offset : offset + s2length;
}
|
||||
|
||||
/// Maps an Encoding value to its encoding name.
/// The Utf8 value and every value without a dedicated name resolve to "UTF-8".
char const * getEncodingNameFor( Encoding e )
{
  if ( e == Utf32LE ) {
    return "UTF-32LE";
  }
  if ( e == Utf32BE ) {
    return "UTF-32BE";
  }
  if ( e == Utf16LE ) {
    return "UTF-16LE";
  }
  if ( e == Utf16BE ) {
    return "UTF-16BE";
  }
  if ( e == Windows1252 ) {
    return "WINDOWS-1252";
  }
  if ( e == Windows1251 ) {
    return "WINDOWS-1251";
  }
  if ( e == Windows1250 ) {
    return "WINDOWS-1250";
  }

  // The Utf8 value and anything unrecognised.
  return "UTF-8";
}
|
||||
|
||||
/// Resolves an encoding name to its Encoding value. The name is upper-cased before it is
/// compared against the known names; anything that matches none of them yields Utf8.
Encoding getEncodingForName( const QByteArray & _name )
{
  struct NamedEncoding
  {
    const char * label;
    Encoding value;
  };

  static const NamedEncoding known[] = {
    { "UTF-32LE", Utf32LE },
    { "UTF-32BE", Utf32BE },
    { "UTF-16LE", Utf16LE },
    { "UTF-16BE", Utf16BE },
    { "WINDOWS-1252", Windows1252 },
    { "WINDOWS-1251", Windows1251 },
    { "UTF-8", Utf8 },
    { "WINDOWS-1250", Windows1250 },
  };

  const QByteArray upper = _name.toUpper();
  for ( const NamedEncoding & candidate : known ) {
    if ( upper == candidate.label ) {
      return candidate.value;
    }
  }

  return Utf8;
}
|
||||
|
||||
/// Builds the byte pattern of a line feed (0x0A) as it is laid out in encoding e.
/// The bytes are allocated with new[] and returned through LineFeed::lineFeed; this
/// function does not release them.
LineFeed initLineFeed( const Encoding e )
{
  LineFeed lf{};

  if ( e == Utf8::Utf32LE ) {
    lf.length   = 4;
    lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
  }
  else if ( e == Utf8::Utf32BE ) {
    lf.length   = 4;
    lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
  }
  else if ( e == Utf8::Utf16LE ) {
    lf.length   = 2;
    lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
  }
  else if ( e == Utf8::Utf16BE ) {
    lf.length   = 2;
    lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
  }
  else {
    // Windows-125x, UTF-8 and every remaining value: the single byte 0x0A.
    lf.length   = 1;
    lf.lineFeed = new char[ 1 ]{ 0x0A };
  }

  return lf;
}
|
||||
|
||||
} // namespace Utf8
|
|
@ -1,68 +0,0 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#pragma once
|
||||
|
||||
#include <cstdio>
|
||||
#include <QByteArray>
|
||||
#include <string>
|
||||
#include "ex.hh"
|
||||
#include "wstring.hh"
|
||||
|
||||
/// A simple UTF-8 encoder/decoder. Some dictionary backends only require
|
||||
/// utf8, so we have this separately, removing the iconv dependency for them.
|
||||
/// Besides, utf8 is quite ubiquitous now, and its use is spread over many
|
||||
/// places.
|
||||
namespace Utf8 {
|
||||
|
||||
// Those are possible encodings for .dsl files
|
||||
enum Encoding {
|
||||
Utf16LE,
|
||||
Utf16BE,
|
||||
Windows1252,
|
||||
Windows1251,
|
||||
Windows1250,
|
||||
Utf8, // This is an extension. Detected solely by the UTF8 BOM.
|
||||
Utf32BE,
|
||||
Utf32LE,
|
||||
};
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
|
||||
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
|
||||
|
||||
/// Encodes the given UCS-4 into UTF-8. The inSize specifies the number
|
||||
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
|
||||
/// at least inSize * 4 bytes long. The function returns the number of chars
|
||||
/// stored in the 'out' buffer. The result is not 0-terminated.
|
||||
size_t encode( wchar const * in, size_t inSize, char * out );
|
||||
/// Decodes the given UTF-8 into UCS-4. The inSize specifies the number
|
||||
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
|
||||
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
|
||||
/// function returns -1, otherwise it returns the number of wide characters
|
||||
/// stored in the 'out' buffer. The result is not 0-terminated.
|
||||
long decode( char const * in, size_t inSize, wchar * out );
|
||||
|
||||
/// Versions for non time-critical code.
|
||||
string encode( wstring const & ) noexcept;
|
||||
wstring decode( string const & );
|
||||
|
||||
/// Since the standard isspace() is locale-specific, we need something
|
||||
/// that would never mess up our utf8 input. The stock one worked fine under
|
||||
/// Linux but was messing up strings under Windows.
|
||||
bool isspace( int c );
|
||||
|
||||
// Returns the size of the first line of s1, including the s2 terminator; returns s1length (not -1) if s2 is not found.
|
||||
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
|
||||
char const * getEncodingNameFor( Encoding e );
|
||||
Encoding getEncodingForName( const QByteArray & name );
|
||||
|
||||
struct LineFeed
|
||||
{
|
||||
int length;
|
||||
char * lineFeed;
|
||||
};
|
||||
|
||||
LineFeed initLineFeed( Encoding e );
|
||||
} // namespace Utf8
|
|
@ -4,9 +4,6 @@
|
|||
#include <QStyle>
|
||||
#include <QMessageBox>
|
||||
#include <string>
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
#include <QBuffer>
|
||||
#include <QTextCodec>
|
||||
|
||||
|
@ -31,10 +28,10 @@ std::string c_string( const QString & str )
|
|||
return std::string( str.toUtf8().constData() );
|
||||
}
|
||||
|
||||
bool endsWithIgnoreCase( const string & str1, string str2 )
|
||||
bool endsWithIgnoreCase( QByteArrayView str, QByteArrayView extension )
|
||||
{
|
||||
return ( str1.size() >= (unsigned)str2.size() )
|
||||
&& ( strcasecmp( str1.c_str() + ( str1.size() - str2.size() ), str2.data() ) == 0 );
|
||||
return ( str.size() >= extension.size() )
|
||||
&& ( str.last( extension.size() ).compare( extension, Qt::CaseInsensitive ) == 0 );
|
||||
}
|
||||
|
||||
QString escapeAmps( QString const & str )
|
||||
|
|
|
@ -40,7 +40,7 @@ inline QString rstrip( const QString & str )
|
|||
}
|
||||
|
||||
std::string c_string( const QString & str );
|
||||
bool endsWithIgnoreCase( const string & str1, string str2 );
|
||||
bool endsWithIgnoreCase( QByteArrayView str, QByteArrayView extension );
|
||||
/**
|
||||
* remove punctuation , space, symbol
|
||||
*
|
||||
|
@ -257,9 +257,14 @@ inline bool isAudioUrl( QUrl const & url )
|
|||
{
|
||||
if ( !url.isValid() )
|
||||
return false;
|
||||
// Note: we check for forvo sound links explicitly, as they don't have extensions
|
||||
|
||||
return ( url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "gdau" )
|
||||
// gdau links are known to be audios, (sometimes they may not have file extension).
|
||||
if ( url.scheme() == "gdau" ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: we check for forvo sound links explicitly, as they don't have extensions
|
||||
return ( url.scheme() == "http" || url.scheme() == "https" )
|
||||
&& ( Filetype::isNameOfSound( url.path().toUtf8().data() ) || url.host() == "apifree.forvo.com" );
|
||||
}
|
||||
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
///
|
||||
/// Aliases for legacy reasons.
|
||||
///
|
||||
/// For new code, just use the standardized std::u32string for UTF-32 strings instead.
|
||||
///
|
||||
|
||||
namespace gd {
|
||||
using wchar = char32_t;
|
||||
using wstring = std::u32string;
|
||||
} // namespace gd
|
|
@ -1,38 +0,0 @@
|
|||
#include "wstring_qt.hh"
|
||||
#include <QList>
|
||||
|
||||
namespace gd {
|
||||
|
||||
// When convert non-BMP characters to wstring,the ending char maybe \0 .This method remove the tailing \0 from the wstring
|
||||
// as \0 is sensitive in the index. This method will be only used with index related operations like store/query.
|
||||
wstring removeTrailingZero( wstring const & v )
|
||||
{
|
||||
int n = v.size();
|
||||
while ( n > 0 && v[ n - 1 ] == 0 ) {
|
||||
n--;
|
||||
}
|
||||
return wstring( v.data(), n );
|
||||
}
|
||||
|
||||
/// Converts 'in' to UCS-4 via QString::toUcs4() and returns the result without its
/// trailing zero elements. Zeros followed by a non-zero element are kept.
wstring removeTrailingZero( QString const & in )
{
  const auto ucs4 = in.toUcs4();

  // Count in the container's own size type rather than narrowing it into an int.
  auto kept = ucs4.size();
  while ( kept > 0 && ucs4[ kept - 1 ] == 0 ) {
    --kept;
  }

  // Only the first 'kept' elements are copied, so the list itself need not be shrunk.
  return wstring( reinterpret_cast< const wchar * >( ucs4.constData() ), kept );
}
|
||||
|
||||
/// Returns str converted to Unicode Normalization Form C, using QString for the work.
wstring normalize( const wstring & str )
{
  const QString composed = QString::fromStdU32String( str ).normalized( QString::NormalizationForm_C );
  return composed.toStdU32String();
}
|
||||
|
||||
|
||||
} // namespace gd
|
|
@ -1,16 +0,0 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#pragma once
|
||||
|
||||
/// This file adds conversions between gd::wstring and QString. See wstring.hh
|
||||
/// for more details on gd::wstring.
|
||||
|
||||
#include "wstring.hh"
|
||||
#include <QString>
|
||||
|
||||
namespace gd {
|
||||
wstring removeTrailingZero( wstring const & v );
|
||||
wstring removeTrailingZero( QString const & in );
|
||||
wstring normalize( wstring const & );
|
||||
} // namespace gd
|
|
@ -8,7 +8,6 @@
|
|||
#include <QtXml>
|
||||
#include <QApplication>
|
||||
#include <QStyle>
|
||||
#include "gddebug.hh"
|
||||
|
||||
#ifdef Q_OS_WIN32
|
||||
//this is a windows header file.
|
||||
|
@ -119,57 +118,6 @@ QKeySequence HotKey::toKeySequence() const
|
|||
;
|
||||
}
|
||||
|
||||
|
||||
/// Reports whether the build was configured with at least one internal player backend
/// (MAKE_FFMPEG_PLAYER or MAKE_QTMULTIMEDIA_PLAYER).
bool InternalPlayerBackend::anyAvailable()
{
#if !defined( MAKE_FFMPEG_PLAYER ) && !defined( MAKE_QTMULTIMEDIA_PLAYER )
  return false;
#else
  return true;
#endif
}
|
||||
|
||||
/// Picks the backend used by default: ffmpeg when it is compiled in, otherwise
/// Qt Multimedia when that is compiled in, otherwise a backend built from an empty name.
InternalPlayerBackend InternalPlayerBackend::defaultBackend()
{
#ifdef MAKE_FFMPEG_PLAYER
  return ffmpeg();
#else
  #ifdef MAKE_QTMULTIMEDIA_PLAYER
  return qtmultimedia();
  #else
  return InternalPlayerBackend( QString() );
  #endif
#endif
}
|
||||
|
||||
/// Collects the UI names of the backends compiled into this build, ffmpeg first.
/// The list is empty when neither backend macro is defined.
QStringList InternalPlayerBackend::nameList()
{
  QStringList available;

#ifdef MAKE_FFMPEG_PLAYER
  available << ffmpeg().uiName();
#endif

#ifdef MAKE_QTMULTIMEDIA_PLAYER
  available << qtmultimedia().uiName();
#endif

  return available;
}
|
||||
|
||||
bool InternalPlayerBackend::isFfmpeg() const
|
||||
{
|
||||
#ifdef MAKE_FFMPEG_PLAYER
|
||||
return *this == ffmpeg();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool InternalPlayerBackend::isQtmultimedia() const
|
||||
{
|
||||
#ifdef MAKE_QTMULTIMEDIA_PLAYER
|
||||
return *this == qtmultimedia();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
QString Preferences::sanitizeInputPhrase( QString const & inputWord ) const
|
||||
{
|
||||
QString result = inputWord;
|
||||
|
@ -181,9 +129,9 @@ QString Preferences::sanitizeInputPhrase( QString const & inputWord ) const
|
|||
}
|
||||
|
||||
if ( limitInputPhraseLength && result.size() > inputPhraseLengthLimit ) {
|
||||
gdDebug( "Ignoring an input phrase %lld symbols long. The configured maximum input phrase length is %d symbols.",
|
||||
result.size(),
|
||||
inputPhraseLengthLimit );
|
||||
qDebug( "Ignoring an input phrase %lld symbols long. The configured maximum input phrase length is %d symbols.",
|
||||
result.size(),
|
||||
inputPhraseLengthLimit );
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -232,7 +180,6 @@ Preferences::Preferences():
|
|||
pronounceOnLoadMain( false ),
|
||||
pronounceOnLoadPopup( false ),
|
||||
useInternalPlayer( InternalPlayerBackend::anyAvailable() ),
|
||||
internalPlayerBackend( InternalPlayerBackend::defaultBackend() ),
|
||||
checkForNewReleases( true ),
|
||||
disallowContentFromOtherSites( false ),
|
||||
hideGoldenDictHeader( false ),
|
||||
|
@ -549,10 +496,6 @@ Class load()
|
|||
c.paths.push_back( Path( getPortableVersionDictionaryDir(), true ) );
|
||||
}
|
||||
|
||||
#ifndef Q_OS_WIN32
|
||||
c.preferences.audioPlaybackProgram = "mplayer";
|
||||
#endif
|
||||
|
||||
QString possibleMorphologyPath = getProgramDataDir() + "/content/morphology";
|
||||
|
||||
if ( QDir( possibleMorphologyPath ).exists() ) {
|
||||
|
@ -592,7 +535,7 @@ Class load()
|
|||
if ( !loadFromTemplate ) {
|
||||
// Load the config as usual
|
||||
if ( !dd.setContent( &configFile, false, &errorStr, &errorLine, &errorColumn ) ) {
|
||||
GD_DPRINTF( "Error: %s at %d,%d\n", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
|
||||
qDebug( "Error: %s at %d,%d", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
|
||||
throw exMalformedConfigFile();
|
||||
}
|
||||
}
|
||||
|
@ -605,7 +548,7 @@ Class load()
|
|||
QBuffer bufferedData( &data );
|
||||
|
||||
if ( !dd.setContent( &bufferedData, false, &errorStr, &errorLine, &errorColumn ) ) {
|
||||
GD_DPRINTF( "Error: %s at %d,%d\n", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
|
||||
qDebug( "Error: %s at %d,%d", errorStr.toLocal8Bit().constData(), errorLine, errorColumn );
|
||||
throw exMalformedConfigFile();
|
||||
}
|
||||
}
|
||||
|
@ -852,7 +795,7 @@ Class load()
|
|||
// Upgrading
|
||||
c.dictServers = makeDefaultDictServers();
|
||||
}
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
QDomNode ves = root.namedItem( "voiceEngines" );
|
||||
|
||||
if ( !ves.isNull() ) {
|
||||
|
@ -1003,7 +946,7 @@ Class load()
|
|||
}
|
||||
|
||||
if ( !preferences.namedItem( "internalPlayerBackend" ).isNull() ) {
|
||||
c.preferences.internalPlayerBackend.setUiName(
|
||||
c.preferences.internalPlayerBackend.setName(
|
||||
preferences.namedItem( "internalPlayerBackend" ).toElement().text() );
|
||||
}
|
||||
|
||||
|
@ -1011,7 +954,7 @@ Class load()
|
|||
c.preferences.audioPlaybackProgram = preferences.namedItem( "audioPlaybackProgram" ).toElement().text();
|
||||
}
|
||||
else {
|
||||
c.preferences.audioPlaybackProgram = "mplayer";
|
||||
c.preferences.audioPlaybackProgram = "vlc --intf dummy --play-and-exit";
|
||||
}
|
||||
|
||||
QDomNode proxy = preferences.namedItem( "proxyserver" );
|
||||
|
@ -1736,7 +1679,7 @@ void save( Class const & c )
|
|||
p.setAttributeNode( icon );
|
||||
}
|
||||
}
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
{
|
||||
QDomNode ves = dd.createElement( "voiceEngines" );
|
||||
root.appendChild( ves );
|
||||
|
@ -1970,7 +1913,7 @@ void save( Class const & c )
|
|||
preferences.appendChild( opt );
|
||||
|
||||
opt = dd.createElement( "internalPlayerBackend" );
|
||||
opt.appendChild( dd.createTextNode( c.preferences.internalPlayerBackend.uiName() ) );
|
||||
opt.appendChild( dd.createTextNode( c.preferences.internalPlayerBackend.getName() ) );
|
||||
preferences.appendChild( opt );
|
||||
|
||||
opt = dd.createElement( "audioPlaybackProgram" );
|
||||
|
|
|
@ -3,19 +3,20 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QList>
|
||||
#include <QString>
|
||||
#include <QSize>
|
||||
#include <QDateTime>
|
||||
#include <QKeySequence>
|
||||
#include <QSet>
|
||||
#include <QMetaType>
|
||||
#include "audio/internalplayerbackend.hh"
|
||||
#include "ex.hh"
|
||||
#include <QDateTime>
|
||||
#include <QDomDocument>
|
||||
#include <QKeySequence>
|
||||
#include <QList>
|
||||
#include <QLocale>
|
||||
#include <optional>
|
||||
#include <QMetaType>
|
||||
#include <QObject>
|
||||
#include <QSet>
|
||||
#include <QSize>
|
||||
#include <QString>
|
||||
#include <QThread>
|
||||
#include <optional>
|
||||
|
||||
/// Special group IDs
|
||||
enum GroupId : unsigned {
|
||||
|
@ -269,66 +270,6 @@ struct CustomFonts
|
|||
}
|
||||
};
|
||||
|
||||
/// This class encapsulates supported backend preprocessor logic,
|
||||
/// discourages duplicating backend names in code, which is error-prone.
|
||||
class InternalPlayerBackend
|
||||
{
|
||||
public:
|
||||
/// Returns true if at least one backend is available.
|
||||
static bool anyAvailable();
|
||||
/// Returns the default backend or null backend if none is available.
|
||||
static InternalPlayerBackend defaultBackend();
|
||||
/// Returns the name list of supported backends.
|
||||
static QStringList nameList();
|
||||
|
||||
/// Returns true if built with FFmpeg player support and the name matches.
|
||||
bool isFfmpeg() const;
|
||||
/// Returns true if built with Qt Multimedia player support and the name matches.
|
||||
bool isQtmultimedia() const;
|
||||
|
||||
QString const & uiName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
void setUiName( QString const & name_ )
|
||||
{
|
||||
name = name_;
|
||||
}
|
||||
|
||||
bool operator==( InternalPlayerBackend const & other ) const
|
||||
{
|
||||
return name == other.name;
|
||||
}
|
||||
|
||||
bool operator!=( InternalPlayerBackend const & other ) const
|
||||
{
|
||||
return !operator==( other );
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef MAKE_FFMPEG_PLAYER
|
||||
static InternalPlayerBackend ffmpeg()
|
||||
{
|
||||
return InternalPlayerBackend( "FFmpeg" );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MAKE_QTMULTIMEDIA_PLAYER
|
||||
static InternalPlayerBackend qtmultimedia()
|
||||
{
|
||||
return InternalPlayerBackend( "Qt Multimedia" );
|
||||
}
|
||||
#endif
|
||||
|
||||
explicit InternalPlayerBackend( QString const & name_ ):
|
||||
name( name_ )
|
||||
{
|
||||
}
|
||||
|
||||
QString name;
|
||||
};
|
||||
|
||||
/// Various user preferences
|
||||
struct Preferences
|
||||
{
|
||||
|
@ -388,7 +329,7 @@ struct Preferences
|
|||
// Whether the word should be pronounced on page load, in main window/popup
|
||||
bool pronounceOnLoadMain, pronounceOnLoadPopup;
|
||||
bool useInternalPlayer;
|
||||
InternalPlayerBackend internalPlayerBackend;
|
||||
InternalPlayerBackend internalPlayerBackend{};
|
||||
QString audioPlaybackProgram;
|
||||
|
||||
ProxyServer proxyServer;
|
||||
|
@ -784,7 +725,7 @@ struct Program
|
|||
|
||||
using Programs = QList< Program >;
|
||||
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
struct VoiceEngine
|
||||
{
|
||||
bool enabled;
|
||||
|
@ -877,7 +818,7 @@ struct Class
|
|||
Lingua lingua;
|
||||
Forvo forvo;
|
||||
Programs programs;
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
VoiceEngines voiceEngines;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -4,11 +4,10 @@
|
|||
#include "aard.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "decompress.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
|
@ -16,19 +15,11 @@
|
|||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QDir>
|
||||
#include <QString>
|
||||
#include <QSemaphore>
|
||||
#include <QThreadPool>
|
||||
#include <QAtomicInt>
|
||||
#include <QDomDocument>
|
||||
#include <QtEndian>
|
||||
#include <QRegularExpression>
|
||||
#include "ufile.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
namespace Aard {
|
||||
|
@ -38,7 +29,6 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -103,7 +93,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -225,11 +215,6 @@ public:
|
|||
|
||||
~AardDictionary();
|
||||
|
||||
/// Always returns an empty property map.
map< Dictionary::Property, string > getProperties() noexcept override
{
  return {};
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -250,8 +235,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
QString const & getDescription() override;
|
||||
|
||||
|
@ -287,19 +274,15 @@ private:
|
|||
|
||||
AardDictionary::AardDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset ),
|
||||
df( dictionaryFiles[ 0 ], "rb" )
|
||||
df( dictionaryFiles[ 0 ], QIODevice::ReadOnly )
|
||||
{
|
||||
// Read dictionary name
|
||||
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
vector< char > dName( idx.read< quint32 >() );
|
||||
if ( dName.size() ) {
|
||||
idx.read( &dName.front(), dName.size() );
|
||||
dictionaryName = string( &dName.front(), dName.size() );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
// Initialize the index
|
||||
|
||||
|
@ -418,7 +401,7 @@ void AardDictionary::loadArticle( quint32 address, string & articleText, bool ra
|
|||
df.read( &articleBody.front(), articleSize );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "AARD: Failed loading article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "AARD: Failed loading article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
break;
|
||||
}
|
||||
catch ( ... ) {
|
||||
|
@ -578,14 +561,14 @@ void AardDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Aard: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Aard: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Aard: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Aard: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( QString::fromStdString( ftsIdxName ) );
|
||||
}
|
||||
}
|
||||
|
@ -601,7 +584,7 @@ void AardDictionary::getArticleText( uint32_t articleAddress, QString & headword
|
|||
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Aard: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Aard: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -619,8 +602,8 @@ AardDictionary::getSearchResults( QString const & searchString, int searchMode,
|
|||
|
||||
class AardArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
AardDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -629,8 +612,8 @@ class AardArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
AardArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
AardArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
AardDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -674,13 +657,13 @@ void AardArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< quint32 > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -711,12 +694,12 @@ void AardArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -732,7 +715,7 @@ void AardArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += "<h3>";
|
||||
|
@ -755,9 +738,9 @@ void AardArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > AardDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -792,17 +775,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
try {
|
||||
|
||||
gdDebug( "Aard: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Aard: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
{
|
||||
QFileInfo info( QString::fromUtf8( fileName.c_str() ) );
|
||||
if ( static_cast< quint64 >( info.size() ) > ULONG_MAX ) {
|
||||
gdWarning( "File %s is too large\n", fileName.c_str() );
|
||||
qWarning( "File %s is too large", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
File::Index df( fileName, "rb" );
|
||||
File::Index df( fileName, QIODevice::ReadOnly );
|
||||
|
||||
AAR_header dictHeader;
|
||||
|
||||
|
@ -811,7 +794,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( strncmp( dictHeader.signature, "aard", 4 )
|
||||
|| ( !has64bitIndex && strncmp( dictHeader.indexItemFormat, ">LL", 4 ) )
|
||||
|| strncmp( dictHeader.keyLengthFormat, ">H", 2 ) || strncmp( dictHeader.articleLengthFormat, ">L", 2 ) ) {
|
||||
gdWarning( "File %s is not in supported aard format\n", fileName.c_str() );
|
||||
qWarning( "File %s is not in supported aard format", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -819,7 +802,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
quint32 size = qFromBigEndian( dictHeader.metaLength );
|
||||
|
||||
if ( size == 0 ) {
|
||||
gdWarning( "File %s has invalid metadata", fileName.c_str() );
|
||||
qWarning( "File %s has invalid metadata", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -833,7 +816,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
map< string, string > meta = parseMetaData( metaStr );
|
||||
|
||||
if ( meta.empty() ) {
|
||||
gdWarning( "File %s has invalid metadata", fileName.c_str() );
|
||||
qWarning( "File %s has invalid metadata", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -871,7 +854,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
initializing.indexingDictionary( dictName );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
|
||||
|
@ -933,7 +916,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
// Insert new entry
|
||||
wstring word = Utf8::decode( string( data.data(), wordSize ) );
|
||||
std::u32string word = Text::toUtf32( string( data.data(), wordSize ) );
|
||||
if ( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand ) {
|
||||
indexedWords.addSingleWord( word, articleOffset );
|
||||
}
|
||||
|
@ -987,11 +970,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Aard dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Aard dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
catch ( ... ) {
|
||||
gdWarning( "Aard dictionary indexing failed\n" );
|
||||
qWarning( "Aard dictionary indexing failed" );
|
||||
continue;
|
||||
}
|
||||
} // if need to rebuild
|
||||
|
@ -999,7 +982,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< AardDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Aard dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Aard dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
134
src/dict/bgl.cc
134
src/dict/bgl.cc
|
@ -8,37 +8,28 @@
|
|||
#include "dictfile.hh"
|
||||
#include "folding.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "language.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QAtomicInt>
|
||||
#include <QCryptographicHash>
|
||||
#include <QDir>
|
||||
#include <QPainter>
|
||||
#include <QRegularExpression>
|
||||
#include <QSemaphore>
|
||||
#include <QThreadPool>
|
||||
|
||||
namespace Bgl {
|
||||
|
||||
using std::map;
|
||||
using std::multimap;
|
||||
using std::set;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
using std::list;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
|
@ -85,7 +76,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -118,7 +109,7 @@ void trimWs( string & word )
|
|||
if ( word.size() ) {
|
||||
unsigned begin = 0;
|
||||
|
||||
while ( begin < word.size() && Utf8::isspace( word[ begin ] ) ) {
|
||||
while ( begin < word.size() && Text::isspace( word[ begin ] ) ) {
|
||||
++begin;
|
||||
}
|
||||
|
||||
|
@ -130,7 +121,7 @@ void trimWs( string & word )
|
|||
|
||||
// Doesn't consist of ws entirely, so must end with just isspace()
|
||||
// condition.
|
||||
while ( Utf8::isspace( word[ end - 1 ] ) ) {
|
||||
while ( Text::isspace( word[ end - 1 ] ) ) {
|
||||
--end;
|
||||
}
|
||||
|
||||
|
@ -144,7 +135,7 @@ void trimWs( string & word )
|
|||
void addEntryToIndex( string & word,
|
||||
uint32_t articleOffset,
|
||||
IndexedWords & indexedWords,
|
||||
vector< wchar > & wcharBuffer )
|
||||
vector< char32_t > & wcharBuffer )
|
||||
{
|
||||
// Strip any leading or trailing whitespaces
|
||||
trimWs( word );
|
||||
|
@ -166,7 +157,7 @@ void addEntryToIndex( string & word,
|
|||
}
|
||||
|
||||
// Convert the word from utf8 to wide chars
|
||||
indexedWords.addWord( Utf8::decode( word ), articleOffset );
|
||||
indexedWords.addWord( Text::toUtf32( word ), articleOffset );
|
||||
}
|
||||
|
||||
class BglDictionary: public BtreeIndexing::BtreeDictionary
|
||||
|
@ -180,11 +171,6 @@ public:
|
|||
|
||||
BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile );
|
||||
|
||||
/// Always returns an empty property map.
map< Dictionary::Property, string > getProperties() noexcept override
{
  return {};
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -205,10 +191,12 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -250,7 +238,7 @@ private:
|
|||
|
||||
BglDictionary::BglDictionary( string const & id, string const & indexFile, string const & dictionaryFile ):
|
||||
BtreeDictionary( id, vector< string >( 1, dictionaryFile ) ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset )
|
||||
{
|
||||
|
@ -258,15 +246,7 @@ BglDictionary::BglDictionary( string const & id, string const & indexFile, strin
|
|||
|
||||
// Read the dictionary's name
|
||||
|
||||
size_t len = idx.read< uint32_t >();
|
||||
|
||||
if ( len ) {
|
||||
vector< char > nameBuf( len );
|
||||
|
||||
idx.read( &nameBuf.front(), len );
|
||||
|
||||
dictionaryName = string( &nameBuf.front(), len );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
// Initialize the index
|
||||
|
||||
|
@ -407,7 +387,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
|
||||
headword = QString::fromUtf8( headwordStr.data(), headwordStr.size() );
|
||||
|
||||
wstring wstr = Utf8::decode( articleStr );
|
||||
std::u32string wstr = Text::toUtf32( articleStr );
|
||||
|
||||
if ( getLangTo() == LangCoder::code2toInt( "he" ) ) {
|
||||
for ( char32_t & i : wstr ) {
|
||||
|
@ -424,7 +404,7 @@ void BglDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
text = Html::unescape( QString::fromStdU32String( wstr ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "BGL: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "BGL: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -440,14 +420,14 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Bgl: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Bgl: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Bgl: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Bgl: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( QString::fromStdString( ftsIdxName ) );
|
||||
}
|
||||
}
|
||||
|
@ -456,7 +436,7 @@ void BglDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
|
||||
class BglHeadwordsRequest: public Dictionary::WordSearchRequest
|
||||
{
|
||||
wstring str;
|
||||
std::u32string str;
|
||||
BglDictionary & dict;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
|
@ -464,7 +444,7 @@ class BglHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
BglHeadwordsRequest( wstring const & word_, BglDictionary & dict_ ):
|
||||
BglHeadwordsRequest( std::u32string const & word_, BglDictionary & dict_ ):
|
||||
str( word_ ),
|
||||
dict( dict_ )
|
||||
{
|
||||
|
@ -496,7 +476,7 @@ void BglHeadwordsRequest::run()
|
|||
|
||||
vector< WordArticleLink > chain = dict.findArticles( str );
|
||||
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( str );
|
||||
std::u32string caseFolded = Folding::applySimpleCaseOnly( str );
|
||||
|
||||
for ( auto & x : chain ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||
|
@ -508,11 +488,11 @@ void BglHeadwordsRequest::run()
|
|||
|
||||
dict.loadArticle( x.articleOffset, headword, displayedHeadword, articleText );
|
||||
|
||||
wstring headwordDecoded;
|
||||
std::u32string headwordDecoded;
|
||||
try {
|
||||
headwordDecoded = Utf8::decode( removePostfix( headword ) );
|
||||
headwordDecoded = Text::toUtf32( removePostfix( headword ) );
|
||||
}
|
||||
catch ( Utf8::exCantDecode & ) {
|
||||
catch ( Text::exCantDecode & ) {
|
||||
}
|
||||
|
||||
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() ) {
|
||||
|
@ -527,7 +507,7 @@ void BglHeadwordsRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( wstring const & word )
|
||||
sptr< Dictionary::WordSearchRequest > BglDictionary::findHeadwordsForSynonym( std::u32string const & word )
|
||||
|
||||
{
|
||||
return synonymSearchEnabled ? std::make_shared< BglHeadwordsRequest >( word, *this ) :
|
||||
|
@ -567,8 +547,8 @@ string postfixToSuperscript( string const & in )
|
|||
|
||||
class BglArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
BglDictionary & dict;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
|
@ -577,8 +557,8 @@ class BglArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
BglArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
BglArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
BglDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -610,11 +590,11 @@ public:
|
|||
|
||||
void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - convert non-unicode to unicode
|
||||
{
|
||||
wstring hebWStr;
|
||||
std::u32string hebWStr;
|
||||
try {
|
||||
hebWStr = Utf8::decode( hebStr );
|
||||
hebWStr = Text::toUtf32( hebStr );
|
||||
}
|
||||
catch ( Utf8::exCantDecode & ) {
|
||||
catch ( Text::exCantDecode & ) {
|
||||
hebStr = "Utf-8 decoding error";
|
||||
return;
|
||||
}
|
||||
|
@ -628,7 +608,7 @@ void BglArticleRequest::fixHebString( string & hebStr ) // Hebrew support - conv
|
|||
i += 1488 - 224; // Convert to Hebrew unicode
|
||||
}
|
||||
}
|
||||
hebStr = Utf8::encode( hebWStr );
|
||||
hebStr = Text::toUtf8( hebWStr );
|
||||
}
|
||||
|
||||
void BglArticleRequest::fixHebArticle( string & hebArticle ) // Hebrew support - remove extra chars at the end
|
||||
|
@ -664,7 +644,7 @@ void BglArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
|
@ -673,7 +653,7 @@ void BglArticleRequest::run()
|
|||
// the bodies to account for this.
|
||||
set< QByteArray > articleBodiesIncluded;
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -701,7 +681,7 @@ void BglArticleRequest::run()
|
|||
|
||||
// We do the case-folded and postfix-less comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( removePostfix( headword ) );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
@ -724,7 +704,7 @@ void BglArticleRequest::run()
|
|||
continue; // Already had this body
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( targetHeadword, articleText ) ) );
|
||||
|
@ -733,7 +713,7 @@ void BglArticleRequest::run()
|
|||
|
||||
} // try
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "BGL: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
|
||||
qWarning( "BGL: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -745,7 +725,7 @@ void BglArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
string cleaner = Utils::Html::getHtmlCleaner();
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
|
@ -822,9 +802,9 @@ void BglArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > BglDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > BglDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -899,8 +879,8 @@ void BglResourceRequest::run()
|
|||
break;
|
||||
}
|
||||
|
||||
vector< char > nameData( idx.read< uint32_t >() );
|
||||
idx.read( &nameData.front(), nameData.size() );
|
||||
vector< char > nameData;
|
||||
idx.readU32SizeAndData<>( nameData );
|
||||
|
||||
for ( size_t x = nameData.size(); x--; ) {
|
||||
nameData[ x ] = tolower( nameData[ x ] );
|
||||
|
@ -917,9 +897,9 @@ void BglResourceRequest::run()
|
|||
|
||||
data.resize( idx.read< uint32_t >() );
|
||||
|
||||
vector< unsigned char > compressedData( idx.read< uint32_t >() );
|
||||
vector< unsigned char > compressedData;
|
||||
|
||||
idx.read( &compressedData.front(), compressedData.size() );
|
||||
idx.readU32SizeAndData<>( compressedData );
|
||||
|
||||
unsigned long decompressedLength = data.size();
|
||||
|
||||
|
@ -929,7 +909,7 @@ void BglResourceRequest::run()
|
|||
compressedData.size() )
|
||||
!= Z_OK
|
||||
|| decompressedLength != data.size() ) {
|
||||
gdWarning( "Failed to decompress resource \"%s\", ignoring it.\n", name.c_str() );
|
||||
qWarning( "Failed to decompress resource \"%s\", ignoring it.", name.c_str() );
|
||||
}
|
||||
else {
|
||||
hasAnyData = true;
|
||||
|
@ -1007,14 +987,14 @@ protected:
|
|||
|
||||
void ResourceHandler::handleBabylonResource( string const & filename, char const * data, size_t size )
|
||||
{
|
||||
//GD_DPRINTF( "Handling resource file %s (%u bytes)\n", filename.c_str(), size );
|
||||
//qDebug( "Handling resource file %s (%u bytes)", filename.c_str(), size );
|
||||
|
||||
vector< unsigned char > compressedData( compressBound( size ) );
|
||||
|
||||
unsigned long compressedSize = compressedData.size();
|
||||
|
||||
if ( compress( &compressedData.front(), &compressedSize, (unsigned char const *)data, size ) != Z_OK ) {
|
||||
gdWarning( "Failed to compress the body of resource \"%s\", dropping it.\n", filename.c_str() );
|
||||
qWarning( "Failed to compress the body of resource \"%s\", dropping it.", filename.c_str() );
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1065,7 +1045,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
// Building the index
|
||||
|
||||
gdDebug( "Bgl: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Bgl: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
try {
|
||||
Babylon b( fileName );
|
||||
|
@ -1077,13 +1057,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
std::string sourceCharset, targetCharset;
|
||||
|
||||
if ( !b.read( sourceCharset, targetCharset ) ) {
|
||||
gdWarning( "Failed to start reading from %s, skipping it\n", fileName.c_str() );
|
||||
qWarning( "Failed to start reading from %s, skipping it", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
|
||||
initializing.indexingDictionary( b.title() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -1105,7 +1085,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
IndexedWords indexedWords;
|
||||
|
||||
// We use this buffer to decode utf8 into it.
|
||||
vector< wchar > wcharBuffer;
|
||||
vector< char32_t > wcharBuffer;
|
||||
|
||||
ChunkedStorage::Writer chunks( idx );
|
||||
|
||||
|
@ -1169,7 +1149,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
idxHeader.chunksOffset = chunks.finish();
|
||||
|
||||
GD_DPRINTF( "Writing index...\n" );
|
||||
qDebug( "Writing index..." );
|
||||
|
||||
// Good. Now build the index
|
||||
|
||||
|
@ -1205,7 +1185,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "BGL dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "BGL dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1213,7 +1193,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< BglDictionary >( dictId, indexFile, fileName ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "BGL dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "BGL dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
#include "bgl_babylon.hh"
|
||||
#include "dictionary.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "globalregex.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "iconv.hh"
|
||||
|
@ -333,10 +332,10 @@ bool Babylon::read( const std::string & source_charset, const std::string & targ
|
|||
convertToUtf8( m_email, BGL_TARGET_CHARSET );
|
||||
convertToUtf8( m_copyright, BGL_TARGET_CHARSET );
|
||||
convertToUtf8( m_description, BGL_TARGET_CHARSET );
|
||||
GD_DPRINTF( "Default charset: %s\nSource Charset: %s\nTargetCharset: %s\n",
|
||||
m_defaultCharset.c_str(),
|
||||
m_sourceCharset.c_str(),
|
||||
m_targetCharset.c_str() );
|
||||
qDebug( "Default charset: %s\nSource Charset: %s\nTargetCharset: %s",
|
||||
m_defaultCharset.c_str(),
|
||||
m_sourceCharset.c_str(),
|
||||
m_targetCharset.c_str() );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -498,7 +497,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = (unsigned char)block.data[ pos ] - 0x3F;
|
||||
|
||||
if ( length > len - a - 2 ) {
|
||||
GD_FDPRINTF( stderr, "Hidden displayed headword is too large %s\n", headword.c_str() );
|
||||
qWarning( "Hidden displayed headword is too large %s", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
@ -511,7 +510,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
||||
|
||||
if ( length > len - a - 2 ) {
|
||||
GD_FDPRINTF( stderr, "Displayed headword's length is too large for headword %s\n", headword.c_str() );
|
||||
qWarning( "Displayed headword's length is too large for headword %s", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
@ -525,7 +524,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = qFromBigEndian( *reinterpret_cast< quint16 * >( block.data + pos + 1 ) );
|
||||
|
||||
if ( length > len - a - 3 ) {
|
||||
GD_FDPRINTF( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
||||
qWarning( "2-byte sized displayed headword for %s is too large", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
@ -541,9 +540,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = (unsigned char)block.data[ pos + 2 ];
|
||||
|
||||
if ( length > len - a - 3 ) {
|
||||
GD_FDPRINTF( stderr,
|
||||
"1-byte-sized transcription's length is too large for headword %s\n",
|
||||
headword.c_str() );
|
||||
qWarning( "1-byte-sized transcription's length is too large for headword %s\n", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
@ -553,7 +550,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 3, length );
|
||||
}
|
||||
catch ( Iconv::Ex & e ) {
|
||||
qWarning( "Bgl: charset conversion error, no trancription processing's done: %s\n", e.what() );
|
||||
qWarning( "Bgl: charset conversion error, no trancription processing's done: %s", e.what() );
|
||||
transcription = std::string( block.data + pos + 3, length );
|
||||
}
|
||||
}
|
||||
|
@ -570,9 +567,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = qFromBigEndian( *reinterpret_cast< quint16 * >( block.data + pos + 2 ) );
|
||||
|
||||
if ( length > len - a - 4 ) {
|
||||
GD_FDPRINTF( stderr,
|
||||
"2-byte-sized transcription's length is too large for headword %s\n",
|
||||
headword.c_str() );
|
||||
qWarning( "2-byte-sized transcription's length is too large for headword %s\n", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
@ -582,7 +577,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 4, length );
|
||||
}
|
||||
catch ( Iconv::Ex & e ) {
|
||||
qWarning( "Bgl: charset conversion error, no transcription processing's done: %s\n", e.what() );
|
||||
qWarning( "Bgl: charset conversion error, no transcription processing's done: %s", e.what() );
|
||||
transcription = std::string( block.data + pos + 4, length );
|
||||
}
|
||||
}
|
||||
|
@ -600,7 +595,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
unsigned length = (unsigned char)block.data[ pos ] - 0x3F;
|
||||
|
||||
if ( length > len - a - 2 ) {
|
||||
GD_FDPRINTF( stderr, "Hidden transcription is too large %s\n", headword.c_str() );
|
||||
qWarning( "Hidden transcription is too large %s", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -3,28 +3,21 @@
|
|||
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include <QRunnable>
|
||||
#include <QThreadPool>
|
||||
#include <QSemaphore>
|
||||
#include "text.hh"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "gddebug.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
#include <QRegularExpression>
|
||||
#include "wildcard.hh"
|
||||
#include "globalbroadcaster.hh"
|
||||
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
#include <zlib.h>
|
||||
|
||||
namespace BtreeIndexing {
|
||||
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
using std::pair;
|
||||
|
||||
enum {
|
||||
|
@ -63,14 +56,14 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, File::Index & file, QMu
|
|||
}
|
||||
|
||||
vector< WordArticleLink >
|
||||
BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
|
||||
BtreeIndex::findArticles( std::u32string const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
|
||||
{
|
||||
//First trim ending zero
|
||||
wstring word = gd::removeTrailingZero( search_word );
|
||||
std::u32string word = Text::removeTrailingZero( search_word );
|
||||
vector< WordArticleLink > result;
|
||||
|
||||
try {
|
||||
wstring folded = Folding::apply( word );
|
||||
std::u32string folded = Folding::apply( word );
|
||||
if ( folded.empty() ) {
|
||||
folded = Folding::applyWhitespaceOnly( word );
|
||||
}
|
||||
|
@ -91,11 +84,11 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
|
|||
}
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Articles searching failed, error: %s\n", e.what() );
|
||||
qWarning( "Articles searching failed, error: %s", e.what() );
|
||||
result.clear();
|
||||
}
|
||||
catch ( ... ) {
|
||||
qWarning( "Articles searching failed\n" );
|
||||
qWarning( "Articles searching failed" );
|
||||
result.clear();
|
||||
}
|
||||
|
||||
|
@ -104,7 +97,7 @@ BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, ui
|
|||
|
||||
|
||||
BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_,
|
||||
wstring const & str_,
|
||||
std::u32string const & str_,
|
||||
unsigned minLength_,
|
||||
int maxSuffixVariation_,
|
||||
bool allowMiddleMatches_,
|
||||
|
@ -141,11 +134,11 @@ void BtreeWordSearchRequest::findMatches()
|
|||
|
||||
bool useWildcards = false;
|
||||
if ( allowMiddleMatches ) {
|
||||
useWildcards = ( str.find( '*' ) != wstring::npos || str.find( '?' ) != wstring::npos
|
||||
|| str.find( '[' ) != wstring::npos || str.find( ']' ) != wstring::npos );
|
||||
useWildcards = ( str.find( '*' ) != std::u32string::npos || str.find( '?' ) != std::u32string::npos
|
||||
|| str.find( '[' ) != std::u32string::npos || str.find( ']' ) != std::u32string::npos );
|
||||
}
|
||||
|
||||
wstring folded = Folding::apply( str );
|
||||
std::u32string folded = Folding::apply( str );
|
||||
|
||||
int minMatchLength = 0;
|
||||
|
||||
|
@ -158,7 +151,7 @@ void BtreeWordSearchRequest::findMatches()
|
|||
regexp.setPatternOptions( QRegularExpression::CaseInsensitiveOption );
|
||||
|
||||
bool bNoLetters = folded.empty();
|
||||
wstring foldedWithWildcards;
|
||||
std::u32string foldedWithWildcards;
|
||||
|
||||
if ( bNoLetters ) {
|
||||
foldedWithWildcards = Folding::applyWhitespaceOnly( str );
|
||||
|
@ -268,13 +261,13 @@ void BtreeWordSearchRequest::findMatches()
|
|||
break;
|
||||
}
|
||||
|
||||
//GD_DPRINTF( "offset = %u, size = %u\n", chainOffset - &leaf.front(), leaf.size() );
|
||||
//qDebug( "offset = %u, size = %u", chainOffset - &leaf.front(), leaf.size() );
|
||||
|
||||
vector< WordArticleLink > chain = dict.readChain( chainOffset );
|
||||
|
||||
wstring chainHead = Utf8::decode( chain[ 0 ].word );
|
||||
std::u32string chainHead = Text::toUtf32( chain[ 0 ].word );
|
||||
|
||||
wstring resultFolded = Folding::apply( chainHead );
|
||||
std::u32string resultFolded = Folding::apply( chainHead );
|
||||
if ( resultFolded.empty() ) {
|
||||
resultFolded = Folding::applyWhitespaceOnly( chainHead );
|
||||
}
|
||||
|
@ -290,9 +283,9 @@ void BtreeWordSearchRequest::findMatches()
|
|||
break;
|
||||
}
|
||||
if ( useWildcards ) {
|
||||
wstring word = Utf8::decode( x.prefix + x.word );
|
||||
wstring result = Folding::applyDiacriticsOnly( word );
|
||||
if ( result.size() >= (wstring::size_type)minMatchLength ) {
|
||||
std::u32string word = Text::toUtf32( x.prefix + x.word );
|
||||
std::u32string result = Folding::applyDiacriticsOnly( word );
|
||||
if ( result.size() >= (std::u32string::size_type)minMatchLength ) {
|
||||
QRegularExpressionMatch match = regexp.match( QString::fromStdU32String( result ) );
|
||||
if ( match.hasMatch() && match.capturedStart() == 0 ) {
|
||||
addMatch( word );
|
||||
|
@ -302,10 +295,10 @@ void BtreeWordSearchRequest::findMatches()
|
|||
else {
|
||||
// Skip middle matches, if requested. If suffix variation is specified,
|
||||
// make sure the string isn't larger than requested.
|
||||
if ( ( allowMiddleMatches || Folding::apply( Utf8::decode( x.prefix ) ).empty() )
|
||||
if ( ( allowMiddleMatches || Folding::apply( Text::toUtf32( x.prefix ) ).empty() )
|
||||
&& ( maxSuffixVariation < 0
|
||||
|| (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) ) {
|
||||
addMatch( Utf8::decode( x.prefix + x.word ) );
|
||||
addMatch( Text::toUtf32( x.prefix + x.word ) );
|
||||
}
|
||||
}
|
||||
if ( matches.size() >= maxResults ) {
|
||||
|
@ -331,7 +324,7 @@ void BtreeWordSearchRequest::findMatches()
|
|||
if ( chainOffset >= leafEnd ) {
|
||||
// We're past the current leaf, fetch the next one
|
||||
|
||||
//GD_DPRINTF( "advancing\n" );
|
||||
//qDebug( "advancing" );
|
||||
|
||||
if ( nextLeaf ) {
|
||||
QMutexLocker _( dict.idxFileMutex );
|
||||
|
@ -345,7 +338,7 @@ void BtreeWordSearchRequest::findMatches()
|
|||
uint32_t leafEntries = *(uint32_t *)&leaf.front();
|
||||
|
||||
if ( leafEntries == 0xffffFFFF ) {
|
||||
//GD_DPRINTF( "bah!\n" );
|
||||
//qDebug( "bah!" );
|
||||
exit( 1 );
|
||||
}
|
||||
}
|
||||
|
@ -366,10 +359,10 @@ void BtreeWordSearchRequest::findMatches()
|
|||
}
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
qWarning( "Index searching failed: \"%s\", error: %s\n", dict.getName().c_str(), e.what() );
|
||||
qWarning( "Index searching failed: \"%s\", error: %s", dict.getName().c_str(), e.what() );
|
||||
}
|
||||
catch ( ... ) {
|
||||
gdWarning( "Index searching failed: \"%s\"\n", dict.getName().c_str() );
|
||||
qWarning( "Index searching failed: \"%s\"", dict.getName().c_str() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -397,13 +390,14 @@ BtreeWordSearchRequest::~BtreeWordSearchRequest()
|
|||
f.waitForFinished();
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
|
||||
sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch( std::u32string const & str,
|
||||
unsigned long maxResults )
|
||||
|
||||
{
|
||||
return std::make_shared< BtreeWordSearchRequest >( *this, str, 0, -1, true, maxResults );
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( wstring const & str,
|
||||
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch( std::u32string const & str,
|
||||
unsigned minLength,
|
||||
unsigned maxSuffixVariation,
|
||||
unsigned long maxResults )
|
||||
|
@ -424,7 +418,7 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
|
|||
uint32_t uncompressedSize = idxFile->read< uint32_t >();
|
||||
uint32_t compressedSize = idxFile->read< uint32_t >();
|
||||
|
||||
//GD_DPRINTF( "%x,%x\n", uncompressedSize, compressedSize );
|
||||
//qDebug( "%x,%x", uncompressedSize, compressedSize );
|
||||
|
||||
out.resize( uncompressedSize );
|
||||
|
||||
|
@ -441,8 +435,11 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out )
|
|||
}
|
||||
}
|
||||
|
||||
char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
||||
wstring const & target, bool & exactMatch, vector< char > & extLeaf, uint32_t & nextLeaf, char const *& leafEnd )
|
||||
char const * BtreeIndex::findChainOffsetExactOrPrefix( std::u32string const & target,
|
||||
bool & exactMatch,
|
||||
vector< char > & extLeaf,
|
||||
uint32_t & nextLeaf,
|
||||
char const *& leafEnd )
|
||||
{
|
||||
if ( !idxFile ) {
|
||||
throw exIndexWasNotOpened();
|
||||
|
@ -453,7 +450,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
// Lookup the index by traversing the index btree
|
||||
|
||||
// vector< wchar > wcharBuffer;
|
||||
wstring w_word;
|
||||
std::u32string w_word;
|
||||
exactMatch = false;
|
||||
|
||||
// Read a node
|
||||
|
@ -505,7 +502,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
if ( leafEntries == 0xffffFFFF ) {
|
||||
// A node
|
||||
|
||||
//GD_DPRINTF( "=>a node\n" );
|
||||
//qDebug( "=>a node" );
|
||||
|
||||
uint32_t const * offsets = (uint32_t *)leaf + 1;
|
||||
|
||||
|
@ -534,7 +531,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
|
||||
size_t wordSize = strlen( closestString );
|
||||
|
||||
w_word = Utf8::decode( string( closestString, wordSize ) );
|
||||
w_word = Text::toUtf32( string( closestString, wordSize ) );
|
||||
|
||||
compareResult = target.compare( w_word );
|
||||
|
||||
|
@ -591,13 +588,13 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
currentNodeOffset = offsets[ entry + 1 ];
|
||||
}
|
||||
|
||||
//GD_DPRINTF( "reading node at %x\n", currentNodeOffset );
|
||||
//qDebug( "reading node at %x", currentNodeOffset );
|
||||
readNode( currentNodeOffset, extLeaf );
|
||||
leaf = &extLeaf.front();
|
||||
leafEnd = leaf + extLeaf.size();
|
||||
}
|
||||
else {
|
||||
//GD_DPRINTF( "=>a leaf\n" );
|
||||
//qDebug( "=>a leaf" );
|
||||
// A leaf
|
||||
|
||||
// If this leaf is the root, there's no next leaf, it just can't be.
|
||||
|
@ -630,7 +627,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
|
||||
memcpy( &chainSize, ptr, sizeof( uint32_t ) );
|
||||
|
||||
//GD_DPRINTF( "%s + %s\n", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );
|
||||
//qDebug( "%s + %s", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );
|
||||
|
||||
ptr += sizeof( uint32_t ) + chainSize;
|
||||
}
|
||||
|
@ -643,7 +640,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
unsigned windowSize = chainOffsets.size();
|
||||
|
||||
for ( ;; ) {
|
||||
//GD_DPRINTF( "window = %u, ws = %u\n", window - &chainOffsets.front(), windowSize );
|
||||
//qDebug( "window = %u, ws = %u", window - &chainOffsets.front(), windowSize );
|
||||
|
||||
char const ** chainToCheck = window + windowSize / 2;
|
||||
ptr = *chainToCheck;
|
||||
|
@ -653,9 +650,9 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix(
|
|||
|
||||
size_t wordSize = strlen( ptr );
|
||||
|
||||
w_word = Utf8::decode( string( ptr, wordSize ) );
|
||||
w_word = Text::toUtf32( string( ptr, wordSize ) );
|
||||
|
||||
wstring foldedWord = Folding::apply( w_word );
|
||||
std::u32string foldedWord = Folding::apply( w_word );
|
||||
if ( foldedWord.empty() ) {
|
||||
foldedWord = Folding::applyWhitespaceOnly( w_word );
|
||||
}
|
||||
|
@ -754,9 +751,9 @@ vector< WordArticleLink > BtreeIndex::readChain( char const *& ptr, uint32_t max
|
|||
return result;
|
||||
}
|
||||
|
||||
void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
|
||||
void BtreeIndex::antialias( std::u32string const & str, vector< WordArticleLink > & chain, bool ignoreDiacritics )
|
||||
{
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) );
|
||||
std::u32string caseFolded = Folding::applySimpleCaseOnly( Text::normalize( str ) );
|
||||
if ( ignoreDiacritics ) {
|
||||
caseFolded = Folding::applyDiacriticsOnly( caseFolded );
|
||||
}
|
||||
|
@ -768,8 +765,8 @@ void BtreeIndex::antialias( wstring const & str, vector< WordArticleLink > & cha
|
|||
for ( unsigned x = chain.size(); x--; ) {
|
||||
// If after applying case folding to each word they wouldn't match, we
|
||||
// drop the entry.
|
||||
wstring entry =
|
||||
Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) );
|
||||
std::u32string entry =
|
||||
Folding::applySimpleCaseOnly( Text::normalize( Text::toUtf32( chain[ x ].prefix + chain[ x ].word ) ) );
|
||||
if ( ignoreDiacritics ) {
|
||||
entry = Folding::applyDiacriticsOnly( entry );
|
||||
}
|
||||
|
@ -927,9 +924,9 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
|
|||
return offset;
|
||||
}
|
||||
|
||||
void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
|
||||
void IndexedWords::addWord( std::u32string const & index_word, uint32_t articleOffset, unsigned int maxHeadwordSize )
|
||||
{
|
||||
wstring word = gd::removeTrailingZero( index_word );
|
||||
std::u32string word = Text::removeTrailingZero( index_word );
|
||||
string::size_type wordSize = word.size();
|
||||
|
||||
// Safeguard us against various bugs here. Don't attempt adding words
|
||||
|
@ -949,7 +946,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
|
|||
|
||||
wordSize = word.size();
|
||||
}
|
||||
wchar const * wordBegin = word.c_str();
|
||||
char32_t const * wordBegin = word.c_str();
|
||||
|
||||
// Skip any leading whitespace
|
||||
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
|
||||
|
@ -962,7 +959,7 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
|
|||
--wordSize;
|
||||
}
|
||||
|
||||
wchar const * nextChar = wordBegin;
|
||||
char32_t const * nextChar = wordBegin;
|
||||
|
||||
vector< char > utfBuffer( wordSize * 4 );
|
||||
|
||||
|
@ -974,11 +971,11 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
|
|||
if ( !*nextChar ) // End of string ends everything
|
||||
{
|
||||
if ( wordsAdded == 0 ) {
|
||||
wstring folded = Folding::applyWhitespaceOnly( wstring( wordBegin, wordSize ) );
|
||||
std::u32string folded = Folding::applyWhitespaceOnly( std::u32string( wordBegin, wordSize ) );
|
||||
if ( !folded.empty() ) {
|
||||
auto i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first;
|
||||
auto i = insert( { Text::toUtf8( folded ), vector< WordArticleLink >() } ).first;
|
||||
|
||||
string utfWord = Utf8::encode( wstring( wordBegin, wordSize ) );
|
||||
string utfWord = Text::toUtf8( std::u32string( wordBegin, wordSize ) );
|
||||
string utfPrefix;
|
||||
i->second.emplace_back( utfWord, articleOffset, utfPrefix );
|
||||
}
|
||||
|
@ -992,15 +989,15 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
|
|||
}
|
||||
|
||||
// Insert this word
|
||||
wstring folded = Folding::apply( nextChar );
|
||||
auto name = Utf8::encode( folded );
|
||||
std::u32string folded = Folding::apply( nextChar );
|
||||
auto name = Text::toUtf8( folded );
|
||||
|
||||
auto i = insert( { std::move( name ), vector< WordArticleLink >() } ).first;
|
||||
|
||||
if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
|
||||
{
|
||||
string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) );
|
||||
string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) );
|
||||
string utfWord = Text::toUtf8( std::u32string( nextChar, wordSize - ( nextChar - wordBegin ) ) );
|
||||
string utfPrefix = Text::toUtf8( std::u32string( wordBegin, nextChar - wordBegin ) );
|
||||
|
||||
i->second.emplace_back( std::move( utfWord ), articleOffset, std::move( utfPrefix ) );
|
||||
// reduce the vector reallocation.
|
||||
|
@ -1024,14 +1021,14 @@ void IndexedWords::addWord( wstring const & index_word, uint32_t articleOffset,
|
|||
}
|
||||
}
|
||||
|
||||
void IndexedWords::addSingleWord( wstring const & index_word, uint32_t articleOffset )
|
||||
void IndexedWords::addSingleWord( std::u32string const & index_word, uint32_t articleOffset )
|
||||
{
|
||||
wstring const & word = gd::removeTrailingZero( index_word );
|
||||
wstring folded = Folding::apply( word );
|
||||
std::u32string const & word = Text::removeTrailingZero( index_word );
|
||||
std::u32string folded = Folding::apply( word );
|
||||
if ( folded.empty() ) {
|
||||
folded = Folding::applyWhitespaceOnly( word );
|
||||
}
|
||||
operator[]( Utf8::encode( folded ) ).emplace_back( Utf8::encode( word ), articleOffset );
|
||||
operator[]( Text::toUtf8( folded ) ).emplace_back( Text::toUtf8( word ), articleOffset );
|
||||
}
|
||||
|
||||
IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
|
||||
|
@ -1059,7 +1056,7 @@ IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
|
|||
btreeMaxElements = BtreeMaxElements;
|
||||
}
|
||||
|
||||
GD_DPRINTF( "Building a tree of %u elements\n", (unsigned)btreeMaxElements );
|
||||
qDebug( "Building a tree of %u elements", (unsigned)btreeMaxElements );
|
||||
|
||||
|
||||
uint32_t lastLeafOffset = 0;
|
||||
|
@ -1419,7 +1416,7 @@ bool BtreeDictionary::getHeadwords( QStringList & headwords )
|
|||
}
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Failed headwords retrieving for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Failed headwords retrieving for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
|
||||
return headwords.size() > 0;
|
||||
|
|
|
@ -5,17 +5,12 @@
|
|||
|
||||
#include "dict/dictionary.hh"
|
||||
#include "dictfile.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <QFuture>
|
||||
#include <QList>
|
||||
#include <QSet>
|
||||
#include <QList>
|
||||
|
||||
|
||||
/// A base for the dictionary which creates a btree index to look up
|
||||
|
@ -23,7 +18,6 @@
|
|||
namespace BtreeIndexing {
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::vector;
|
||||
using std::map;
|
||||
|
||||
|
@ -85,7 +79,8 @@ public:
|
|||
|
||||
/// Finds articles that match the given string. A case-insensitive search
|
||||
/// is performed.
|
||||
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
|
||||
vector< WordArticleLink >
|
||||
findArticles( std::u32string const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
|
||||
|
||||
/// Find all unique article links in the index
|
||||
void findAllArticleLinks( QList< WordArticleLink > & articleLinks );
|
||||
|
@ -121,8 +116,11 @@ protected:
|
|||
/// case, the returned pointer wouldn't belong to 'leaf' at all. To that end,
|
||||
/// the leafEnd pointer always holds the pointer to the first byte outside
|
||||
/// the node data.
|
||||
char const * findChainOffsetExactOrPrefix(
|
||||
wstring const & target, bool & exactMatch, vector< char > & leaf, uint32_t & nextLeaf, char const *& leafEnd );
|
||||
char const * findChainOffsetExactOrPrefix( std::u32string const & target,
|
||||
bool & exactMatch,
|
||||
vector< char > & leaf,
|
||||
uint32_t & nextLeaf,
|
||||
char const *& leafEnd );
|
||||
|
||||
/// Reads a node or leaf at the given offset. Just uncompresses its data
|
||||
/// to the given vector and does nothing more.
|
||||
|
@ -134,7 +132,7 @@ protected:
|
|||
|
||||
/// Drops any aliases which arose due to folding. Only case-folded aliases
|
||||
/// are left.
|
||||
void antialias( wstring const &, vector< WordArticleLink > &, bool ignoreDiactitics );
|
||||
void antialias( std::u32string const &, vector< WordArticleLink > &, bool ignoreDiactitics );
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -166,10 +164,10 @@ public:
|
|||
|
||||
/// This function does the search using the btree index. Derivatives usually
|
||||
/// need not to implement this function.
|
||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
|
||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
|
||||
|
||||
virtual sptr< Dictionary::WordSearchRequest >
|
||||
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
|
||||
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
|
||||
|
||||
virtual bool isLocalDictionary()
|
||||
{
|
||||
|
@ -215,7 +213,7 @@ class BtreeWordSearchRequest: public Dictionary::WordSearchRequest
|
|||
{
|
||||
protected:
|
||||
BtreeDictionary & dict;
|
||||
wstring str;
|
||||
std::u32string str;
|
||||
unsigned long maxResults;
|
||||
unsigned minLength;
|
||||
int maxSuffixVariation;
|
||||
|
@ -226,7 +224,7 @@ protected:
|
|||
public:
|
||||
|
||||
BtreeWordSearchRequest( BtreeDictionary & dict_,
|
||||
wstring const & str_,
|
||||
std::u32string const & str_,
|
||||
unsigned minLength_,
|
||||
int maxSuffixVariation_,
|
||||
bool allowMiddleMatches_,
|
||||
|
@ -256,11 +254,11 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
|
|||
/// Instead of adding to the map directly, use this function. It does folding
|
||||
/// itself, and for phrases/sentences it adds additional entries beginning with
|
||||
/// each new word.
|
||||
void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
|
||||
void addWord( std::u32string const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
|
||||
|
||||
/// Differs from addWord() in that it only adds a single entry. We use this
|
||||
/// for zip's file names.
|
||||
void addSingleWord( wstring const & word, uint32_t articleOffset );
|
||||
void addSingleWord( std::u32string const & word, uint32_t articleOffset );
|
||||
};
|
||||
|
||||
/// Builds the index, as a compressed btree. Returns IndexInfo.
|
||||
|
|
|
@ -4,10 +4,9 @@
|
|||
#include "dictdfiles.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
#include "langcoder.hh"
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
@ -16,17 +15,13 @@
|
|||
#include <list>
|
||||
#include <wctype.h>
|
||||
#include <stdlib.h>
|
||||
#include "gddebug.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include <QDir>
|
||||
#include <QUrl>
|
||||
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
namespace DictdFiles {
|
||||
|
||||
using std::map;
|
||||
|
@ -34,7 +29,6 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::vector;
|
||||
using std::list;
|
||||
|
||||
|
@ -74,7 +68,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -96,16 +90,6 @@ public:
|
|||
|
||||
~DictdDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -128,8 +112,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
QString const & getDescription() override;
|
||||
|
||||
|
@ -155,19 +141,15 @@ DictdDictionary::DictdDictionary( string const & id,
|
|||
string const & indexFile,
|
||||
vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
indexFile( dictionaryFiles[ 0 ], "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
indexFile( dictionaryFiles[ 0 ], QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() )
|
||||
{
|
||||
|
||||
// Read the dictionary name
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
|
||||
vector< char > dName( idx.read< uint32_t >() );
|
||||
if ( dName.size() > 0 ) {
|
||||
idx.read( &dName.front(), dName.size() );
|
||||
dictionaryName = string( &dName.front(), dName.size() );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
// Open the .dict file
|
||||
|
||||
|
@ -253,9 +235,9 @@ uint32_t decodeBase64( string const & str )
|
|||
return number;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > DictdDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -270,13 +252,13 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, string > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, string > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -396,12 +378,12 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, string > & mapToUse =
|
||||
multimap< std::u32string, string > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( x.word ), articleText ) );
|
||||
|
@ -415,7 +397,7 @@ sptr< Dictionary::DataRequest > DictdDictionary::getArticle( wstring const & wor
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, string >::const_iterator i;
|
||||
multimap< std::u32string, string >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += i->second;
|
||||
|
@ -441,7 +423,8 @@ QString const & DictdDictionary::getDescription()
|
|||
return dictionaryDescription;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > req = getArticle( U"00databaseinfo", vector< wstring >(), wstring(), false );
|
||||
sptr< Dictionary::DataRequest > req =
|
||||
getArticle( U"00databaseinfo", vector< std::u32string >(), std::u32string(), false );
|
||||
|
||||
if ( req->dataSize() > 0 ) {
|
||||
dictionaryDescription = QString::fromUtf8( req->getFullData().data(), req->getFullData().size() );
|
||||
|
@ -469,14 +452,14 @@ void DictdDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "DictD: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "DictD: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "DictD: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "DictD: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( QString::fromStdString( ftsIdxName ) );
|
||||
}
|
||||
}
|
||||
|
@ -550,7 +533,7 @@ void DictdDictionary::getArticleText( uint32_t articleAddress, QString & headwor
|
|||
}
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "DictD: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "DictD: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -602,11 +585,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// Building the index
|
||||
string dictionaryName = nameFromFileName( dictFiles[ 0 ] );
|
||||
|
||||
gdDebug( "DictD: Building the index for dictionary: %s\n", dictionaryName.c_str() );
|
||||
qDebug( "DictD: Building the index for dictionary: %s", dictionaryName.c_str() );
|
||||
|
||||
initializing.indexingDictionary( dictionaryName );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -619,7 +602,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
IndexedWords indexedWords;
|
||||
|
||||
File::Index indexFile( dictFiles[ 0 ], "rb" );
|
||||
File::Index indexFile( dictFiles[ 0 ], QIODevice::ReadOnly );
|
||||
|
||||
// Read words from the index until none are left.
|
||||
|
||||
|
@ -641,17 +624,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( tab3 ) {
|
||||
char * tab4 = strchr( tab3 + 1, '\t' );
|
||||
if ( tab4 ) {
|
||||
GD_DPRINTF( "Warning: too many tabs present, skipping: %s\n", buf );
|
||||
qDebug( "Warning: too many tabs present, skipping: %s", buf );
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle the fourth entry, if it exists. From dictfmt man:
|
||||
// When --index-keep-orig option is used fourth column is created
|
||||
// (if necessary) in .index file.
|
||||
indexedWords.addWord( Utf8::decode( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
|
||||
indexedWords.addWord( Text::toUtf32( string( tab3 + 1, strlen( tab3 + 1 ) ) ), curOffset );
|
||||
++idxHeader.wordCount;
|
||||
}
|
||||
indexedWords.addWord( Utf8::decode( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
|
||||
indexedWords.addWord( Text::toUtf32( string( buf, strchr( buf, '\t' ) - buf ) ), curOffset );
|
||||
++idxHeader.wordCount;
|
||||
++idxHeader.articleCount;
|
||||
|
||||
|
@ -676,7 +659,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
eol = articleBody; // No headword itself
|
||||
}
|
||||
if ( eol ) {
|
||||
while ( *eol && Utf8::isspace( *eol ) ) {
|
||||
while ( *eol && Text::isspace( *eol ) ) {
|
||||
++eol; // skip spaces
|
||||
}
|
||||
|
||||
|
@ -686,7 +669,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
*endEol = 0;
|
||||
}
|
||||
|
||||
GD_DPRINTF( "DICT NAME: '%s'\n", eol );
|
||||
qDebug( "DICT NAME: '%s'", eol );
|
||||
dictionaryName = eol;
|
||||
}
|
||||
}
|
||||
|
@ -698,12 +681,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
}
|
||||
else {
|
||||
GD_DPRINTF( "Warning: only a single tab present, skipping: %s\n", buf );
|
||||
qDebug( "Warning: only a single tab present, skipping: %s", buf );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else {
|
||||
GD_DPRINTF( "Warning: no tabs present, skipping: %s\n", buf );
|
||||
qDebug( "Warning: no tabs present, skipping: %s", buf );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -747,7 +730,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< DictdDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Dictd dictionary \"%s\" reading failed, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Dictd dictionary \"%s\" reading failed, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ void Class::deferredInit()
|
|||
//base method.
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
|
||||
sptr< WordSearchRequest > Class::stemmedMatch( std::u32string const & /*str*/,
|
||||
unsigned /*minLength*/,
|
||||
unsigned /*maxSuffixVariation*/,
|
||||
unsigned long /*maxResults*/ )
|
||||
|
@ -185,12 +185,12 @@ sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
|
|||
return std::make_shared< WordSearchRequestInstant >();
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & )
|
||||
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( std::u32string const & )
|
||||
{
|
||||
return std::make_shared< WordSearchRequestInstant >();
|
||||
}
|
||||
|
||||
vector< wstring > Class::getAlternateWritings( wstring const & ) noexcept
|
||||
vector< std::u32string > Class::getAlternateWritings( std::u32string const & ) noexcept
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "langcoder.hh"
|
||||
#include "sptr.hh"
|
||||
#include "utils.hh"
|
||||
#include "wstring.hh"
|
||||
#include "text.hh"
|
||||
#include <QtGlobal>
|
||||
|
||||
/// Abstract dictionary-related stuff
|
||||
|
@ -27,16 +27,8 @@ namespace Dictionary {
|
|||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::map;
|
||||
|
||||
enum Property {
|
||||
Author,
|
||||
Copyright,
|
||||
Description,
|
||||
Email
|
||||
};
|
||||
|
||||
DEF_EX( Ex, "Dictionary error", std::exception )
|
||||
DEF_EX( exIndexOutOfRange, "The supplied index is out of range", Ex )
|
||||
DEF_EX( exSliceOutOfRange, "The requested data slice is out of range", Ex )
|
||||
|
@ -131,19 +123,19 @@ private:
|
|||
/// algorithms. Positive values are used by morphology matches.
|
||||
struct WordMatch
|
||||
{
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
int weight;
|
||||
|
||||
WordMatch():
|
||||
weight( 0 )
|
||||
{
|
||||
}
|
||||
WordMatch( wstring const & word_ ):
|
||||
WordMatch( std::u32string const & word_ ):
|
||||
word( word_ ),
|
||||
weight( 0 )
|
||||
{
|
||||
}
|
||||
WordMatch( wstring const & word_, int weight_ ):
|
||||
WordMatch( std::u32string const & word_, int weight_ ):
|
||||
word( word_ ),
|
||||
weight( weight_ )
|
||||
{
|
||||
|
@ -380,10 +372,6 @@ public:
|
|||
metadata_enable_fts = _enable_FTS;
|
||||
}
|
||||
|
||||
/// Returns all the available properties, like the author's name, copyright,
|
||||
/// description etc. All strings are in utf8.
|
||||
virtual map< Property, string > getProperties() noexcept = 0;
|
||||
|
||||
/// Returns the features the dictionary possesses. See the Feature enum for
|
||||
/// their list.
|
||||
virtual Features getFeatures() const noexcept
|
||||
|
@ -442,7 +430,7 @@ public:
|
|||
/// prefix results should be added. Not more than maxResults results should
|
||||
/// be stored. The whole operation is supposed to be fast, though some
|
||||
/// dictionaries, the network ones particularly, may of course be slow.
|
||||
virtual sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) = 0;
|
||||
virtual sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) = 0;
|
||||
|
||||
/// Looks up a given word in the dictionary, aiming to find different forms
|
||||
/// of the given word by allowing suffix variations. This means allowing words
|
||||
|
@ -453,20 +441,20 @@ public:
|
|||
/// in the middle of a phrase got matched should be returned.
|
||||
/// The default implementation does nothing, returning an empty result.
|
||||
virtual sptr< WordSearchRequest >
|
||||
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
|
||||
stemmedMatch( std::u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults );
|
||||
|
||||
/// Finds known headwords for the given word, that is, the words for which
|
||||
/// the given word is a synonym. If a dictionary can't perform this operation,
|
||||
/// it should leave the default implementation which always returns an empty
|
||||
/// result.
|
||||
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & );
|
||||
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
|
||||
|
||||
/// For a given word, provides alternate writings of it which are to be looked
|
||||
/// up alongside with it. Transliteration dictionaries implement this. The
|
||||
/// default implementation returns an empty list. Note that this function is
|
||||
/// supposed to be very fast and simple, and the results are thus returned
|
||||
/// synchronously.
|
||||
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
|
||||
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
|
||||
|
||||
/// Returns a definition for the given word. The definition should
|
||||
/// be an html fragment (without html/head/body tags) in UTF-8 encoding.
|
||||
|
@ -475,10 +463,10 @@ public:
|
|||
/// synonyms for the main word.
|
||||
/// context is dictionary-specific data, currently only used for the
|
||||
/// 'Websites' feature.
|
||||
virtual sptr< DataRequest > getArticle( wstring const &,
|
||||
vector< wstring > const & alts,
|
||||
wstring const & context = wstring(),
|
||||
bool ignoreDiacritics = false ) = 0;
|
||||
virtual sptr< DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const & context = std::u32string(),
|
||||
bool ignoreDiacritics = false ) = 0;
|
||||
|
||||
/// Loads contents of a resource named 'name' into the 'data' vector. This is
|
||||
/// usually a picture file referenced in the article or something like that.
|
||||
|
|
|
@ -2,16 +2,16 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "dictserver.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include <QTimer>
|
||||
#include <QUrl>
|
||||
#include <QTcpSocket>
|
||||
#include <QString>
|
||||
#include <list>
|
||||
#include "htmlescape.hh"
|
||||
|
||||
#include <QCryptographicHash>
|
||||
#include <QDir>
|
||||
#include <QFileInfo>
|
||||
#include <QRegularExpression>
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
|
||||
namespace DictServer {
|
||||
|
||||
|
@ -176,7 +176,6 @@ class DictServerDictionary: public Dictionary::Class
|
|||
{
|
||||
Q_OBJECT
|
||||
|
||||
string name;
|
||||
QString url, icon;
|
||||
quint32 langId;
|
||||
QString errorString;
|
||||
|
@ -196,11 +195,13 @@ public:
|
|||
QString const & strategies_,
|
||||
QString const & icon_ ):
|
||||
Dictionary::Class( id, vector< string >() ),
|
||||
name( name_ ),
|
||||
url( url_ ),
|
||||
icon( icon_ ),
|
||||
langId( 0 )
|
||||
{
|
||||
|
||||
dictionaryName = name_;
|
||||
|
||||
int pos = url.indexOf( "://" );
|
||||
if ( pos < 0 ) {
|
||||
url = "dict://" + url;
|
||||
|
@ -301,15 +302,6 @@ public:
|
|||
disconnectFromServer( socket );
|
||||
}
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -321,9 +313,10 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
|
||||
|
||||
quint32 getLangFrom() const override
|
||||
{
|
||||
|
@ -394,7 +387,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
|
|||
{
|
||||
Q_OBJECT
|
||||
QAtomicInt isCancelled;
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
QString errorString;
|
||||
DictServerDictionary & dict;
|
||||
|
||||
|
@ -409,7 +402,7 @@ class DictServerWordSearchRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
DictServerWordSearchRequest( wstring word_, DictServerDictionary & dict_ ):
|
||||
DictServerWordSearchRequest( std::u32string word_, DictServerDictionary & dict_ ):
|
||||
word( std::move( word_ ) ),
|
||||
dict( dict_ ),
|
||||
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-w" ) )
|
||||
|
@ -486,7 +479,7 @@ void DictServerWordSearchRequest::run()
|
|||
qDebug() << "receive match:" << reply;
|
||||
auto code = reply.left( 3 );
|
||||
|
||||
if ( reply.left( 3 ) != "152" ) {
|
||||
if ( code != "152" ) {
|
||||
|
||||
matchNext();
|
||||
}
|
||||
|
@ -569,32 +562,42 @@ void DictServer::DictServerWordSearchRequest::addMatchedWord( const QString & st
|
|||
class DictServerArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
QAtomicInt isCancelled;
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
QString errorString;
|
||||
DictServerDictionary & dict;
|
||||
string articleData;
|
||||
|
||||
QString articleText;
|
||||
|
||||
int currentDatabase = 0;
|
||||
DictServerState state;
|
||||
QTimer * timer;
|
||||
bool contentInHtml = false;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
DictServerImpl * dictImpl;
|
||||
DictServerArticleRequest( wstring word_, DictServerDictionary & dict_ ):
|
||||
DictServerArticleRequest( std::u32string word_, DictServerDictionary & dict_ ):
|
||||
word( std::move( word_ ) ),
|
||||
dict( dict_ ),
|
||||
dictImpl( new DictServerImpl( this, dict_.url, "GoldenDict-t" ) )
|
||||
{
|
||||
timer = new QTimer( this );
|
||||
timer->setInterval( 5000 );
|
||||
timer->setSingleShot( true );
|
||||
qDebug() << "receive data:" << QDateTime::currentDateTime();
|
||||
connect( timer, &QTimer::timeout, this, [ this ]() {
|
||||
qDebug() << "Server takes too much time to response" << QDateTime::currentDateTime();
|
||||
cancel();
|
||||
} );
|
||||
|
||||
connect( this, &DictServerArticleRequest::finishedArticle, this, [ this ]( QString articleText ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||
cancel();
|
||||
return;
|
||||
}
|
||||
|
||||
qDebug() << articleText;
|
||||
|
||||
static QRegularExpression phonetic( R"(\\([^\\]+)\\)",
|
||||
QRegularExpression::CaseInsensitiveOption ); // phonetics: \stuff\ ...
|
||||
static QRegularExpression divs_inside_phonetic( "</div([^>]*)><div([^>]*)>",
|
||||
|
@ -687,10 +690,7 @@ public:
|
|||
defineNext();
|
||||
} );
|
||||
|
||||
QTimer::singleShot( 5000, this, [ this ]() {
|
||||
qDebug() << "Server takes too much time to response";
|
||||
cancel();
|
||||
} );
|
||||
timer->start();
|
||||
}
|
||||
|
||||
void run();
|
||||
|
@ -723,9 +723,9 @@ void DictServerArticleRequest::run()
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
connect( &dictImpl->socket, &QTcpSocket::readyRead, this, [ this ]() {
|
||||
QMutexLocker const _( &dictImpl->mutex );
|
||||
timer->start();
|
||||
if ( state == DictServerState::DEFINE ) {
|
||||
QByteArray reply = dictImpl->socket.readLine();
|
||||
qDebug() << "receive define:" << reply;
|
||||
|
@ -748,34 +748,19 @@ void DictServerArticleRequest::run()
|
|||
if ( reply.left( 3 ) == "150" ) {
|
||||
// Articles found
|
||||
int countPos = reply.indexOf( ' ', 4 );
|
||||
|
||||
QString articleText;
|
||||
|
||||
// Get articles count
|
||||
// Get articles count,
|
||||
// todo ,how to use this count?
|
||||
int count = reply.mid( 4, countPos > 4 ? countPos - 4 : -1 ).toInt();
|
||||
|
||||
// Read articles
|
||||
for ( int x = 0; x < count; x++ ) {
|
||||
reply = dictImpl->socket.readLine();
|
||||
if ( reply.isEmpty() ) {
|
||||
state = DictServerState::DEFINE_DATA;
|
||||
return;
|
||||
}
|
||||
readData( reply );
|
||||
}
|
||||
readData( reply );
|
||||
state = DictServerState::DEFINE_DATA;
|
||||
}
|
||||
}
|
||||
else if ( state == DictServerState::DEFINE_DATA ) {
|
||||
QByteArray reply = dictImpl->socket.readLine();
|
||||
qDebug() << "receive define data:" << reply;
|
||||
while ( true ) {
|
||||
if ( reply.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
readData( reply );
|
||||
reply = dictImpl->socket.readLine();
|
||||
}
|
||||
qDebug() << "receive define data:" << reply << QDateTime::currentDateTime();
|
||||
readData( reply );
|
||||
}
|
||||
} );
|
||||
|
||||
|
@ -814,7 +799,8 @@ void DictServerArticleRequest::readData( QByteArray reply )
|
|||
|
||||
pos = endPos + 1;
|
||||
|
||||
QString dbID, dbName;
|
||||
QString dbID;
|
||||
QString dbName;
|
||||
|
||||
// Retrieve database ID
|
||||
endPos = reply.indexOf( ' ', pos );
|
||||
|
@ -827,8 +813,7 @@ void DictServerArticleRequest::readData( QByteArray reply )
|
|||
dbID = reply.mid( pos, endPos - pos );
|
||||
|
||||
// Retrieve database name
|
||||
pos = endPos + 1;
|
||||
endPos = reply.indexOf( ' ', pos );
|
||||
pos = endPos + 1;
|
||||
if ( reply[ pos ] == '\"' ) {
|
||||
endPos = reply.indexOf( '\"', pos + 1 ) + 1;
|
||||
}
|
||||
|
@ -852,47 +837,30 @@ void DictServerArticleRequest::readData( QByteArray reply )
|
|||
articleData += string( "<div class=\"dictserver_from\">" ) + dbName.toUtf8().data() + "[" + dbID.toUtf8().data()
|
||||
+ "]" + "</div>";
|
||||
|
||||
// Retrieve MIME headers if any
|
||||
reply = dictImpl->socket.readAll();
|
||||
|
||||
static QRegularExpression contentTypeExpr( "Content-Type\\s*:\\s*text/html",
|
||||
QRegularExpression::CaseInsensitiveOption );
|
||||
|
||||
for ( ;; ) {
|
||||
reply = dictImpl->socket.readLine();
|
||||
if ( reply.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( reply == "\r\n" ) {
|
||||
break;
|
||||
}
|
||||
|
||||
QRegularExpressionMatch match = contentTypeExpr.match( reply );
|
||||
if ( match.hasMatch() ) {
|
||||
contentInHtml = true;
|
||||
}
|
||||
}
|
||||
QString articleText;
|
||||
// Retrieve article text
|
||||
|
||||
articleText.clear();
|
||||
for ( ;; ) {
|
||||
reply = dictImpl->socket.readLine();
|
||||
if ( reply.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
qDebug() << "reply data:" << reply;
|
||||
if ( reply == ".\r\n" ) {
|
||||
//discard all left message.
|
||||
while ( !dictImpl->socket.readLine().isEmpty() ) {}
|
||||
emit finishedArticle( articleText );
|
||||
return;
|
||||
}
|
||||
|
||||
articleText += reply;
|
||||
articleText += reply;
|
||||
qDebug() << "reply data:" << reply << QDateTime::currentDateTime();
|
||||
if ( articleText.contains( "\r\n.\r\n" ) ) {
|
||||
//discard all left message.
|
||||
emit finishedArticle( articleText );
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
articleText += reply;
|
||||
reply = dictImpl->socket.readAll();
|
||||
qDebug() << "reply data:" << reply << QDateTime::currentDateTime();
|
||||
|
||||
articleText += reply;
|
||||
if ( reply.contains( "\r\n.\r\n" ) ) {
|
||||
//discard all left message. maybe delete all the remaining data after `.\r\n`
|
||||
emit finishedArticle( articleText );
|
||||
return;
|
||||
}
|
||||
}
|
||||
//restart.
|
||||
timer->start();
|
||||
}
|
||||
|
||||
void DictServerArticleRequest::cancel()
|
||||
|
@ -902,7 +870,7 @@ void DictServerArticleRequest::cancel()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
|
||||
sptr< WordSearchRequest > DictServerDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
|
||||
{
|
||||
(void)maxResults;
|
||||
if ( word.size() > 80 ) {
|
||||
|
@ -915,8 +883,10 @@ sptr< WordSearchRequest > DictServerDictionary::prefixMatch( wstring const & wor
|
|||
}
|
||||
}
|
||||
|
||||
sptr< DataRequest >
|
||||
DictServerDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
|
||||
sptr< DataRequest > DictServerDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const &,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
if ( word.size() > 80 ) {
|
||||
|
@ -950,4 +920,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::DictServers const
|
|||
return result;
|
||||
}
|
||||
#include "dictserver.moc"
|
||||
} // namespace DictServer
|
||||
} // namespace DictServer
|
||||
|
|
302
src/dict/dsl.cc
302
src/dict/dsl.cc
|
@ -5,54 +5,33 @@
|
|||
#include "dsl_details.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "iconv.hh"
|
||||
#include "filetype.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "indexedzip.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "tiff.hh"
|
||||
#include "ftshelpers.hh"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <wctype.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QSemaphore>
|
||||
#include <QThreadPool>
|
||||
#include <QAtomicInt>
|
||||
#include <QUrl>
|
||||
|
||||
#include <QDir>
|
||||
#include <QFileInfo>
|
||||
#include <QPainter>
|
||||
#include <QStringList>
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
// For TIFF conversion
|
||||
#include <QImage>
|
||||
#include <QByteArray>
|
||||
#include <QBuffer>
|
||||
|
||||
// For SVG handling
|
||||
#include <QtSvg/QSvgRenderer>
|
||||
|
||||
#include <QtConcurrent>
|
||||
|
||||
#include <QSvgRenderer>
|
||||
#include <QtConcurrentRun>
|
||||
#include "utils.hh"
|
||||
|
||||
namespace Dsl {
|
||||
|
@ -64,11 +43,9 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
using std::vector;
|
||||
using std::list;
|
||||
using Utf8::Encoding;
|
||||
using Text::Encoding;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -120,8 +97,8 @@ struct InsidedCard
|
|||
{
|
||||
uint32_t offset;
|
||||
uint32_t size;
|
||||
QList< wstring > headwords;
|
||||
InsidedCard( uint32_t _offset, uint32_t _size, QList< wstring > const & words ):
|
||||
QList< std::u32string > headwords;
|
||||
InsidedCard( uint32_t _offset, uint32_t _size, QList< std::u32string > const & words ):
|
||||
offset( _offset ),
|
||||
size( _size ),
|
||||
headwords( words )
|
||||
|
@ -132,7 +109,7 @@ struct InsidedCard
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -158,14 +135,13 @@ class DslDictionary: public BtreeIndexing::BtreeDictionary
|
|||
QAtomicInt deferredInitDone;
|
||||
QMutex deferredInitMutex;
|
||||
bool deferredInitRunnableStarted;
|
||||
QSemaphore deferredInitRunnableExited;
|
||||
|
||||
string initError;
|
||||
|
||||
int optionalPartNom;
|
||||
quint8 articleNom;
|
||||
|
||||
wstring currentHeadword;
|
||||
std::u32string currentHeadword;
|
||||
string resourceDir1, resourceDir2;
|
||||
|
||||
public:
|
||||
|
@ -176,15 +152,6 @@ public:
|
|||
|
||||
~DslDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -217,8 +184,10 @@ public:
|
|||
}
|
||||
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -262,15 +231,15 @@ private:
|
|||
|
||||
/// Loads the article. Does not process the DSL language.
|
||||
void loadArticle( uint32_t address,
|
||||
wstring const & requestedHeadwordFolded,
|
||||
std::u32string const & requestedHeadwordFolded,
|
||||
bool ignoreDiacritics,
|
||||
wstring & tildeValue,
|
||||
wstring & displayedHeadword,
|
||||
std::u32string & tildeValue,
|
||||
std::u32string & displayedHeadword,
|
||||
unsigned & headwordIndex,
|
||||
wstring & articleText );
|
||||
std::u32string & articleText );
|
||||
|
||||
/// Converts DSL language to an Html.
|
||||
string dslToHtml( wstring const &, wstring const & headword = wstring() );
|
||||
string dslToHtml( std::u32string const &, std::u32string const & headword = std::u32string() );
|
||||
|
||||
// Parts of dslToHtml()
|
||||
string nodeToHtml( ArticleDom::Node const & );
|
||||
|
@ -289,7 +258,7 @@ private:
|
|||
|
||||
DslDictionary::DslDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
dz( 0 ),
|
||||
deferredInitRunnableStarted( false ),
|
||||
|
@ -303,17 +272,9 @@ DslDictionary::DslDictionary( string const & id, string const & indexFile, vecto
|
|||
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
|
||||
vector< char > dName( idx.read< uint32_t >() );
|
||||
if ( dName.size() > 0 ) {
|
||||
idx.read( &dName.front(), dName.size() );
|
||||
dictionaryName = string( &dName.front(), dName.size() );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
idx.readU32SizeAndData<>( preferredSoundDictionary );
|
||||
|
||||
vector< char > sName( idx.read< uint32_t >() );
|
||||
if ( sName.size() > 0 ) {
|
||||
idx.read( &sName.front(), sName.size() );
|
||||
preferredSoundDictionary = string( &sName.front(), sName.size() );
|
||||
}
|
||||
|
||||
resourceDir1 = getDictionaryFilenames()[ 0 ] + ".files" + Utils::Fs::separator();
|
||||
QString s = QString::fromStdString( getDictionaryFilenames()[ 0 ] );
|
||||
|
@ -407,7 +368,7 @@ void DslDictionary::doDeferredInit()
|
|||
memcpy( &total, abrvBlock, sizeof( uint32_t ) );
|
||||
abrvBlock += sizeof( uint32_t );
|
||||
|
||||
GD_DPRINTF( "Loading %u abbrv\n", total );
|
||||
qDebug( "Loading %u abbrv", total );
|
||||
|
||||
while ( total-- ) {
|
||||
uint32_t keySz;
|
||||
|
@ -490,7 +451,7 @@ void DslDictionary::loadIcon() noexcept
|
|||
/// so nbsp is not a whitespace character for Dsl compiler.
|
||||
/// For now we have only space and tab, since those are most likely the only
|
||||
/// ones recognized as spaces by that compiler.
|
||||
bool isDslWs( wchar ch )
|
||||
bool isDslWs( char32_t ch )
|
||||
{
|
||||
switch ( ch ) {
|
||||
case ' ':
|
||||
|
@ -502,14 +463,14 @@ bool isDslWs( wchar ch )
|
|||
}
|
||||
|
||||
void DslDictionary::loadArticle( uint32_t address,
|
||||
wstring const & requestedHeadwordFolded,
|
||||
std::u32string const & requestedHeadwordFolded,
|
||||
bool ignoreDiacritics,
|
||||
wstring & tildeValue,
|
||||
wstring & displayedHeadword,
|
||||
std::u32string & tildeValue,
|
||||
std::u32string & displayedHeadword,
|
||||
unsigned & headwordIndex,
|
||||
wstring & articleText )
|
||||
std::u32string & articleText )
|
||||
{
|
||||
wstring articleData;
|
||||
std::u32string articleData;
|
||||
|
||||
{
|
||||
vector< char > chunk;
|
||||
|
@ -527,7 +488,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
memcpy( &articleOffset, articleProps, sizeof( articleOffset ) );
|
||||
memcpy( &articleSize, articleProps + sizeof( articleOffset ), sizeof( articleSize ) );
|
||||
|
||||
GD_DPRINTF( "offset = %x\n", articleOffset );
|
||||
qDebug( "offset = %x", articleOffset );
|
||||
|
||||
|
||||
char * articleBody;
|
||||
|
@ -545,7 +506,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
else {
|
||||
try {
|
||||
articleData =
|
||||
Iconv::toWstring( Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
|
||||
Iconv::toWstring( Text::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
|
||||
free( articleBody );
|
||||
|
||||
// Strip DSL comments
|
||||
|
@ -566,27 +527,27 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
// Check is we retrieve insided card
|
||||
bool insidedCard = isDslWs( articleData.at( 0 ) );
|
||||
|
||||
wstring tildeValueWithUnsorted; // This one has unsorted parts left
|
||||
std::u32string tildeValueWithUnsorted; // This one has unsorted parts left
|
||||
for ( headwordIndex = 0;; ) {
|
||||
size_t begin = pos;
|
||||
|
||||
pos = articleData.find_first_of( U"\n\r", begin );
|
||||
|
||||
if ( pos == wstring::npos ) {
|
||||
if ( pos == std::u32string::npos ) {
|
||||
pos = articleData.size();
|
||||
}
|
||||
|
||||
if ( !foundDisplayedHeadword ) {
|
||||
// Process the headword
|
||||
|
||||
wstring rawHeadword = wstring( articleData, begin, pos - begin );
|
||||
std::u32string rawHeadword = std::u32string( articleData, begin, pos - begin );
|
||||
|
||||
if ( insidedCard && !rawHeadword.empty() && isDslWs( rawHeadword[ 0 ] ) ) {
|
||||
// Headword of the insided card
|
||||
wstring::size_type hpos = rawHeadword.find( L'@' );
|
||||
std::u32string::size_type hpos = rawHeadword.find( L'@' );
|
||||
if ( hpos != string::npos ) {
|
||||
wstring head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
|
||||
hpos = head.find( L'~' );
|
||||
std::u32string head = Folding::trimWhitespace( rawHeadword.substr( hpos + 1 ) );
|
||||
hpos = head.find( L'~' );
|
||||
while ( hpos != string::npos ) {
|
||||
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
|
||||
break;
|
||||
|
@ -607,7 +568,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
// We need our tilde expansion value
|
||||
tildeValue = rawHeadword;
|
||||
|
||||
list< wstring > lst;
|
||||
list< std::u32string > lst;
|
||||
|
||||
expandOptionalParts( tildeValue, &lst );
|
||||
|
||||
|
@ -619,7 +580,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
|
||||
processUnsortedParts( tildeValue, false );
|
||||
}
|
||||
wstring str = rawHeadword;
|
||||
std::u32string str = rawHeadword;
|
||||
|
||||
if ( hadFirstHeadword ) {
|
||||
expandTildes( str, tildeValueWithUnsorted );
|
||||
|
@ -629,7 +590,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
|
||||
str = Folding::applySimpleCaseOnly( str );
|
||||
|
||||
list< wstring > lst;
|
||||
list< std::u32string > lst;
|
||||
expandOptionalParts( str, &lst );
|
||||
|
||||
// Does one of the results match the requested word? If so, we'd choose
|
||||
|
@ -695,15 +656,15 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
// Check for begin article text
|
||||
if ( insidedCard ) {
|
||||
// Check for next insided headword
|
||||
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
|
||||
if ( hpos == wstring::npos ) {
|
||||
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
|
||||
if ( hpos == std::u32string::npos ) {
|
||||
hpos = articleData.size();
|
||||
}
|
||||
|
||||
wstring str = wstring( articleData, pos, hpos - pos );
|
||||
std::u32string str = std::u32string( articleData, pos, hpos - pos );
|
||||
|
||||
hpos = str.find( L'@' );
|
||||
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
|
||||
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -725,18 +686,18 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
}
|
||||
|
||||
if ( pos != articleData.size() ) {
|
||||
articleText = wstring( articleData, pos );
|
||||
articleText = std::u32string( articleData, pos );
|
||||
}
|
||||
else {
|
||||
articleText.clear();
|
||||
}
|
||||
}
|
||||
|
||||
string DslDictionary::dslToHtml( wstring const & str, wstring const & headword )
|
||||
string DslDictionary::dslToHtml( std::u32string const & str, std::u32string const & headword )
|
||||
{
|
||||
// Normalize the string
|
||||
wstring normalizedStr = gd::normalize( str );
|
||||
currentHeadword = headword;
|
||||
std::u32string normalizedStr = Text::normalize( str );
|
||||
currentHeadword = headword;
|
||||
|
||||
ArticleDom dom( normalizedStr, getName(), headword );
|
||||
|
||||
|
@ -771,7 +732,7 @@ string DslDictionary::getNodeLink( ArticleDom::Node const & node )
|
|||
}
|
||||
}
|
||||
if ( link.empty() ) {
|
||||
link = Html::escape( Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false ) );
|
||||
link = Html::escape( Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false ) );
|
||||
}
|
||||
|
||||
return link;
|
||||
|
@ -782,7 +743,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
string result;
|
||||
|
||||
if ( !node.isTag ) {
|
||||
result = Html::escape( Utf8::encode( node.text ) );
|
||||
result = Html::escape( Text::toUtf8( node.text ) );
|
||||
|
||||
// Handle all end-of-line
|
||||
|
||||
|
@ -822,7 +783,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
result += "<span class=\"c_default_color\">" + processNodeChildren( node ) + "</span>";
|
||||
}
|
||||
else {
|
||||
result += "<font color=\"" + Html::escape( Utf8::encode( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
|
||||
result += "<font color=\"" + Html::escape( Text::toUtf8( node.tagAttrs ) ) + "\">" + processNodeChildren( node )
|
||||
+ "</font>";
|
||||
}
|
||||
}
|
||||
|
@ -835,7 +796,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
result += "<div class=\"dsl_m\">" + processNodeChildren( node ) + "</div>";
|
||||
}
|
||||
else if ( node.tagName.size() == 2 && node.tagName[ 0 ] == L'm' && iswdigit( node.tagName[ 1 ] ) ) {
|
||||
result += "<div class=\"dsl_" + Utf8::encode( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
|
||||
result += "<div class=\"dsl_" + Text::toUtf8( node.tagName ) + "\">" + processNodeChildren( node ) + "</div>";
|
||||
}
|
||||
else if ( node.tagName == U"trn" ) {
|
||||
result += "<span class=\"dsl_trn\">" + processNodeChildren( node ) + "</span>";
|
||||
|
@ -847,7 +808,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
result += "<span class=\"dsl_com\">" + processNodeChildren( node ) + "</span>";
|
||||
}
|
||||
else if ( node.tagName == U"s" || node.tagName == U"video" ) {
|
||||
string filename = Filetype::simplifyString( Utf8::encode( node.renderAsText() ), false );
|
||||
string filename = Filetype::simplifyString( Text::toUtf8( node.renderAsText() ), false );
|
||||
string n = resourceDir1 + filename;
|
||||
|
||||
if ( Filetype::isNameOfSound( filename ) ) {
|
||||
|
@ -926,7 +887,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
else if ( node.tagName == U"p" ) {
|
||||
result += "<span class=\"dsl_p\"";
|
||||
|
||||
string val = Utf8::encode( node.renderAsText() );
|
||||
string val = Text::toUtf8( node.renderAsText() );
|
||||
|
||||
// If we have such a key, display a title
|
||||
|
||||
|
@ -946,7 +907,8 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
// user could pick up the best suitable option.
|
||||
string data = processNodeChildren( node );
|
||||
result += R"(<span class="dsl_stress"><span class="dsl_stress_without_accent">)" + data + "</span>"
|
||||
+ "<span class=\"dsl_stress_with_accent\">" + data + Utf8::encode( wstring( 1, 0x301 ) ) + "</span></span>";
|
||||
+ "<span class=\"dsl_stress_with_accent\">" + data + Text::toUtf8( std::u32string( 1, 0x301 ) )
|
||||
+ "</span></span>";
|
||||
}
|
||||
else if ( node.tagName == U"lang" ) {
|
||||
result += "<span class=\"dsl_lang\"";
|
||||
|
@ -982,7 +944,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
|
||||
url.setScheme( "gdlookup" );
|
||||
url.setHost( "localhost" );
|
||||
auto nodeStr = Utf8::decode( getNodeLink( node ) );
|
||||
auto nodeStr = Text::toUtf32( getNodeLink( node ) );
|
||||
|
||||
normalizeHeadword( nodeStr );
|
||||
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
|
||||
|
@ -1006,7 +968,7 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
|
||||
url.setScheme( "gdlookup" );
|
||||
url.setHost( "localhost" );
|
||||
wstring nodeStr = node.renderAsText();
|
||||
std::u32string nodeStr = node.renderAsText();
|
||||
normalizeHeadword( nodeStr );
|
||||
url.setPath( Utils::Url::ensureLeadingSlash( QString::fromStdU32String( nodeStr ) ) );
|
||||
|
||||
|
@ -1026,11 +988,11 @@ string DslDictionary::nodeToHtml( ArticleDom::Node const & node )
|
|||
result += "<br />";
|
||||
}
|
||||
else {
|
||||
gdWarning( R"(DSL: Unknown tag "%s" with attributes "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( node.tagName ).toUtf8().data(),
|
||||
QString::fromStdU32String( node.tagAttrs ).toUtf8().data(),
|
||||
getName().c_str(),
|
||||
QString::fromStdU32String( currentHeadword ).toUtf8().data() );
|
||||
qWarning( R"(DSL: Unknown tag "%s" with attributes "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( node.tagName ).toUtf8().data(),
|
||||
QString::fromStdU32String( node.tagAttrs ).toUtf8().data(),
|
||||
getName().c_str(),
|
||||
QString::fromStdU32String( currentHeadword ).toUtf8().data() );
|
||||
|
||||
result += "<span class=\"dsl_unknown\">[" + string( QString::fromStdU32String( node.tagName ).toUtf8().data() );
|
||||
if ( !node.tagAttrs.empty() ) {
|
||||
|
@ -1138,14 +1100,14 @@ void DslDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Dsl: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Dsl: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "DSL: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "DSL: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -1158,7 +1120,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
vector< char > chunk;
|
||||
|
||||
char * articleProps;
|
||||
wstring articleData;
|
||||
std::u32string articleData;
|
||||
|
||||
{
|
||||
QMutexLocker _( &idxMutex );
|
||||
|
@ -1182,8 +1144,9 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
}
|
||||
else {
|
||||
try {
|
||||
articleData =
|
||||
Iconv::toWstring( getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
|
||||
articleData = Iconv::toWstring( getEncodingNameFor( static_cast< Encoding >( idxHeader.dslEncoding ) ),
|
||||
articleBody,
|
||||
articleSize );
|
||||
free( articleBody );
|
||||
|
||||
// Strip DSL comments
|
||||
|
@ -1199,7 +1162,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
// Skip headword
|
||||
|
||||
size_t pos = 0;
|
||||
wstring articleHeadword, tildeValue;
|
||||
std::u32string articleHeadword, tildeValue;
|
||||
|
||||
// Check if we retrieve insided card
|
||||
bool insidedCard = isDslWs( articleData.at( 0 ) );
|
||||
|
@ -1208,20 +1171,20 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
size_t begin = pos;
|
||||
|
||||
pos = articleData.find_first_of( U"\n\r", begin );
|
||||
if ( pos == wstring::npos ) {
|
||||
if ( pos == std::u32string::npos ) {
|
||||
pos = articleData.size();
|
||||
}
|
||||
|
||||
if ( articleHeadword.empty() ) {
|
||||
// Process the headword
|
||||
articleHeadword = wstring( articleData, begin, pos - begin );
|
||||
articleHeadword = std::u32string( articleData, begin, pos - begin );
|
||||
|
||||
if ( insidedCard && !articleHeadword.empty() && isDslWs( articleHeadword[ 0 ] ) ) {
|
||||
// Headword of the insided card
|
||||
wstring::size_type hpos = articleHeadword.find( L'@' );
|
||||
std::u32string::size_type hpos = articleHeadword.find( L'@' );
|
||||
if ( hpos != string::npos ) {
|
||||
wstring head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
|
||||
hpos = head.find( L'~' );
|
||||
std::u32string head = Folding::trimWhitespace( articleHeadword.substr( hpos + 1 ) );
|
||||
hpos = head.find( L'~' );
|
||||
while ( hpos != string::npos ) {
|
||||
if ( hpos == 0 || head[ hpos ] != L'\\' ) {
|
||||
break;
|
||||
|
@ -1238,7 +1201,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
}
|
||||
|
||||
if ( !articleHeadword.empty() ) {
|
||||
list< wstring > lst;
|
||||
list< std::u32string > lst;
|
||||
|
||||
tildeValue = articleHeadword;
|
||||
|
||||
|
@ -1275,15 +1238,15 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
// Check for begin article text
|
||||
if ( insidedCard ) {
|
||||
// Check for next insided headword
|
||||
wstring::size_type hpos = articleData.find_first_of( U"\n\r", pos );
|
||||
if ( hpos == wstring::npos ) {
|
||||
std::u32string::size_type hpos = articleData.find_first_of( U"\n\r", pos );
|
||||
if ( hpos == std::u32string::npos ) {
|
||||
hpos = articleData.size();
|
||||
}
|
||||
|
||||
wstring str = wstring( articleData, pos, hpos - pos );
|
||||
std::u32string str = std::u32string( articleData, pos, hpos - pos );
|
||||
|
||||
hpos = str.find( L'@' );
|
||||
if ( hpos == wstring::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
|
||||
if ( hpos == std::u32string::npos || str[ hpos - 1 ] == L'\\' || !isAtSignFirst( str ) ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1299,17 +1262,17 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
headword = QString::fromStdU32String( articleHeadword );
|
||||
}
|
||||
|
||||
wstring articleText;
|
||||
std::u32string articleText;
|
||||
|
||||
if ( pos != articleData.size() ) {
|
||||
articleText = wstring( articleData, pos );
|
||||
articleText = std::u32string( articleData, pos );
|
||||
}
|
||||
else {
|
||||
articleText.clear();
|
||||
}
|
||||
|
||||
if ( !tildeValue.empty() ) {
|
||||
list< wstring > lst;
|
||||
list< std::u32string > lst;
|
||||
|
||||
processUnsortedParts( tildeValue, false );
|
||||
expandOptionalParts( tildeValue, &lst );
|
||||
|
@ -1415,19 +1378,18 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
|
||||
class DslArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
DslDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
QSemaphore hasExited;
|
||||
QFuture< void > f;
|
||||
|
||||
public:
|
||||
|
||||
DslArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
DslArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
DslDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -1483,7 +1445,7 @@ void DslArticleRequest::run()
|
|||
// index here.
|
||||
set< pair< uint32_t, unsigned > > articlesIncluded;
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
|
||||
for ( auto & x : chain ) {
|
||||
// Check if we're cancelled occasionally
|
||||
|
@ -1494,9 +1456,9 @@ void DslArticleRequest::run()
|
|||
|
||||
// Grab that article
|
||||
|
||||
wstring tildeValue;
|
||||
wstring displayedHeadword;
|
||||
wstring articleBody;
|
||||
std::u32string tildeValue;
|
||||
std::u32string displayedHeadword;
|
||||
std::u32string articleBody;
|
||||
unsigned headwordIndex;
|
||||
|
||||
string articleText, articleAfter;
|
||||
|
@ -1567,7 +1529,7 @@ void DslArticleRequest::run()
|
|||
articleText += articleAfter;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "DSL: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
|
||||
qWarning( "DSL: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
|
||||
articleText =
|
||||
string( "<span class=\"dsl_article\">" ) + QObject::tr( "Article loading error" ).toStdString() + "</span>";
|
||||
}
|
||||
|
@ -1580,9 +1542,9 @@ void DslArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > DslDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > DslDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -1598,7 +1560,6 @@ class DslResourceRequest: public Dictionary::DataRequest
|
|||
string resourceName;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
QSemaphore hasExited;
|
||||
QFuture< void > f;
|
||||
|
||||
public:
|
||||
|
@ -1643,7 +1604,7 @@ void DslResourceRequest::run()
|
|||
|
||||
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
|
||||
|
||||
GD_DPRINTF( "dsl resource name is %s\n", n.c_str() );
|
||||
qDebug( "dsl resource name is %s", n.c_str() );
|
||||
|
||||
try {
|
||||
try {
|
||||
|
@ -1672,7 +1633,7 @@ void DslResourceRequest::run()
|
|||
if ( dict.resourceZip.isOpen() ) {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
||||
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
|
||||
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
|
||||
throw; // Make it fail since we couldn't read the archive
|
||||
}
|
||||
}
|
||||
|
@ -1695,10 +1656,10 @@ void DslResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "DSL: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "DSL: Failed loading resource \"%s\" for \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -1745,11 +1706,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
continue;
|
||||
}
|
||||
|
||||
// Make sure it's not an abbreviation file
|
||||
// Make sure it's not an abbreviation file. extSize of ".dsl" or ".dsl.dz"
|
||||
|
||||
int extSize = ( uncompressedDsl ? 4 : 7 );
|
||||
if ( fileName.size() - extSize >= 5
|
||||
&& strncasecmp( fileName.c_str() + fileName.size() - extSize - 5, "_abrv", 5 ) == 0 ) {
|
||||
if ( int extSize = ( uncompressedDsl ? 4 : 7 ); ( fileName.size() >= ( 5 + extSize ) )
|
||||
&& ( QByteArrayView( fileName ).chopped( extSize ).last( 5 ).compare( "_abrv", Qt::CaseInsensitive ) == 0 ) ) {
|
||||
// It is, skip it
|
||||
continue;
|
||||
}
|
||||
|
@ -1802,12 +1762,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
// Building the index
|
||||
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
|
||||
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
|
||||
|
||||
gdDebug( "Dsl: Building the index for dictionary: %s\n",
|
||||
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
|
||||
qDebug( "Dsl: Building the index for dictionary: %s",
|
||||
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -1818,19 +1778,19 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
idx.write( idxHeader );
|
||||
|
||||
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
|
||||
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
|
||||
|
||||
idx.write( (uint32_t)dictionaryName.size() );
|
||||
idx.write( dictionaryName.data(), dictionaryName.size() );
|
||||
|
||||
string soundDictName = Utf8::encode( scanner.getSoundDictionaryName() );
|
||||
string soundDictName = Text::toUtf8( scanner.getSoundDictionaryName() );
|
||||
if ( !soundDictName.empty() ) {
|
||||
idxHeader.hasSoundDictionaryName = 1;
|
||||
idx.write( (uint32_t)soundDictName.size() );
|
||||
idx.write( soundDictName.data(), soundDictName.size() );
|
||||
}
|
||||
|
||||
idxHeader.dslEncoding = scanner.getEncoding();
|
||||
idxHeader.dslEncoding = static_cast< uint32_t >( scanner.getEncoding() );
|
||||
|
||||
IndexedWords indexedWords;
|
||||
|
||||
|
@ -1844,7 +1804,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
map< string, string > abrv;
|
||||
|
||||
wstring curString;
|
||||
std::u32string curString;
|
||||
size_t curOffset;
|
||||
|
||||
for ( ;; ) {
|
||||
|
@ -1856,7 +1816,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
continue;
|
||||
}
|
||||
|
||||
list< wstring > keys;
|
||||
list< std::u32string > keys;
|
||||
|
||||
bool eof = false;
|
||||
|
||||
|
@ -1871,7 +1831,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
expandOptionalParts( curString, &keys );
|
||||
|
||||
if ( !abrvScanner.readNextLineWithoutComments( curString, curOffset ) || curString.empty() ) {
|
||||
gdWarning( "Premature end of file %s\n", abrvFileName.c_str() );
|
||||
qWarning( "Premature end of file %s", abrvFileName.c_str() );
|
||||
eof = true;
|
||||
break;
|
||||
}
|
||||
|
@ -1892,13 +1852,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
// If the string has any dsl markup, we strip it
|
||||
string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() );
|
||||
string value = Text::toUtf8( ArticleDom( curString ).root.renderAsText() );
|
||||
|
||||
for ( auto & key : keys ) {
|
||||
unescapeDsl( key );
|
||||
normalizeHeadword( key );
|
||||
|
||||
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
|
||||
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1910,7 +1870,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
chunks.addToBlock( &sz, sizeof( uint32_t ) );
|
||||
|
||||
for ( const auto & i : abrv ) {
|
||||
// GD_DPRINTF( "%s:%s\n", i->first.c_str(), i->second.c_str() );
|
||||
// qDebug( "%s:%s", i->first.c_str(), i->second.c_str() );
|
||||
|
||||
sz = i.first.size();
|
||||
chunks.addToBlock( &sz, sizeof( uint32_t ) );
|
||||
|
@ -1921,12 +1881,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Error reading abrv file \"%s\", error: %s. Skipping it.\n", abrvFileName.c_str(), e.what() );
|
||||
qWarning( "Error reading abrv file \"%s\", error: %s. Skipping it.", abrvFileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
bool hasString = false;
|
||||
wstring curString;
|
||||
std::u32string curString;
|
||||
size_t curOffset;
|
||||
|
||||
uint32_t articleCount = 0, wordCount = 0;
|
||||
|
@ -1951,7 +1911,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// characters are blank, too.
|
||||
for ( size_t x = 1; x < curString.size(); ++x ) {
|
||||
if ( !isDslWs( curString[ x ] ) ) {
|
||||
gdWarning( "Garbage string in %s at offset 0x%lX\n", fileName.c_str(), curOffset );
|
||||
qWarning( "Garbage string in %s at offset 0x%lX", fileName.c_str(), curOffset );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1960,20 +1920,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
// Ok, got the headword
|
||||
|
||||
list< wstring > allEntryWords;
|
||||
list< std::u32string > allEntryWords;
|
||||
|
||||
processUnsortedParts( curString, true );
|
||||
expandOptionalParts( curString, &allEntryWords );
|
||||
|
||||
uint32_t articleOffset = curOffset;
|
||||
|
||||
//GD_DPRINTF( "Headword: %ls\n", curString.c_str() );
|
||||
//qDebug( "Headword: %ls", curString.c_str() );
|
||||
|
||||
// More headwords may follow
|
||||
|
||||
for ( ;; ) {
|
||||
if ( !( hasString = scanner.readNextLineWithoutComments( curString, curOffset ) ) ) {
|
||||
gdWarning( "Premature end of file %s\n", fileName.c_str() );
|
||||
qWarning( "Premature end of file %s", fileName.c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2013,10 +1973,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
wordCount += allEntryWords.size();
|
||||
|
||||
int insideInsided = 0;
|
||||
wstring headword;
|
||||
std::u32string headword;
|
||||
QList< InsidedCard > insidedCards;
|
||||
uint32_t offset = curOffset;
|
||||
QList< wstring > insidedHeadwords;
|
||||
QList< std::u32string > insidedHeadwords;
|
||||
unsigned linesInsideCard = 0;
|
||||
int dogLine = 0;
|
||||
bool wasEmptyLine = false;
|
||||
|
@ -2031,11 +1991,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
if ( !hasString || ( curString.size() && !isDslWs( curString[ 0 ] ) ) ) {
|
||||
if ( insideInsided ) {
|
||||
gdWarning( "Unclosed tag '@' at line %i", dogLine );
|
||||
qWarning( "Unclosed tag '@' at line %i", dogLine );
|
||||
insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) );
|
||||
}
|
||||
if ( noSignificantLines ) {
|
||||
gdWarning( "Orphan headword at line %i", headwordLine );
|
||||
qWarning( "Orphan headword at line %i", headwordLine );
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -2049,7 +2009,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
else {
|
||||
if ( wasEmptyLine && !Folding::applyWhitespaceOnly( curString ).empty() ) {
|
||||
gdWarning( "Orphan string at line %i", scanner.getLinesRead() - 1 );
|
||||
qWarning( "Orphan string at line %i", scanner.getLinesRead() - 1 );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2059,8 +2019,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
// Find embedded cards
|
||||
|
||||
wstring::size_type n = curString.find( L'@' );
|
||||
if ( n == wstring::npos || curString[ n - 1 ] == L'\\' ) {
|
||||
std::u32string::size_type n = curString.find( L'@' );
|
||||
if ( n == std::u32string::npos || curString[ n - 1 ] == L'\\' ) {
|
||||
if ( insideInsided ) {
|
||||
linesInsideCard++;
|
||||
}
|
||||
|
@ -2070,7 +2030,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
else {
|
||||
// Embedded card tag must be placed at first position in line after spaces
|
||||
if ( !isAtSignFirst( curString ) ) {
|
||||
gdWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
|
||||
qWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
|
||||
|
||||
if ( insideInsided ) {
|
||||
linesInsideCard++;
|
||||
|
@ -2160,7 +2120,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// If there was a zip file, index it too
|
||||
|
||||
if ( zipFileName.size() ) {
|
||||
GD_DPRINTF( "Indexing zip file\n" );
|
||||
qDebug( "Indexing zip file" );
|
||||
|
||||
idxHeader.hasZipFile = 1;
|
||||
|
||||
|
@ -2216,7 +2176,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< DslDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "DSL dictionary reading failed: %s:%u, error: %s\n", fileName.c_str(), atLine, e.what() );
|
||||
qWarning( "DSL dictionary reading failed: %s:%u, error: %s", fileName.c_str(), atLine, e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,9 +5,8 @@
|
|||
|
||||
#include "folding.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "ufile.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
|
||||
#include <exception>
|
||||
#include <stdio.h>
|
||||
|
@ -18,9 +17,8 @@
|
|||
namespace Dsl {
|
||||
namespace Details {
|
||||
|
||||
using gd::wstring;
|
||||
using std::list;
|
||||
using Utf8::Encoding;
|
||||
using Text::Encoding;
|
||||
|
||||
static QMap< int, string > lang_codes = {
|
||||
{ 1, "en" }, { 1033, "en" }, { 2, "ru" }, { 1049, "ru" }, { 1068, "az" }, { 1025, "ar" }, { 1067, "am" },
|
||||
|
@ -41,7 +39,7 @@ string findCodeForDslId( int id )
|
|||
return lang_codes[ id ];
|
||||
}
|
||||
|
||||
bool isAtSignFirst( wstring const & str )
|
||||
bool isAtSignFirst( std::u32string const & str )
|
||||
{
|
||||
// Test if '@' is first in string except spaces and dsl tags
|
||||
QRegularExpression reg( R"([ \t]*(?:\[[^\]]+\][ \t]*)*@)", QRegularExpression::PatternOption::CaseInsensitiveOption );
|
||||
|
@ -50,13 +48,13 @@ bool isAtSignFirst( wstring const & str )
|
|||
|
||||
/////////////// ArticleDom
|
||||
|
||||
wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
|
||||
std::u32string ArticleDom::Node::renderAsText( bool stripTrsTag ) const
|
||||
{
|
||||
if ( !isTag ) {
|
||||
return text;
|
||||
}
|
||||
|
||||
wstring result;
|
||||
std::u32string result;
|
||||
|
||||
for ( const auto & i : *this ) {
|
||||
if ( !stripTrsTag || i.tagName != U"!trs" ) {
|
||||
|
@ -70,17 +68,17 @@ wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
|
|||
namespace {
|
||||
|
||||
/// @return true if @p tagName equals "mN" where N is a digit
|
||||
bool is_mN( wstring const & tagName )
|
||||
bool is_mN( std::u32string const & tagName )
|
||||
{
|
||||
return tagName.size() == 2 && tagName[ 0 ] == U'm' && iswdigit( tagName[ 1 ] );
|
||||
}
|
||||
|
||||
bool isAnyM( wstring const & tagName )
|
||||
bool isAnyM( std::u32string const & tagName )
|
||||
{
|
||||
return tagName == U"m" || is_mN( tagName );
|
||||
}
|
||||
|
||||
bool checkM( wstring const & dest, wstring const & src )
|
||||
bool checkM( std::u32string const & dest, std::u32string const & src )
|
||||
{
|
||||
return src == U"m" && is_mN( dest );
|
||||
}
|
||||
|
@ -98,8 +96,8 @@ struct MustTagBeClosed
|
|||
|
||||
} // unnamed namespace
|
||||
|
||||
ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring const & headword_ ):
|
||||
root( Node::Tag(), wstring(), wstring() ),
|
||||
ArticleDom::ArticleDom( std::u32string const & str, string const & dictName, std::u32string const & headword_ ):
|
||||
root( Node::Tag(), std::u32string(), std::u32string() ),
|
||||
stringPos( str.c_str() ),
|
||||
lineStartPos( str.c_str() ),
|
||||
transcriptionCount( 0 ),
|
||||
|
@ -119,15 +117,15 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
if ( !atSignFirstInLine() ) {
|
||||
// Not inside a card
|
||||
if ( dictName.empty() ) {
|
||||
gdWarning( "Unescaped '@' symbol found" );
|
||||
qWarning( "Unescaped '@' symbol found" );
|
||||
}
|
||||
else {
|
||||
gdWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
|
||||
qWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Inside a card
|
||||
wstring linkTo;
|
||||
std::u32string linkTo;
|
||||
nextChar();
|
||||
for ( ;; nextChar() ) {
|
||||
if ( ch == L'\n' ) {
|
||||
|
@ -143,13 +141,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
linkTo = Folding::trimWhitespace( linkTo );
|
||||
|
||||
if ( !linkTo.empty() ) {
|
||||
list< wstring > allLinkEntries;
|
||||
list< std::u32string > allLinkEntries;
|
||||
processUnsortedParts( linkTo, true );
|
||||
expandOptionalParts( linkTo, &allLinkEntries );
|
||||
|
||||
for ( auto entry = allLinkEntries.begin(); entry != allLinkEntries.end(); ) {
|
||||
if ( !textNode ) {
|
||||
Node text = Node( Node::Text(), wstring() );
|
||||
Node text = Node( Node::Text(), std::u32string() );
|
||||
|
||||
if ( stack.empty() ) {
|
||||
root.push_back( text );
|
||||
|
@ -169,10 +167,10 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
stack.pop_back();
|
||||
textNode = 0;
|
||||
|
||||
wstring linkText = Folding::trimWhitespace( *entry );
|
||||
std::u32string linkText = Folding::trimWhitespace( *entry );
|
||||
ArticleDom nodeDom( linkText, dictName, headword_ );
|
||||
|
||||
Node link( Node::Tag(), U"@", wstring() );
|
||||
Node link( Node::Tag(), U"@", std::u32string() );
|
||||
for ( auto & n : nodeDom.root ) {
|
||||
link.push_back( n );
|
||||
}
|
||||
|
@ -182,13 +180,13 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
if ( stack.empty() ) {
|
||||
root.push_back( link );
|
||||
if ( entry != allLinkEntries.end() ) { // Add line break before next entry
|
||||
root.push_back( Node( Node::Tag(), U"br", wstring() ) );
|
||||
root.push_back( Node( Node::Tag(), U"br", std::u32string() ) );
|
||||
}
|
||||
}
|
||||
else {
|
||||
stack.back()->push_back( link );
|
||||
if ( entry != allLinkEntries.end() ) {
|
||||
stack.back()->push_back( Node( Node::Tag(), U"br", wstring() ) );
|
||||
stack.back()->push_back( Node( Node::Tag(), U"br", std::u32string() ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -209,8 +207,8 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
if ( ch == L'[' && !escaped ) {
|
||||
// Beginning of a tag.
|
||||
bool isClosing;
|
||||
wstring name;
|
||||
wstring attrs;
|
||||
std::u32string name;
|
||||
std::u32string attrs;
|
||||
|
||||
try {
|
||||
do {
|
||||
|
@ -246,16 +244,16 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
}
|
||||
catch ( std::exception & ex ) {
|
||||
if ( !dictionaryName.empty() ) {
|
||||
gdWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
QString::fromStdU32String( attrs ).toUtf8().data(),
|
||||
dictionaryName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().data() );
|
||||
qWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
QString::fromStdU32String( attrs ).toUtf8().data(),
|
||||
dictionaryName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().data() );
|
||||
}
|
||||
else {
|
||||
gdWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
QString::fromStdU32String( attrs ).toUtf8().data() );
|
||||
qWarning( R"(DSL: Unfinished tag "%s" with attributes "%s" found)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
QString::fromStdU32String( attrs ).toUtf8().data() );
|
||||
}
|
||||
|
||||
throw ex;
|
||||
|
@ -331,7 +329,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
nextChar();
|
||||
} while ( Folding::isWhitespace( ch ) );
|
||||
|
||||
wstring linkTo, linkText;
|
||||
std::u32string linkTo, linkText;
|
||||
|
||||
for ( ;; nextChar() ) {
|
||||
// Is it the end?
|
||||
|
@ -374,7 +372,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
processUnsortedParts( linkText, true );
|
||||
ArticleDom nodeDom( linkText, dictName, headword_ );
|
||||
|
||||
Node link( Node::Tag(), U"ref", wstring() );
|
||||
Node link( Node::Tag(), U"ref", std::u32string() );
|
||||
for ( auto & n : nodeDom.root ) {
|
||||
link.push_back( n );
|
||||
}
|
||||
|
@ -428,7 +426,7 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
|
||||
// If there's currently no text node, open one
|
||||
if ( !textNode ) {
|
||||
Node text = Node( Node::Text(), wstring() );
|
||||
Node text = Node( Node::Text(), std::u32string() );
|
||||
|
||||
if ( stack.empty() ) {
|
||||
root.push_back( text );
|
||||
|
@ -678,21 +676,21 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName, wstring co
|
|||
unsigned const unclosedTagCount = 1 + std::count_if( it, stack.end(), MustTagBeClosed() );
|
||||
|
||||
if ( dictName.empty() ) {
|
||||
gdWarning( "Warning: %u tag(s) were unclosed, first tag name \"%s\".\n",
|
||||
unclosedTagCount,
|
||||
firstTagName.constData() );
|
||||
qWarning( "Warning: %u tag(s) were unclosed, first tag name \"%s\".",
|
||||
unclosedTagCount,
|
||||
firstTagName.constData() );
|
||||
}
|
||||
else {
|
||||
gdWarning( "Warning: %u tag(s) were unclosed in \"%s\", article \"%s\", first tag name \"%s\".\n",
|
||||
unclosedTagCount,
|
||||
dictName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().constData(),
|
||||
firstTagName.constData() );
|
||||
qWarning( "Warning: %u tag(s) were unclosed in \"%s\", article \"%s\", first tag name \"%s\".",
|
||||
unclosedTagCount,
|
||||
dictName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().constData(),
|
||||
firstTagName.constData() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Node * > & stack )
|
||||
void ArticleDom::openTag( std::u32string const & name, std::u32string const & attrs, list< Node * > & stack )
|
||||
{
|
||||
list< Node > nodesToReopen;
|
||||
|
||||
|
@ -747,7 +745,7 @@ void ArticleDom::openTag( wstring const & name, wstring const & attrs, list< Nod
|
|||
}
|
||||
}
|
||||
|
||||
void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool warn )
|
||||
void ArticleDom::closeTag( std::u32string const & name, list< Node * > & stack, bool warn )
|
||||
{
|
||||
// Find the tag which is to be closed
|
||||
|
||||
|
@ -787,14 +785,14 @@ void ArticleDom::closeTag( wstring const & name, list< Node * > & stack, bool wa
|
|||
}
|
||||
else if ( warn ) {
|
||||
if ( !dictionaryName.empty() ) {
|
||||
gdWarning( R"(No corresponding opening tag for closing tag "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
dictionaryName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().data() );
|
||||
qWarning( R"(No corresponding opening tag for closing tag "%s" found in "%s", article "%s".)",
|
||||
QString::fromStdU32String( name ).toUtf8().data(),
|
||||
dictionaryName.c_str(),
|
||||
QString::fromStdU32String( headword ).toUtf8().data() );
|
||||
}
|
||||
else {
|
||||
gdWarning( "No corresponding opening tag for closing tag \"%s\" found.",
|
||||
QString::fromStdU32String( name ).toUtf8().data() );
|
||||
qWarning( "No corresponding opening tag for closing tag \"%s\" found.",
|
||||
QString::fromStdU32String( name ).toUtf8().data() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -840,13 +838,13 @@ bool ArticleDom::atSignFirstInLine()
|
|||
return true;
|
||||
}
|
||||
|
||||
return isAtSignFirst( wstring( lineStartPos ) );
|
||||
return isAtSignFirst( std::u32string( lineStartPos ) );
|
||||
}
|
||||
|
||||
/////////////// DslScanner
|
||||
|
||||
DslScanner::DslScanner( string const & fileName ):
|
||||
encoding( Utf8::Utf8 ),
|
||||
encoding( Text::Encoding::Utf8 ),
|
||||
readBufferPtr( readBuffer ),
|
||||
readBufferLeft( 0 ),
|
||||
linesRead( 0 )
|
||||
|
@ -877,19 +875,19 @@ DslScanner::DslScanner( string const & fileName ):
|
|||
guessedEncoding.has_value() ) {
|
||||
switch ( guessedEncoding.value() ) {
|
||||
case QStringConverter::Utf8:
|
||||
encoding = Utf8::Utf8;
|
||||
encoding = Text::Encoding::Utf8;
|
||||
break;
|
||||
case QStringConverter::Utf16LE:
|
||||
encoding = Utf8::Utf16LE;
|
||||
encoding = Text::Encoding::Utf16LE;
|
||||
break;
|
||||
case QStringConverter::Utf16BE:
|
||||
encoding = Utf8::Utf16BE;
|
||||
encoding = Text::Encoding::Utf16BE;
|
||||
break;
|
||||
case QStringConverter::Utf32LE:
|
||||
encoding = Utf8::Utf16LE;
|
||||
encoding = Text::Encoding::Utf16LE;
|
||||
break;
|
||||
case QStringConverter::Utf32BE:
|
||||
encoding = Utf8::Utf32BE;
|
||||
encoding = Text::Encoding::Utf32BE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -906,10 +904,10 @@ DslScanner::DslScanner( string const & fileName ):
|
|||
}
|
||||
|
||||
//iconv.reinit( encoding );
|
||||
lineFeed = Utf8::initLineFeed( encoding );
|
||||
lineFeed = Text::initLineFeed( encoding );
|
||||
// We now can use our own readNextLine() function
|
||||
|
||||
wstring str;
|
||||
std::u32string str;
|
||||
size_t offset;
|
||||
|
||||
for ( ;; ) {
|
||||
|
@ -947,7 +945,7 @@ DslScanner::DslScanner( string const & fileName ):
|
|||
|
||||
size_t beg = str.find_first_of( L'"' );
|
||||
|
||||
if ( beg == wstring::npos ) {
|
||||
if ( beg == std::u32string::npos ) {
|
||||
throw exMalformedDslFile( fileName );
|
||||
}
|
||||
|
||||
|
@ -957,7 +955,7 @@ DslScanner::DslScanner( string const & fileName ):
|
|||
throw exMalformedDslFile( fileName );
|
||||
}
|
||||
|
||||
wstring arg( str, beg + 1, end - beg - 1 );
|
||||
std::u32string arg( str, beg + 1, end - beg - 1 );
|
||||
|
||||
if ( isName ) {
|
||||
dictionaryName = arg;
|
||||
|
@ -975,16 +973,16 @@ DslScanner::DslScanner( string const & fileName ):
|
|||
// The encoding
|
||||
if ( !needExactEncoding ) {
|
||||
// We don't need that!
|
||||
GD_FDPRINTF( stderr, "Warning: encoding was specified in a Unicode file, ignoring.\n" );
|
||||
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
|
||||
}
|
||||
else if ( !arg.compare( U"Latin" ) ) {
|
||||
encoding = Utf8::Windows1252;
|
||||
encoding = Text::Encoding::Windows1252;
|
||||
}
|
||||
else if ( !arg.compare( U"Cyrillic" ) ) {
|
||||
encoding = Utf8::Windows1251;
|
||||
encoding = Text::Encoding::Windows1251;
|
||||
}
|
||||
else if ( !arg.compare( U"EasternEuropean" ) ) {
|
||||
encoding = Utf8::Windows1250;
|
||||
encoding = Text::Encoding::Windows1250;
|
||||
}
|
||||
else {
|
||||
gzclose( f );
|
||||
|
@ -1010,7 +1008,7 @@ DslScanner::~DslScanner() noexcept
|
|||
gzclose( f );
|
||||
}
|
||||
|
||||
bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_word )
|
||||
bool DslScanner::readNextLine( std::u32string & out, size_t & offset, bool only_head_word )
|
||||
{
|
||||
offset = gztell( f ) - readBufferLeft /*+pos*/;
|
||||
|
||||
|
@ -1037,7 +1035,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
|||
return false;
|
||||
}
|
||||
|
||||
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
|
||||
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
|
||||
if ( pos == -1 ) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1058,9 +1056,9 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
|||
}
|
||||
}
|
||||
|
||||
bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bool only_headword )
|
||||
bool DslScanner::readNextLineWithoutComments( std::u32string & out, size_t & offset, bool only_headword )
|
||||
{
|
||||
wstring str;
|
||||
std::u32string str;
|
||||
bool commentToNextLine = false;
|
||||
size_t currentOffset;
|
||||
|
||||
|
@ -1088,14 +1086,14 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset, bo
|
|||
|
||||
/////////////// DslScanner
|
||||
|
||||
void processUnsortedParts( wstring & str, bool strip )
|
||||
void processUnsortedParts( std::u32string & str, bool strip )
|
||||
{
|
||||
int refCount = 0;
|
||||
|
||||
size_t startPos = 0;
|
||||
|
||||
for ( size_t x = 0; x < str.size(); ) {
|
||||
wchar ch = str[ x ];
|
||||
char32_t ch = str[ x ];
|
||||
|
||||
if ( ch == L'\\' ) {
|
||||
// Escape code
|
||||
|
@ -1121,7 +1119,7 @@ void processUnsortedParts( wstring & str, bool strip )
|
|||
--refCount;
|
||||
|
||||
if ( refCount < 0 ) {
|
||||
GD_FDPRINTF( stderr, "Warning: an unmatched closing brace was encountered.\n" );
|
||||
qWarning( "Warning: an unmatched closing brace was encountered." );
|
||||
refCount = 0;
|
||||
// But we remove that thing either way
|
||||
str.erase( x, 1 );
|
||||
|
@ -1146,23 +1144,23 @@ void processUnsortedParts( wstring & str, bool strip )
|
|||
}
|
||||
|
||||
if ( strip && refCount ) {
|
||||
GD_FDPRINTF( stderr, "Warning: unclosed brace(s) encountered.\n" );
|
||||
qWarning( "Warning: unclosed brace(s) encountered." );
|
||||
str.erase( startPos );
|
||||
}
|
||||
}
|
||||
|
||||
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, bool inside_recurse )
|
||||
void expandOptionalParts( std::u32string & str, list< std::u32string > * result, size_t x, bool inside_recurse )
|
||||
{
|
||||
// If str is too long, it can never be a headword.
|
||||
if ( str.size() > 100 ) {
|
||||
return;
|
||||
}
|
||||
list< wstring > expanded;
|
||||
list< wstring > * headwords;
|
||||
list< std::u32string > expanded;
|
||||
list< std::u32string > * headwords;
|
||||
headwords = inside_recurse ? result : &expanded;
|
||||
|
||||
for ( ; x < str.size(); ) {
|
||||
wchar ch = str[ x ];
|
||||
char32_t ch = str[ x ];
|
||||
|
||||
if ( ch == L'\\' ) {
|
||||
// Escape code
|
||||
|
@ -1175,7 +1173,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
|
|||
int refCount = 1;
|
||||
|
||||
for ( size_t y = x + 1; y < str.size(); ++y ) {
|
||||
wchar ch = str[ y ];
|
||||
char32_t ch = str[ y ];
|
||||
|
||||
if ( ch == L'\\' ) {
|
||||
// Escape code
|
||||
|
@ -1191,7 +1189,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
|
|||
|
||||
if ( y != x + 1 ) // Only do for non-empty cases
|
||||
{
|
||||
wstring removed( str, 0, x );
|
||||
std::u32string removed( str, 0, x );
|
||||
removed.append( str, y + 1, str.size() - y - 1 );
|
||||
|
||||
expandOptionalParts( removed, headwords, x, true );
|
||||
|
@ -1205,7 +1203,7 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
|
|||
if ( refCount && x != str.size() - 1 ) {
|
||||
// Closing paren not found? Chop it.
|
||||
|
||||
wstring removed( str, 0, x );
|
||||
std::u32string removed( str, 0, x );
|
||||
|
||||
// Limit the amount of results to avoid excessive resource consumption
|
||||
if ( headwords->size() < 32 ) {
|
||||
|
@ -1243,10 +1241,10 @@ void expandOptionalParts( wstring & str, list< wstring > * result, size_t x, boo
|
|||
}
|
||||
}
|
||||
|
||||
static const wstring openBraces( U"{{" );
|
||||
static const wstring closeBraces( U"}}" );
|
||||
static const std::u32string openBraces( U"{{" );
|
||||
static const std::u32string closeBraces( U"}}" );
|
||||
|
||||
void stripComments( wstring & str, bool & nextLine )
|
||||
void stripComments( std::u32string & str, bool & nextLine )
|
||||
{
|
||||
string::size_type n = 0, n2 = 0;
|
||||
|
||||
|
@ -1270,9 +1268,9 @@ void stripComments( wstring & str, bool & nextLine )
|
|||
}
|
||||
}
|
||||
|
||||
void expandTildes( wstring & str, wstring const & tildeReplacement )
|
||||
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement )
|
||||
{
|
||||
wstring tildeValue = Folding::trimWhitespace( tildeReplacement );
|
||||
std::u32string tildeValue = Folding::trimWhitespace( tildeReplacement );
|
||||
for ( size_t x = 0; x < str.size(); ) {
|
||||
if ( str[ x ] == L'\\' ) {
|
||||
x += 2;
|
||||
|
@ -1295,7 +1293,7 @@ void expandTildes( wstring & str, wstring const & tildeReplacement )
|
|||
}
|
||||
}
|
||||
|
||||
void unescapeDsl( wstring & str )
|
||||
void unescapeDsl( std::u32string & str )
|
||||
{
|
||||
for ( size_t x = 0; x < str.size(); ++x ) {
|
||||
if ( str[ x ] == L'\\' ) {
|
||||
|
@ -1304,7 +1302,7 @@ void unescapeDsl( wstring & str )
|
|||
}
|
||||
}
|
||||
|
||||
void normalizeHeadword( wstring & str )
|
||||
void normalizeHeadword( std::u32string & str )
|
||||
{
|
||||
for ( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char
|
||||
{
|
||||
|
@ -1332,7 +1330,7 @@ void normalizeHeadword( wstring & str )
|
|||
}
|
||||
|
||||
namespace {
|
||||
void cutEnding( wstring & where, wstring const & ending )
|
||||
void cutEnding( std::u32string & where, std::u32string const & ending )
|
||||
{
|
||||
if ( where.size() > ending.size() && where.compare( where.size() - ending.size(), ending.size(), ending ) == 0 ) {
|
||||
where.erase( where.size() - ending.size() );
|
||||
|
@ -1340,17 +1338,17 @@ void cutEnding( wstring & where, wstring const & ending )
|
|||
}
|
||||
} // namespace
|
||||
|
||||
quint32 dslLanguageToId( wstring const & name )
|
||||
quint32 dslLanguageToId( std::u32string const & name )
|
||||
{
|
||||
static wstring newSp( U"newspelling" );
|
||||
static wstring st( U"standard" );
|
||||
static wstring ms( U"modernsort" );
|
||||
static wstring ts( U"traditionalsort" );
|
||||
static wstring prc( U"prc" );
|
||||
static std::u32string newSp( U"newspelling" );
|
||||
static std::u32string st( U"standard" );
|
||||
static std::u32string ms( U"modernsort" );
|
||||
static std::u32string ts( U"traditionalsort" );
|
||||
static std::u32string prc( U"prc" );
|
||||
|
||||
// Any of those endings are to be removed
|
||||
|
||||
wstring nameStripped = Folding::apply( name );
|
||||
std::u32string nameStripped = Folding::apply( name );
|
||||
|
||||
cutEnding( nameStripped, newSp );
|
||||
cutEnding( nameStripped, st );
|
||||
|
|
|
@ -11,23 +11,21 @@
|
|||
#include "iconv.hh"
|
||||
#include <QtCore5Compat/QTextCodec>
|
||||
#include <QByteArray>
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
|
||||
// Implementation details for Dsl, not part of its interface
|
||||
namespace Dsl {
|
||||
namespace Details {
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
using std::list;
|
||||
using std::vector;
|
||||
using Utf8::Encoding;
|
||||
using Utf8::LineFeed;
|
||||
using Text::Encoding;
|
||||
using Text::LineFeed;
|
||||
|
||||
string findCodeForDslId( int id );
|
||||
|
||||
bool isAtSignFirst( wstring const & str );
|
||||
bool isAtSignFirst( std::u32string const & str );
|
||||
|
||||
/// Parses the DSL language, representing it in its structural DOM form.
|
||||
struct ArticleDom
|
||||
|
@ -37,23 +35,23 @@ struct ArticleDom
|
|||
bool isTag; // true if it is a tag with subnodes, false if it's a leaf text
|
||||
// data.
|
||||
// Those are only used if isTag is true
|
||||
wstring tagName;
|
||||
wstring tagAttrs;
|
||||
wstring text; // This is only used if isTag is false
|
||||
std::u32string tagName;
|
||||
std::u32string tagAttrs;
|
||||
std::u32string text; // This is only used if isTag is false
|
||||
|
||||
// Empty marker types: passed as the first constructor argument to choose
// between the text-node constructor (Text) and the tag-node constructor (Tag).
class Text
|
||||
{};
|
||||
class Tag
|
||||
{};
|
||||
|
||||
/// Builds a tag node: sets isTag to true and copies @p name and @p attrs into
/// tagName and tagAttrs. The unnamed Tag argument carries no data; it only
/// selects this overload.
Node( Tag, wstring const & name, wstring const & attrs ):
|
||||
Node( Tag, std::u32string const & name, std::u32string const & attrs ):
|
||||
isTag( true ),
|
||||
tagName( name ),
|
||||
tagAttrs( attrs )
|
||||
{
|
||||
}
|
||||
|
||||
Node( Text, wstring const & text_ ):
|
||||
Node( Text, std::u32string const & text_ ):
|
||||
isTag( false ),
|
||||
text( text_ )
|
||||
{
|
||||
|
@ -61,30 +59,32 @@ struct ArticleDom
|
|||
|
||||
/// Concatenates all child text nodes recursively to form all text
|
||||
/// the node contains stripped of any markup.
|
||||
wstring renderAsText( bool stripTrsTag = false ) const;
|
||||
std::u32string renderAsText( bool stripTrsTag = false ) const;
|
||||
};
|
||||
|
||||
/// Does the parse at construction. Refer to the 'root' member variable
|
||||
/// afterwards.
|
||||
explicit ArticleDom( wstring const &, string const & dictName = string(), wstring const & headword_ = wstring() );
|
||||
explicit ArticleDom( std::u32string const &,
|
||||
string const & dictName = string(),
|
||||
std::u32string const & headword_ = std::u32string() );
|
||||
|
||||
/// Root of DOM's tree
|
||||
Node root;
|
||||
|
||||
private:
|
||||
|
||||
void openTag( wstring const & name, wstring const & attr, list< Node * > & stack );
|
||||
void openTag( std::u32string const & name, std::u32string const & attr, list< Node * > & stack );
|
||||
|
||||
void closeTag( wstring const & name, list< Node * > & stack, bool warn = true );
|
||||
void closeTag( std::u32string const & name, list< Node * > & stack, bool warn = true );
|
||||
|
||||
bool atSignFirstInLine();
|
||||
|
||||
wchar const *stringPos, *lineStartPos;
|
||||
char32_t const *stringPos, *lineStartPos;
|
||||
|
||||
class eot: std::exception
|
||||
{};
|
||||
|
||||
wchar ch;
|
||||
char32_t ch;
|
||||
bool escaped;
|
||||
unsigned transcriptionCount; // >0 = inside a [t] tag
|
||||
unsigned mediaCount; // >0 = inside a [s] tag
|
||||
|
@ -93,7 +93,7 @@ private:
|
|||
|
||||
/// Information for diagnostic purposes
|
||||
string dictionaryName;
|
||||
wstring headword;
|
||||
std::u32string headword;
|
||||
};
|
||||
|
||||
/// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects
|
||||
|
@ -103,9 +103,9 @@ class DslScanner
|
|||
gzFile f;
|
||||
Encoding encoding;
|
||||
QTextCodec * codec;
|
||||
wstring dictionaryName;
|
||||
wstring langFrom, langTo;
|
||||
wstring soundDictionary;
|
||||
std::u32string dictionaryName;
|
||||
std::u32string langFrom, langTo;
|
||||
std::u32string soundDictionary;
|
||||
char readBuffer[ 65536 ];
|
||||
char * readBufferPtr;
|
||||
LineFeed lineFeed;
|
||||
|
@ -132,25 +132,25 @@ public:
|
|||
}
|
||||
|
||||
/// Returns the dictionary's name as it was read from the file's headers.
/// The result is a const reference to this scanner's dictionaryName member,
/// not a copy.
|
||||
wstring const & getDictionaryName() const
|
||||
std::u32string const & getDictionaryName() const
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's source language as it was read from the file's
/// headers. The result is a const reference to this scanner's langFrom
/// member, not a copy.
|
||||
wstring const & getLangFrom() const
|
||||
std::u32string const & getLangFrom() const
|
||||
{
|
||||
return langFrom;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's target language as it was read from the file's
/// headers. The result is a const reference to this scanner's langTo member,
/// not a copy.
|
||||
wstring const & getLangTo() const
|
||||
std::u32string const & getLangTo() const
|
||||
{
|
||||
return langTo;
|
||||
}
|
||||
|
||||
/// Returns the preferred external dictionary with sounds as it was read from
/// the file's headers. The result is a const reference to this scanner's
/// soundDictionary member, not a copy.
|
||||
wstring const & getSoundDictionaryName() const
|
||||
std::u32string const & getSoundDictionaryName() const
|
||||
{
|
||||
return soundDictionary;
|
||||
}
|
||||
|
@ -161,10 +161,10 @@ public:
|
|||
/// If end of file is reached, false is returned.
|
||||
/// Reading begins from the first line after the headers (ones which start
|
||||
/// with #).
|
||||
bool readNextLine( wstring &, size_t & offset, bool only_head_word = false );
|
||||
bool readNextLine( std::u32string &, size_t & offset, bool only_head_word = false );
|
||||
|
||||
/// Similar readNextLine but strip all DSL comments {{...}}
|
||||
bool readNextLineWithoutComments( wstring &, size_t & offset, bool only_headword = false );
|
||||
bool readNextLineWithoutComments( std::u32string &, size_t & offset, bool only_headword = false );
|
||||
|
||||
/// Returns the number of lines read so far from the file.
|
||||
unsigned getLinesRead() const
|
||||
|
@ -180,32 +180,35 @@ public:
|
|||
|
||||
/// This function either removes parts of string enclosed in braces, or leaves
|
||||
/// them intact. The braces themselves are removed always, though.
|
||||
void processUnsortedParts( wstring & str, bool strip );
|
||||
void processUnsortedParts( std::u32string & str, bool strip );
|
||||
|
||||
/// Expands optional parts of a headword (ones marked with parentheses),
|
||||
/// producing all possible combinations where they are present or absent.
|
||||
void expandOptionalParts( wstring & str, list< wstring > * result, size_t x = 0, bool inside_recurse = false );
|
||||
void expandOptionalParts( std::u32string & str,
|
||||
list< std::u32string > * result,
|
||||
size_t x = 0,
|
||||
bool inside_recurse = false );
|
||||
|
||||
/// Expands all unescaped tildes, inserting tildeReplacement text instead of
|
||||
/// them.
|
||||
void expandTildes( wstring & str, wstring const & tildeReplacement );
|
||||
void expandTildes( std::u32string & str, std::u32string const & tildeReplacement );
|
||||
|
||||
/// Unescapes any escaped chars. Be sure to handle all their special meanings
|
||||
/// before unescaping them.
|
||||
void unescapeDsl( wstring & str );
|
||||
void unescapeDsl( std::u32string & str );
|
||||
|
||||
/// Normalizes the headword. Currently turns any sequences of consecutive spaces
|
||||
/// into a single space.
|
||||
void normalizeHeadword( wstring & );
|
||||
void normalizeHeadword( std::u32string & );
|
||||
|
||||
/// Strip DSL {{...}} comments
|
||||
void stripComments( wstring &, bool & );
|
||||
void stripComments( std::u32string &, bool & );
|
||||
|
||||
inline size_t DslScanner::distanceToBytes( size_t x ) const
|
||||
{
|
||||
switch ( encoding ) {
|
||||
case Utf8::Utf16LE:
|
||||
case Utf8::Utf16BE:
|
||||
case Encoding::Utf16LE:
|
||||
case Encoding::Utf16BE:
|
||||
return x * 2;
|
||||
default:
|
||||
return x;
|
||||
|
@ -214,7 +217,7 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
|
|||
|
||||
/// Converts the given language name taken from Dsl header (i.e. getLangFrom(),
|
||||
/// getLangTo()) to its proper language id.
|
||||
quint32 dslLanguageToId( wstring const & name );
|
||||
quint32 dslLanguageToId( std::u32string const & name );
|
||||
|
||||
} // namespace Details
|
||||
} // namespace Dsl
|
||||
|
|
|
@ -1,26 +1,18 @@
|
|||
/* This file is (c) 2014 Abs62
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#include <QDir>
|
||||
#ifdef EPWING_SUPPORT
|
||||
|
||||
#include "epwing_book.hh"
|
||||
#include "epwing.hh"
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QDir>
|
||||
#include <QRunnable>
|
||||
#include <QSemaphore>
|
||||
|
||||
#include <map>
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
#include "chunkedstorage.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "filetype.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "globalregex.hh"
|
||||
|
@ -37,7 +29,7 @@ using std::multimap;
|
|||
using std::vector;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
using gd::wstring;
|
||||
using std::u32string;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -66,7 +58,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -82,7 +74,6 @@ class EpwingDictionary: public BtreeIndexing::BtreeDictionary
|
|||
QMutex idxMutex;
|
||||
File::Index idx;
|
||||
IdxHeader idxHeader;
|
||||
string bookName;
|
||||
ChunkedStorage::Reader chunks;
|
||||
Epwing::Book::EpwingBook eBook;
|
||||
QString cacheDirectory;
|
||||
|
@ -96,21 +87,6 @@ public:
|
|||
|
||||
~EpwingDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return bookName;
|
||||
}
|
||||
|
||||
void setName( string _name ) noexcept override
|
||||
{
|
||||
bookName = _name;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -133,10 +109,10 @@ public:
|
|||
|
||||
QString const & getDescription() override;
|
||||
|
||||
void getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off );
|
||||
void getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off );
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -158,16 +134,16 @@ public:
|
|||
&& ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
|
||||
}
|
||||
|
||||
static int japaneseWriting( gd::wchar ch );
|
||||
static int japaneseWriting( char32_t ch );
|
||||
|
||||
static bool isSign( gd::wchar ch );
|
||||
static bool isSign( char32_t ch );
|
||||
|
||||
static bool isJapanesePunctiation( gd::wchar ch );
|
||||
static bool isJapanesePunctiation( char32_t ch );
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long ) override;
|
||||
sptr< Dictionary::WordSearchRequest > prefixMatch( u32string const &, unsigned long ) override;
|
||||
|
||||
sptr< Dictionary::WordSearchRequest >
|
||||
stemmedMatch( wstring const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
|
||||
stemmedMatch( u32string const &, unsigned minLength, unsigned maxSuffixVariation, unsigned long maxResults ) override;
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -180,7 +156,7 @@ private:
|
|||
quint32 address, string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
|
||||
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & word ) override;
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( u32string const & word ) override;
|
||||
|
||||
void loadArticleNextPage( string & articleHeadword, string & articleText, int & articlePage, int & articleOffset );
|
||||
void
|
||||
|
@ -219,7 +195,7 @@ EpwingDictionary::EpwingDictionary( string const & id,
|
|||
vector< string > const & dictionaryFiles,
|
||||
int subBook ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset )
|
||||
{
|
||||
|
@ -227,7 +203,7 @@ EpwingDictionary::EpwingDictionary( string const & id,
|
|||
idx.seek( sizeof( idxHeader ) );
|
||||
if ( data.size() > 0 ) {
|
||||
idx.read( &data.front(), idxHeader.nameSize );
|
||||
bookName = string( &data.front(), idxHeader.nameSize );
|
||||
dictionaryName = string( &data.front(), idxHeader.nameSize );
|
||||
}
|
||||
|
||||
// Initialize eBook
|
||||
|
@ -431,16 +407,14 @@ void EpwingDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
return;
|
||||
|
||||
|
||||
gdDebug( "Epwing: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Epwing: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Epwing: Failed building full-text search index for \"%s\", reason: %s\n",
|
||||
getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "Epwing: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( QString::fromStdString( ftsIdxName ) );
|
||||
}
|
||||
}
|
||||
|
@ -475,7 +449,7 @@ void EpwingDictionary::getArticleText( uint32_t articleAddress, QString & headwo
|
|||
|
||||
class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
|
||||
{
|
||||
wstring str;
|
||||
u32string str;
|
||||
EpwingDictionary & dict;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
|
@ -483,7 +457,7 @@ class EpwingHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
EpwingHeadwordsRequest( wstring const & word_, EpwingDictionary & dict_ ):
|
||||
EpwingHeadwordsRequest( u32string const & word_, EpwingDictionary & dict_ ):
|
||||
str( word_ ),
|
||||
dict( dict_ )
|
||||
{
|
||||
|
@ -559,7 +533,7 @@ void EpwingHeadwordsRequest::run()
|
|||
|
||||
finish();
|
||||
}
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( wstring const & word )
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym( u32string const & word )
|
||||
{
|
||||
return synonymSearchEnabled ? std::make_shared< EpwingHeadwordsRequest >( word, *this ) :
|
||||
Class::findHeadwordsForSynonym( word );
|
||||
|
@ -568,8 +542,8 @@ sptr< Dictionary::WordSearchRequest > EpwingDictionary::findHeadwordsForSynonym(
|
|||
|
||||
class EpwingArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
u32string word;
|
||||
vector< u32string > alts;
|
||||
EpwingDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -578,8 +552,8 @@ class EpwingArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
EpwingArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
EpwingArticleRequest( u32string const & word_,
|
||||
vector< u32string > const & alts_,
|
||||
EpwingDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -594,10 +568,10 @@ public:
|
|||
|
||||
void run();
|
||||
|
||||
void getBuiltInArticle( wstring const & word_,
|
||||
void getBuiltInArticle( u32string const & word_,
|
||||
QList< int > & pages,
|
||||
QList< int > & offsets,
|
||||
multimap< wstring, pair< string, string > > & mainArticles );
|
||||
multimap< u32string, pair< string, string > > & mainArticles );
|
||||
|
||||
void cancel() override
|
||||
{
|
||||
|
@ -627,13 +601,13 @@ void EpwingArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< quint32 > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics )
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
|
||||
|
@ -667,11 +641,11 @@ void EpwingArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics )
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -696,7 +670,7 @@ void EpwingArticleRequest::run()
|
|||
|
||||
string result = "<div class=\"epwing_article\">";
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += "<h3>";
|
||||
|
@ -745,10 +719,10 @@ void EpwingArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
|
||||
void EpwingArticleRequest::getBuiltInArticle( u32string const & word_,
|
||||
QList< int > & pages,
|
||||
QList< int > & offsets,
|
||||
multimap< wstring, pair< string, string > > & mainArticles )
|
||||
multimap< u32string, pair< string, string > > & mainArticles )
|
||||
{
|
||||
try {
|
||||
string headword, articleText;
|
||||
|
@ -782,7 +756,7 @@ void EpwingArticleRequest::getBuiltInArticle( wstring const & word_,
|
|||
}
|
||||
}
|
||||
|
||||
void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg, QList< int > & off )
|
||||
void EpwingDictionary::getHeadwordPos( u32string const & word_, QList< int > & pg, QList< int > & off )
|
||||
{
|
||||
try {
|
||||
QMutexLocker _( &eBook.getLibMutex() );
|
||||
|
@ -793,9 +767,9 @@ void EpwingDictionary::getHeadwordPos( wstring const & word_, QList< int > & pg,
|
|||
}
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > EpwingDictionary::getArticle( u32string const & word,
|
||||
vector< u32string > const & alts,
|
||||
u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -879,10 +853,10 @@ void EpwingResourceRequest::run()
|
|||
}
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Epwing: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "Epwing: Failed loading resource \"%s\" for \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -908,7 +882,7 @@ sptr< Dictionary::DataRequest > EpwingDictionary::getSearchResults( QString cons
|
|||
ignoreDiacritics );
|
||||
}
|
||||
|
||||
int EpwingDictionary::japaneseWriting( gd::wchar ch )
|
||||
int EpwingDictionary::japaneseWriting( char32_t ch )
|
||||
{
|
||||
if ( ( ch >= 0x30A0 && ch <= 0x30FF ) || ( ch >= 0x31F0 && ch <= 0x31FF ) || ( ch >= 0x3200 && ch <= 0x32FF )
|
||||
|| ( ch >= 0xFF00 && ch <= 0xFFEF ) || ( ch == 0x1B000 ) )
|
||||
|
@ -921,7 +895,7 @@ int EpwingDictionary::japaneseWriting( gd::wchar ch )
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool EpwingDictionary::isSign( gd::wchar ch )
|
||||
bool EpwingDictionary::isSign( char32_t ch )
|
||||
{
|
||||
switch ( ch ) {
|
||||
case 0x002B: // PLUS SIGN
|
||||
|
@ -941,7 +915,7 @@ bool EpwingDictionary::isSign( gd::wchar ch )
|
|||
}
|
||||
}
|
||||
|
||||
bool EpwingDictionary::isJapanesePunctiation( gd::wchar ch )
|
||||
bool EpwingDictionary::isJapanesePunctiation( char32_t ch )
|
||||
{
|
||||
return ch >= 0x3000 && ch <= 0x303F;
|
||||
}
|
||||
|
@ -955,7 +929,7 @@ class EpwingWordSearchRequest: public BtreeIndexing::BtreeWordSearchRequest
|
|||
public:
|
||||
|
||||
EpwingWordSearchRequest( EpwingDictionary & dict_,
|
||||
wstring const & str_,
|
||||
u32string const & str_,
|
||||
unsigned minLength_,
|
||||
int maxSuffixVariation_,
|
||||
bool allowMiddleMatches_,
|
||||
|
@ -1002,13 +976,13 @@ void EpwingWordSearchRequest::findMatches()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( wstring const & str, unsigned long maxResults )
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::prefixMatch( u32string const & str, unsigned long maxResults )
|
||||
|
||||
{
|
||||
return std::make_shared< EpwingWordSearchRequest >( *this, str, 0, -1, true, maxResults );
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( wstring const & str,
|
||||
sptr< Dictionary::WordSearchRequest > EpwingDictionary::stemmedMatch( u32string const & str,
|
||||
unsigned minLength,
|
||||
unsigned maxSuffixVariation,
|
||||
unsigned long maxResults )
|
||||
|
@ -1047,20 +1021,20 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
|
|||
chunks.addToBlock( &head.page, sizeof( head.page ) );
|
||||
chunks.addToBlock( &head.offset, sizeof( head.offset ) );
|
||||
|
||||
wstring hw = head.headword.toStdU32String();
|
||||
u32string hw = head.headword.toStdU32String();
|
||||
|
||||
indexedWords.addWord( hw, offset );
|
||||
wordCount++;
|
||||
articleCount++;
|
||||
|
||||
vector< wstring > words;
|
||||
vector< u32string > words;
|
||||
|
||||
// Parse combined kanji/katakana/hiragana headwords
|
||||
|
||||
int w_prev = 0;
|
||||
wstring word;
|
||||
for ( wstring::size_type n = 0; n < hw.size(); n++ ) {
|
||||
gd::wchar ch = hw[ n ];
|
||||
u32string word;
|
||||
for ( u32string::size_type n = 0; n < hw.size(); n++ ) {
|
||||
char32_t ch = hw[ n ];
|
||||
|
||||
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isSign( ch )
|
||||
|| EpwingDictionary::isJapanesePunctiation( ch ) )
|
||||
|
@ -1070,7 +1044,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
|
|||
|
||||
if ( w > 0 ) {
|
||||
// Store only separated words
|
||||
gd::wchar ch_prev = 0;
|
||||
char32_t ch_prev = 0;
|
||||
if ( n )
|
||||
ch_prev = hw[ n - 1 ];
|
||||
bool needStore = ( n == 0 || Folding::isPunct( ch_prev ) || Folding::isWhitespace( ch_prev )
|
||||
|
@ -1078,7 +1052,7 @@ void addWordToChunks( Epwing::Book::EpwingHeadword & head,
|
|||
|
||||
word.push_back( ch );
|
||||
w_prev = w;
|
||||
wstring::size_type i;
|
||||
u32string::size_type i;
|
||||
for ( i = n + 1; i < hw.size(); i++ ) {
|
||||
ch = hw[ i ];
|
||||
if ( Folding::isPunct( ch ) || Folding::isWhitespace( ch ) || EpwingDictionary::isJapanesePunctiation( ch ) )
|
||||
|
@ -1156,7 +1130,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
subBooksNumber = dict.setBook( mainDirectory );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Epwing dictionary initializing failed: %s, error: %s\n", mainDirectory.c_str(), e.what() );
|
||||
qWarning( "Epwing dictionary initializing failed: %s, error: %s", mainDirectory.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1191,13 +1165,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
string indexFile = indicesDir + dictId;
|
||||
|
||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
gdDebug( "Epwing: Building the index for dictionary in directory %s\n", dir.toUtf8().data() );
|
||||
qDebug( "Epwing: Building the index for dictionary in directory %s", dir.toUtf8().data() );
|
||||
|
||||
QString str = dict.title();
|
||||
QByteArray nameData = str.toUtf8();
|
||||
initializing.indexingDictionary( nameData.data() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader{};
|
||||
|
||||
|
@ -1271,7 +1245,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< EpwingDictionary >( dictId, indexFile, dictFiles, sb ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Epwing dictionary initializing failed: %s, error: %s\n", dir.toUtf8().data(), e.what() );
|
||||
qWarning( "Epwing dictionary initializing failed: %s, error: %s", dir.toUtf8().data(), e.what() );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* This file is (c) 2014 Abs62
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#ifdef EPWING_SUPPORT
|
||||
|
||||
#include "epwing_book.hh"
|
||||
|
||||
|
@ -9,11 +9,8 @@
|
|||
#include <QTextStream>
|
||||
#include <QTextDocumentFragment>
|
||||
#include <QHash>
|
||||
#include "gddebug.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
#include "wstring.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
#include "folding.hh"
|
||||
#include "epwing_charmap.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
@ -577,7 +574,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
|
|||
QFileInfo info( mainCacheDir );
|
||||
if ( !info.exists() || !info.isDir() ) {
|
||||
if ( !dir.mkdir( mainCacheDir ) ) {
|
||||
gdWarning( "Epwing: can't create cache directory \"%s\"", mainCacheDir.toUtf8().data() );
|
||||
qWarning( "Epwing: can't create cache directory \"%s\"", mainCacheDir.toUtf8().data() );
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
@ -586,7 +583,7 @@ QString EpwingBook::createCacheDir( QString const & dirName )
|
|||
info = QFileInfo( cacheDir );
|
||||
if ( !info.exists() || !info.isDir() ) {
|
||||
if ( !dir.mkdir( cacheDir ) ) {
|
||||
gdWarning( "Epwing: can't create cache directory \"%s\"", cacheDir.toUtf8().data() );
|
||||
qWarning( "Epwing: can't create cache directory \"%s\"", cacheDir.toUtf8().data() );
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
@ -1137,7 +1134,7 @@ void EpwingBook::fixHeadword( QString & headword )
|
|||
// return;
|
||||
//}
|
||||
|
||||
gd::wstring folded = Folding::applyPunctOnly( fixed.toStdU32String() );
|
||||
std::u32string folded = Folding::applyPunctOnly( fixed.toStdU32String() );
|
||||
//fixed = QString::fromStdU32String( folded );
|
||||
|
||||
//if( isHeadwordCorrect( fixed ) )
|
||||
|
@ -1263,7 +1260,7 @@ const char * EpwingBook::beginDecoration( unsigned int code )
|
|||
str = "<sup>";
|
||||
break;
|
||||
default:
|
||||
gdWarning( "Epwing: Unknown decoration code %i", code );
|
||||
qWarning( "Epwing: Unknown decoration code %i", code );
|
||||
code = UNKNOWN;
|
||||
break;
|
||||
}
|
||||
|
@ -1285,7 +1282,7 @@ const char * EpwingBook::endDecoration( unsigned int code )
|
|||
storedCode = decorationStack.pop();
|
||||
|
||||
if ( storedCode != code ) {
|
||||
gdWarning( "Epwing: tags mismatch detected" );
|
||||
qWarning( "Epwing: tags mismatch detected" );
|
||||
if ( storedCode == UNKNOWN )
|
||||
storedCode = code;
|
||||
}
|
||||
|
@ -1405,7 +1402,7 @@ QByteArray EpwingBook::handleColorImage( EB_Hook_Code code, const unsigned int *
|
|||
EB_Error_Code ret = eb_set_binary_color_graphic( &book, &pos );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_set_binary_color_graphic", ret );
|
||||
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray();
|
||||
}
|
||||
|
||||
|
@ -1443,7 +1440,7 @@ QByteArray EpwingBook::handleColorImage( EB_Hook_Code code, const unsigned int *
|
|||
ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_read_binary", ret );
|
||||
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1480,7 +1477,7 @@ QByteArray EpwingBook::handleMonoImage( EB_Hook_Code code, const unsigned int *
|
|||
EB_Error_Code ret = eb_set_binary_mono_graphic( &book, &pos, monoWidth, monoHeight );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_set_binary_mono_graphic", ret );
|
||||
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray();
|
||||
}
|
||||
|
||||
|
@ -1515,7 +1512,7 @@ QByteArray EpwingBook::handleMonoImage( EB_Hook_Code code, const unsigned int *
|
|||
ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_read_binary", ret );
|
||||
gdWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing image retrieve error: %s", error_string.toUtf8().data() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1585,7 +1582,7 @@ QByteArray EpwingBook::handleWave( EB_Hook_Code code, const unsigned int * argv
|
|||
EB_Error_Code ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_read_binary", ret );
|
||||
gdWarning( "Epwing sound retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing sound retrieve error: %s", error_string.toUtf8().data() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1652,7 +1649,7 @@ QByteArray EpwingBook::handleMpeg( EB_Hook_Code code, const unsigned int * argv
|
|||
EB_Error_Code ret = eb_read_binary( &book, BinaryBufferSize, buffer.data(), &length );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_read_binary", ret );
|
||||
gdWarning( "Epwing movie retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing movie retrieve error: %s", error_string.toUtf8().data() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1720,7 +1717,7 @@ QByteArray EpwingBook::handleNarrowFont( const unsigned int * argv, bool text_on
|
|||
EB_Error_Code ret = eb_narrow_font_character_bitmap( &book, *argv, bitmap );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_narrow_font_character_bitmap", ret );
|
||||
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray( "?" );
|
||||
}
|
||||
|
||||
|
@ -1729,7 +1726,7 @@ QByteArray EpwingBook::handleNarrowFont( const unsigned int * argv, bool text_on
|
|||
ret = eb_bitmap_to_png( bitmap, 8, 16, buff, &nlen );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_bitmap_to_png", ret );
|
||||
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray( "?" );
|
||||
}
|
||||
|
||||
|
@ -1784,7 +1781,7 @@ QByteArray EpwingBook::handleWideFont( const unsigned int * argv, bool text_only
|
|||
EB_Error_Code ret = eb_wide_font_character_bitmap( &book, *argv, bitmap );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_wide_font_character_bitmap", ret );
|
||||
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray( "?" );
|
||||
}
|
||||
|
||||
|
@ -1793,7 +1790,7 @@ QByteArray EpwingBook::handleWideFont( const unsigned int * argv, bool text_only
|
|||
ret = eb_bitmap_to_png( bitmap, 16, 16, buff, &wlen );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_bitmap_to_png", ret );
|
||||
gdWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing: Font retrieve error: %s", error_string.toUtf8().data() );
|
||||
return QByteArray( "?" );
|
||||
}
|
||||
|
||||
|
@ -1873,14 +1870,14 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches )
|
|||
EB_Error_Code ret = eb_search_word( &book, bword.data() );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_search_word", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = eb_hit_list( &book, 10, hits, &hitCount );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_hit_list", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1889,14 +1886,14 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches )
|
|||
EB_Error_Code ret = eb_search_word( &book, bword2.data() );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_search_word", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = eb_hit_list( &book, 10, hits, &hitCount );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_hit_list", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1944,14 +1941,14 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int >
|
|||
EB_Error_Code ret = eb_search_exactword( &book, bword.data() );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_search_word", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = eb_hit_list( &book, HitsBufferSize, hits, &hitCount );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_hit_list", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1960,14 +1957,14 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int >
|
|||
EB_Error_Code ret = eb_search_exactword( &book, bword2.data() );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_search_word", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
|
||||
ret = eb_hit_list( &book, HitsBufferSize, hits, &hitCount );
|
||||
if ( ret != EB_SUCCESS ) {
|
||||
setErrorString( "eb_hit_list", ret );
|
||||
gdWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
qWarning( "Epwing word search error: %s", error_string.toUtf8().data() );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1995,4 +1992,4 @@ QMutex EpwingBook::libMutex;
|
|||
|
||||
} // namespace Epwing
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -18,8 +18,12 @@
|
|||
#include <QString>
|
||||
#include <QtCore5Compat/QTextCodec>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
// POSIX symbol unavailable on Windows needed for eb headers
|
||||
#ifdef Q_OS_WIN
|
||||
#ifndef _SSIZE_T
|
||||
#define _SSIZE_T
|
||||
#define ssize_t long
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <eb/eb.h>
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* This file is (c) 2014 Abs62
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#ifdef EPWING_SUPPORT
|
||||
|
||||
#include "epwing_charmap.hh"
|
||||
|
||||
|
|
|
@ -2,15 +2,13 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "forvo.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include <QNetworkAccessManager>
|
||||
#include <QNetworkReply>
|
||||
#include <QtXml>
|
||||
#include <list>
|
||||
#include "audiolink.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "utf8.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "text.hh"
|
||||
|
||||
namespace Forvo {
|
||||
|
||||
|
@ -20,7 +18,6 @@ namespace {
|
|||
|
||||
class ForvoDictionary: public Dictionary::Class
|
||||
{
|
||||
string name;
|
||||
QString apiKey, languageCode;
|
||||
QNetworkAccessManager & netMgr;
|
||||
|
||||
|
@ -32,22 +29,13 @@ public:
|
|||
QString const & languageCode_,
|
||||
QNetworkAccessManager & netMgr_ ):
|
||||
Dictionary::Class( id, vector< string >() ),
|
||||
name( name_ ),
|
||||
apiKey( apiKey_ ),
|
||||
languageCode( languageCode_ ),
|
||||
netMgr( netMgr_ )
|
||||
{
|
||||
dictionaryName = name_;
|
||||
}
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -59,7 +47,7 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
|
||||
{
|
||||
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
|
||||
|
||||
|
@ -68,7 +56,8 @@ public:
|
|||
return sr;
|
||||
}
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -100,8 +89,8 @@ class ForvoArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
ForvoArticleRequest( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
ForvoArticleRequest( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
QString const & apiKey_,
|
||||
QString const & languageCode_,
|
||||
string const & dictionaryId_,
|
||||
|
@ -111,14 +100,16 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
|
||||
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
|
||||
|
||||
private slots:
|
||||
virtual void requestFinished( QNetworkReply * );
|
||||
};
|
||||
|
||||
sptr< DataRequest >
|
||||
ForvoDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
|
||||
sptr< DataRequest > ForvoDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
if ( word.size() > 80 || apiKey.isEmpty() ) {
|
||||
|
@ -148,8 +139,8 @@ void ForvoArticleRequest::cancel()
|
|||
finish();
|
||||
}
|
||||
|
||||
ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
|
||||
vector< wstring > const & alts,
|
||||
ForvoArticleRequest::ForvoArticleRequest( std::u32string const & str,
|
||||
vector< std::u32string > const & alts,
|
||||
QString const & apiKey_,
|
||||
QString const & languageCode_,
|
||||
string const & dictionaryId_,
|
||||
|
@ -167,9 +158,9 @@ ForvoArticleRequest::ForvoArticleRequest( wstring const & str,
|
|||
}
|
||||
}
|
||||
|
||||
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
|
||||
void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
|
||||
{
|
||||
gdDebug( "Forvo: requesting article %s\n", QString::fromStdU32String( str ).toUtf8().data() );
|
||||
qDebug( "Forvo: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
|
||||
|
||||
QString key = apiKey;
|
||||
|
||||
|
@ -184,16 +175,16 @@ void ForvoArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const &
|
|||
+ "/language/" + languageCode + "/order/rate-desc" )
|
||||
.toUtf8() );
|
||||
|
||||
// GD_DPRINTF( "req: %s\n", reqUrl.toEncoded().data() );
|
||||
// qDebug( "req: %s", reqUrl.toEncoded().data() );
|
||||
|
||||
sptr< QNetworkReply > netReply = std::shared_ptr< QNetworkReply >( mgr.get( QNetworkRequest( reqUrl ) ) );
|
||||
|
||||
netReplies.push_back( NetReply( netReply, Utf8::encode( str ) ) );
|
||||
netReplies.push_back( NetReply( netReply, Text::toUtf8( str ) ) );
|
||||
}
|
||||
|
||||
void ForvoArticleRequest::requestFinished( QNetworkReply * r )
|
||||
{
|
||||
GD_DPRINTF( "Finished.\n" );
|
||||
qDebug( "Finished." );
|
||||
|
||||
if ( isFinished() ) { // Was cancelled
|
||||
return;
|
||||
|
@ -232,7 +223,7 @@ void ForvoArticleRequest::requestFinished( QNetworkReply * r )
|
|||
QString( tr( "XML parse error: %1 at %2,%3" ).arg( errorStr ).arg( errorLine ).arg( errorColumn ) ) );
|
||||
}
|
||||
else {
|
||||
// GD_DPRINTF( "%s\n", dd.toByteArray().data() );
|
||||
// qDebug( "%s", dd.toByteArray().data() );
|
||||
|
||||
QDomNode items = dd.namedItem( "items" );
|
||||
|
||||
|
@ -335,7 +326,7 @@ void ForvoArticleRequest::requestFinished( QNetworkReply * r )
|
|||
setErrorString( text );
|
||||
}
|
||||
}
|
||||
GD_DPRINTF( "done.\n" );
|
||||
qDebug( "done." );
|
||||
}
|
||||
else {
|
||||
setErrorString( netReply->errorString() );
|
||||
|
|
205
src/dict/gls.cc
205
src/dict/gls.cc
|
@ -8,40 +8,27 @@
|
|||
#include "ufile.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "utf8.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "indexedzip.hh"
|
||||
#include "ftshelpers.hh"
|
||||
|
||||
#include "htmlescape.hh"
|
||||
#include "filetype.hh"
|
||||
#include "tiff.hh"
|
||||
#include "audiolink.hh"
|
||||
|
||||
#include <QString>
|
||||
#include <QSemaphore>
|
||||
#include <QThreadPool>
|
||||
#include <QAtomicInt>
|
||||
// For TIFF conversion
|
||||
#include <QImage>
|
||||
#include <QByteArray>
|
||||
#include <QBuffer>
|
||||
|
||||
#include <QDir>
|
||||
#include <QRegularExpression>
|
||||
#include <QtCore5Compat/QTextCodec>
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
namespace Gls {
|
||||
|
||||
|
@ -51,14 +38,12 @@ using std::set;
|
|||
using std::multimap;
|
||||
using std::pair;
|
||||
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
using BtreeIndexing::IndexInfo;
|
||||
using Utf8::Encoding;
|
||||
using Utf8::LineFeed;
|
||||
using Text::Encoding;
|
||||
using Text::LineFeed;
|
||||
|
||||
/////////////// GlsScanner
|
||||
|
||||
|
@ -67,9 +52,9 @@ class GlsScanner
|
|||
gzFile f;
|
||||
Encoding encoding;
|
||||
QTextCodec * codec;
|
||||
wstring dictionaryName;
|
||||
wstring dictionaryDecription, dictionaryAuthor;
|
||||
wstring langFrom, langTo;
|
||||
std::u32string dictionaryName;
|
||||
std::u32string dictionaryDecription, dictionaryAuthor;
|
||||
std::u32string langFrom, langTo;
|
||||
char readBuffer[ 10000 ];
|
||||
char * readBufferPtr;
|
||||
size_t readBufferLeft;
|
||||
|
@ -94,31 +79,31 @@ public:
|
|||
}
|
||||
|
||||
/// Returns the dictionary's name, as was read from file's headers.
|
||||
wstring const & getDictionaryName() const
|
||||
std::u32string const & getDictionaryName() const
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's author, as was read from file's headers.
|
||||
wstring const & getDictionaryAuthor() const
|
||||
std::u32string const & getDictionaryAuthor() const
|
||||
{
|
||||
return dictionaryAuthor;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's description, as was read from file's headers.
|
||||
wstring const & getDictionaryDescription() const
|
||||
std::u32string const & getDictionaryDescription() const
|
||||
{
|
||||
return dictionaryDecription;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's source language, as was read from file's headers.
|
||||
wstring const & getLangFrom() const
|
||||
std::u32string const & getLangFrom() const
|
||||
{
|
||||
return langFrom;
|
||||
}
|
||||
|
||||
/// Returns the dictionary's target language, as was read from file's headers.
|
||||
wstring const & getLangTo() const
|
||||
std::u32string const & getLangTo() const
|
||||
{
|
||||
return langTo;
|
||||
}
|
||||
|
@ -129,7 +114,7 @@ public:
|
|||
/// If end of file is reached, false is returned.
|
||||
/// Reading begins from the first line after the headers (ones which end
|
||||
/// by the "### Glossary section:" line).
|
||||
bool readNextLine( wstring &, size_t & offset );
|
||||
bool readNextLine( std::u32string &, size_t & offset );
|
||||
/// Returns the number of lines read so far from the file.
|
||||
unsigned getLinesRead() const
|
||||
{
|
||||
|
@ -138,7 +123,7 @@ public:
|
|||
};
|
||||
|
||||
GlsScanner::GlsScanner( string const & fileName ):
|
||||
encoding( Utf8::Utf8 ),
|
||||
encoding( Encoding::Utf8 ),
|
||||
readBufferPtr( readBuffer ),
|
||||
readBufferLeft( 0 ),
|
||||
linesRead( 0 )
|
||||
|
@ -164,10 +149,10 @@ GlsScanner::GlsScanner( string const & fileName ):
|
|||
// If the file begins with the dedicated Unicode marker, we just consume
|
||||
// it. If, on the other hand, it's not, we return the bytes back
|
||||
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
|
||||
encoding = Utf8::Utf16LE;
|
||||
encoding = Encoding::Utf16LE;
|
||||
}
|
||||
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
|
||||
encoding = Utf8::Utf16BE;
|
||||
encoding = Encoding::Utf16BE;
|
||||
}
|
||||
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
|
||||
// Looks like Utf8, read one more byte
|
||||
|
@ -176,29 +161,29 @@ GlsScanner::GlsScanner( string const & fileName ):
|
|||
gzclose( f );
|
||||
throw exMalformedGlsFile( fileName );
|
||||
}
|
||||
encoding = Utf8::Utf8;
|
||||
encoding = Encoding::Utf8;
|
||||
}
|
||||
else {
|
||||
if ( gzrewind( f ) ) {
|
||||
gzclose( f );
|
||||
throw exCantOpen( fileName );
|
||||
}
|
||||
encoding = Utf8::Utf8;
|
||||
encoding = Encoding::Utf8;
|
||||
}
|
||||
|
||||
codec = QTextCodec::codecForName( Utf8::getEncodingNameFor( encoding ) );
|
||||
codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
|
||||
// We now can use our own readNextLine() function
|
||||
lineFeed = Utf8::initLineFeed( encoding );
|
||||
lineFeed = Text::initLineFeed( encoding );
|
||||
|
||||
wstring str;
|
||||
wstring * currentField = 0;
|
||||
wstring mark = U"###";
|
||||
wstring titleMark = U"### Glossary title:";
|
||||
wstring authorMark = U"### Author:";
|
||||
wstring descriptionMark = U"### Description:";
|
||||
wstring langFromMark = U"### Source language:";
|
||||
wstring langToMark = U"### Target language:";
|
||||
wstring endOfHeaderMark = U"### Glossary section:";
|
||||
std::u32string str;
|
||||
std::u32string * currentField = 0;
|
||||
std::u32string mark = U"###";
|
||||
std::u32string titleMark = U"### Glossary title:";
|
||||
std::u32string authorMark = U"### Author:";
|
||||
std::u32string descriptionMark = U"### Description:";
|
||||
std::u32string langFromMark = U"### Source language:";
|
||||
std::u32string langToMark = U"### Target language:";
|
||||
std::u32string endOfHeaderMark = U"### Glossary section:";
|
||||
size_t offset;
|
||||
|
||||
for ( ;; ) {
|
||||
|
@ -211,22 +196,22 @@ GlsScanner::GlsScanner( string const & fileName ):
|
|||
currentField = 0;
|
||||
|
||||
if ( str.compare( 0, titleMark.size(), titleMark ) == 0 ) {
|
||||
dictionaryName = wstring( str, titleMark.size(), str.size() - titleMark.size() );
|
||||
dictionaryName = std::u32string( str, titleMark.size(), str.size() - titleMark.size() );
|
||||
currentField = &dictionaryName;
|
||||
}
|
||||
else if ( str.compare( 0, authorMark.size(), authorMark ) == 0 ) {
|
||||
dictionaryAuthor = wstring( str, authorMark.size(), str.size() - authorMark.size() );
|
||||
dictionaryAuthor = std::u32string( str, authorMark.size(), str.size() - authorMark.size() );
|
||||
currentField = &dictionaryAuthor;
|
||||
}
|
||||
else if ( str.compare( 0, descriptionMark.size(), descriptionMark ) == 0 ) {
|
||||
dictionaryDecription = wstring( str, descriptionMark.size(), str.size() - descriptionMark.size() );
|
||||
dictionaryDecription = std::u32string( str, descriptionMark.size(), str.size() - descriptionMark.size() );
|
||||
currentField = &dictionaryDecription;
|
||||
}
|
||||
else if ( str.compare( 0, langFromMark.size(), langFromMark ) == 0 ) {
|
||||
langFrom = wstring( str, langFromMark.size(), str.size() - langFromMark.size() );
|
||||
langFrom = std::u32string( str, langFromMark.size(), str.size() - langFromMark.size() );
|
||||
}
|
||||
else if ( str.compare( 0, langToMark.size(), langToMark ) == 0 ) {
|
||||
langTo = wstring( str, langToMark.size(), str.size() - langToMark.size() );
|
||||
langTo = std::u32string( str, langToMark.size(), str.size() - langToMark.size() );
|
||||
}
|
||||
else if ( str.compare( 0, endOfHeaderMark.size(), endOfHeaderMark ) == 0 ) {
|
||||
break;
|
||||
|
@ -241,7 +226,7 @@ GlsScanner::GlsScanner( string const & fileName ):
|
|||
}
|
||||
}
|
||||
|
||||
bool GlsScanner::readNextLine( wstring & out, size_t & offset )
|
||||
bool GlsScanner::readNextLine( std::u32string & out, size_t & offset )
|
||||
{
|
||||
offset = (size_t)( gztell( f ) - readBufferLeft );
|
||||
|
||||
|
@ -268,7 +253,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset )
|
|||
return false;
|
||||
}
|
||||
|
||||
int pos = Utf8::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
|
||||
int pos = Text::findFirstLinePosition( readBufferPtr, readBufferLeft, lineFeed.lineFeed, lineFeed.length );
|
||||
if ( pos == -1 ) {
|
||||
return false;
|
||||
}
|
||||
|
@ -335,7 +320,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile, bool hasZipFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -361,16 +346,6 @@ public:
|
|||
|
||||
~GlsDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -391,10 +366,12 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -443,7 +420,7 @@ private:
|
|||
|
||||
GlsDictionary::GlsDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
dz( 0 ),
|
||||
chunks( idx, idxHeader.chunksOffset )
|
||||
|
@ -461,11 +438,7 @@ GlsDictionary::GlsDictionary( string const & id, string const & indexFile, vecto
|
|||
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
|
||||
vector< char > dName( idx.read< uint32_t >() );
|
||||
if ( dName.size() > 0 ) {
|
||||
idx.read( &dName.front(), dName.size() );
|
||||
dictionaryName = string( &dName.front(), dName.size() );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
// Initialize the index
|
||||
|
||||
|
@ -529,11 +502,11 @@ QString const & GlsDictionary::getDescription()
|
|||
|
||||
try {
|
||||
GlsScanner scanner( getDictionaryFilenames()[ 0 ] );
|
||||
string str = Utf8::encode( scanner.getDictionaryAuthor() );
|
||||
string str = Text::toUtf8( scanner.getDictionaryAuthor() );
|
||||
if ( !str.empty() ) {
|
||||
dictionaryDescription = QObject::tr( "Author: %1%2" ).arg( QString::fromUtf8( str.c_str() ) ).arg( "\n\n" );
|
||||
}
|
||||
str = Utf8::encode( scanner.getDictionaryDescription() );
|
||||
str = Text::toUtf8( scanner.getDictionaryDescription() );
|
||||
if ( !str.empty() ) {
|
||||
QString desc = QString::fromUtf8( str.c_str() );
|
||||
desc.replace( "\t", "<br/>" );
|
||||
|
@ -543,7 +516,7 @@ QString const & GlsDictionary::getDescription()
|
|||
}
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "GLS dictionary description reading failed: %s, error: %s\n", getName().c_str(), e.what() );
|
||||
qWarning( "GLS dictionary description reading failed: %s, error: %s", getName().c_str(), e.what() );
|
||||
}
|
||||
|
||||
if ( dictionaryDescription.isEmpty() ) {
|
||||
|
@ -574,14 +547,14 @@ void GlsDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Gls: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Gls: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Gls: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Gls: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -618,7 +591,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
|
|||
}
|
||||
else {
|
||||
string articleData =
|
||||
Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
|
||||
Iconv::toUtf8( Text::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
|
||||
string::size_type start_pos = 0, end_pos = 0;
|
||||
|
||||
for ( ;; ) {
|
||||
|
@ -647,7 +620,7 @@ void GlsDictionary::loadArticleText( uint32_t address, vector< string > & headwo
|
|||
end_pos = 0;
|
||||
for ( ;; ) {
|
||||
end_pos = headword.find( '|', start_pos );
|
||||
if ( end_pos == wstring::npos ) {
|
||||
if ( end_pos == std::u32string::npos ) {
|
||||
string hw = headword.substr( start_pos );
|
||||
if ( !hw.empty() ) {
|
||||
headwords.push_back( hw );
|
||||
|
@ -822,7 +795,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
text = Html::unescape( QString::fromStdString( articleStr ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Gls: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Gls: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -830,7 +803,7 @@ void GlsDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
|
||||
class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
|
||||
{
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
GlsDictionary & dict;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
|
@ -838,7 +811,7 @@ class GlsHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
GlsHeadwordsRequest( wstring const & word_, GlsDictionary & dict_ ):
|
||||
GlsHeadwordsRequest( std::u32string const & word_, GlsDictionary & dict_ ):
|
||||
word( word_ ),
|
||||
dict( dict_ )
|
||||
{
|
||||
|
@ -871,7 +844,7 @@ void GlsHeadwordsRequest::run()
|
|||
try {
|
||||
vector< WordArticleLink > chain = dict.findArticles( word );
|
||||
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
|
||||
|
||||
for ( auto & x : chain ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||
|
@ -884,7 +857,7 @@ void GlsHeadwordsRequest::run()
|
|||
|
||||
dict.loadArticleText( x.articleOffset, headwords, articleText );
|
||||
|
||||
wstring headwordDecoded = Utf8::decode( headwords.front() );
|
||||
std::u32string headwordDecoded = Text::toUtf32( headwords.front() );
|
||||
|
||||
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
|
||||
// The headword seems to differ from the input word, which makes the
|
||||
|
@ -902,7 +875,7 @@ void GlsHeadwordsRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( wstring const & word )
|
||||
sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( std::u32string const & word )
|
||||
|
||||
{
|
||||
return synonymSearchEnabled ? std::make_shared< GlsHeadwordsRequest >( word, *this ) :
|
||||
|
@ -915,8 +888,8 @@ sptr< Dictionary::WordSearchRequest > GlsDictionary::findHeadwordsForSynonym( ws
|
|||
class GlsArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
GlsDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -925,8 +898,8 @@ class GlsArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
GlsArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
GlsArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
GlsDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -970,13 +943,13 @@ void GlsArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -1002,16 +975,16 @@ void GlsArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( Text::toUtf32( headword ) );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert(
|
||||
pair( Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), pair( headword, articleText ) ) );
|
||||
pair( Folding::applySimpleCaseOnly( Text::toUtf32( headword ) ), pair( headword, articleText ) ) );
|
||||
|
||||
articlesIncluded.insert( x.articleOffset );
|
||||
}
|
||||
|
@ -1024,7 +997,7 @@ void GlsArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += i->second.second;
|
||||
|
@ -1045,9 +1018,9 @@ void GlsArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > GlsDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -1102,7 +1075,7 @@ void GlsResourceRequest::run()
|
|||
try {
|
||||
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
|
||||
|
||||
GD_DPRINTF( "gls resource name is %s\n", n.c_str() );
|
||||
qDebug( "gls resource name is %s", n.c_str() );
|
||||
|
||||
try {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
@ -1123,7 +1096,7 @@ void GlsResourceRequest::run()
|
|||
if ( dict.resourceZip.isOpen() ) {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
||||
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
|
||||
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
|
||||
throw; // Make it fail since we couldn't read the archive
|
||||
}
|
||||
}
|
||||
|
@ -1187,10 +1160,10 @@ void GlsResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "GLS: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "GLS: Failed loading resource \"%s\" for \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -1265,12 +1238,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// which the incident happened. We need alive scanner for that.
|
||||
|
||||
// Building the index
|
||||
initializing.indexingDictionary( Utf8::encode( scanner.getDictionaryName() ) );
|
||||
initializing.indexingDictionary( Text::toUtf8( scanner.getDictionaryName() ) );
|
||||
|
||||
gdDebug( "Gls: Building the index for dictionary: %s\n",
|
||||
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
|
||||
qDebug( "Gls: Building the index for dictionary: %s",
|
||||
QString::fromStdU32String( scanner.getDictionaryName() ).toUtf8().data() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -1281,18 +1254,18 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
idx.write( idxHeader );
|
||||
|
||||
string dictionaryName = Utf8::encode( scanner.getDictionaryName() );
|
||||
string dictionaryName = Text::toUtf8( scanner.getDictionaryName() );
|
||||
|
||||
idx.write( (uint32_t)dictionaryName.size() );
|
||||
idx.write( dictionaryName.data(), dictionaryName.size() );
|
||||
|
||||
idxHeader.glsEncoding = scanner.getEncoding();
|
||||
idxHeader.glsEncoding = static_cast< uint32_t >( scanner.getEncoding() );
|
||||
|
||||
IndexedWords indexedWords;
|
||||
|
||||
ChunkedStorage::Writer chunks( idx );
|
||||
|
||||
wstring curString;
|
||||
std::u32string curString;
|
||||
size_t curOffset;
|
||||
|
||||
uint32_t articleCount = 0, wordCount = 0;
|
||||
|
@ -1312,12 +1285,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
// Parse headwords
|
||||
|
||||
list< wstring > allEntryWords;
|
||||
wstring::size_type start_pos = 0, end_pos = 0;
|
||||
list< std::u32string > allEntryWords;
|
||||
std::u32string::size_type start_pos = 0, end_pos = 0;
|
||||
for ( ;; ) {
|
||||
end_pos = curString.find( '|', start_pos );
|
||||
if ( end_pos == wstring::npos ) {
|
||||
wstring headword = curString.substr( start_pos );
|
||||
if ( end_pos == std::u32string::npos ) {
|
||||
std::u32string headword = curString.substr( start_pos );
|
||||
if ( !headword.empty() ) {
|
||||
allEntryWords.push_back( headword );
|
||||
}
|
||||
|
@ -1370,7 +1343,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// If there was a zip file, index it too
|
||||
|
||||
if ( zipFileName.size() ) {
|
||||
GD_DPRINTF( "Indexing zip file\n" );
|
||||
qDebug( "Indexing zip file" );
|
||||
|
||||
idxHeader.hasZipFile = 1;
|
||||
|
||||
|
@ -1435,7 +1408,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< GlsDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "GLS dictionary reading failed: %s:%u, error: %s\n", fileName.c_str(), atLine, e.what() );
|
||||
qWarning( "GLS dictionary reading failed: %s:%u, error: %s", fileName.c_str(), atLine, e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,46 +2,30 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "hunspell.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "iconv.hh"
|
||||
#include "folding.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "language.hh"
|
||||
#include "langcoder.hh"
|
||||
|
||||
#include <QRunnable>
|
||||
#include <QThreadPool>
|
||||
#include <QSemaphore>
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
#include <QDir>
|
||||
#include <QCoreApplication>
|
||||
#include <QFileInfo>
|
||||
|
||||
#include <set>
|
||||
#ifndef INCLUDE_LIBRARY_PATH
|
||||
#include <hunspell.hxx>
|
||||
#else
|
||||
#include <hunspell/hunspell.hxx>
|
||||
#endif
|
||||
#include "gddebug.hh"
|
||||
|
||||
#include "utils.hh"
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
#include <hunspell/hunspell.hxx>
|
||||
|
||||
namespace HunspellMorpho {
|
||||
|
||||
using namespace Dictionary;
|
||||
|
||||
using gd::wchar;
|
||||
|
||||
namespace {
|
||||
|
||||
class HunspellDictionary: public Dictionary::Class
|
||||
{
|
||||
string name;
|
||||
Hunspell hunspell;
|
||||
|
||||
#ifdef Q_OS_WIN32
|
||||
|
@ -56,23 +40,13 @@ public:
|
|||
/// files[ 0 ] should be .aff file, files[ 1 ] should be .dic file.
|
||||
HunspellDictionary( string const & id, string const & name_, vector< string > const & files ):
|
||||
Dictionary::Class( id, files ),
|
||||
name( name_ ),
|
||||
#ifdef Q_OS_WIN32
|
||||
hunspell( Utf8ToLocal8Bit( files[ 0 ] ).c_str(), Utf8ToLocal8Bit( files[ 1 ] ).c_str() )
|
||||
#else
|
||||
hunspell( files[ 0 ].c_str(), files[ 1 ].c_str() )
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
dictionaryName = name_;
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
|
@ -85,18 +59,19 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
|
||||
|
||||
sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
|
||||
sptr< WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
|
||||
|
||||
bool isLocalDictionary() override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
vector< wstring > getAlternateWritings( const wstring & word ) noexcept override;
|
||||
vector< std::u32string > getAlternateWritings( const std::u32string & word ) noexcept override;
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -117,27 +92,23 @@ private:
|
|||
// QMutex hunspellMutex;
|
||||
};
|
||||
|
||||
/// Encodes the given string to be passed to the hunspell object. May throw
|
||||
/// Iconv::Ex
|
||||
string encodeToHunspell( Hunspell &, wstring const & );
|
||||
|
||||
/// Decodes the given string returned by the hunspell object. May throw
|
||||
/// Iconv::Ex
|
||||
wstring decodeFromHunspell( Hunspell &, char const * );
|
||||
std::u32string decodeFromHunspell( Hunspell &, char const * );
|
||||
|
||||
/// Generates suggestions via hunspell
|
||||
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell );
|
||||
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell );
|
||||
|
||||
/// Generates suggestions for compound expression
|
||||
void getSuggestionsForExpression( wstring const & expression,
|
||||
vector< wstring > & suggestions,
|
||||
void getSuggestionsForExpression( std::u32string const & expression,
|
||||
vector< std::u32string > & suggestions,
|
||||
QMutex & hunspellMutex,
|
||||
Hunspell & hunspell );
|
||||
|
||||
/// Returns true if the string contains whitespace, false otherwise
|
||||
bool containsWhitespace( wstring const & str )
|
||||
bool containsWhitespace( std::u32string const & str )
|
||||
{
|
||||
wchar const * next = str.c_str();
|
||||
char32_t const * next = str.c_str();
|
||||
|
||||
for ( ; *next; ++next ) {
|
||||
if ( Folding::isWhitespace( *next ) ) {
|
||||
|
@ -167,9 +138,9 @@ void HunspellDictionary::loadIcon() noexcept
|
|||
dictionaryIconLoaded = true;
|
||||
}
|
||||
|
||||
vector< wstring > HunspellDictionary::getAlternateWritings( wstring const & word ) noexcept
|
||||
vector< std::u32string > HunspellDictionary::getAlternateWritings( std::u32string const & word ) noexcept
|
||||
{
|
||||
vector< wstring > results;
|
||||
vector< std::u32string > results;
|
||||
|
||||
if ( containsWhitespace( word ) ) {
|
||||
getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell );
|
||||
|
@ -185,14 +156,14 @@ class HunspellArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
QMutex & hunspellMutex;
|
||||
Hunspell & hunspell;
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
QFuture< void > f;
|
||||
|
||||
public:
|
||||
|
||||
HunspellArticleRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
HunspellArticleRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
hunspellMutex( hunspellMutex_ ),
|
||||
hunspell( hunspell_ ),
|
||||
word( word_ )
|
||||
|
@ -226,7 +197,7 @@ void HunspellArticleRequest::run()
|
|||
vector< string > suggestions;
|
||||
|
||||
try {
|
||||
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
|
||||
if ( containsWhitespace( trimmedWord ) ) {
|
||||
// For now we don't analyze whitespace-containing phrases
|
||||
|
@ -236,25 +207,25 @@ void HunspellArticleRequest::run()
|
|||
|
||||
QMutexLocker _( &hunspellMutex );
|
||||
|
||||
string encodedWord = encodeToHunspell( hunspell, trimmedWord );
|
||||
string trimmedWord_utf8 = Iconv::toUtf8( Text::utf32_le, trimmedWord );
|
||||
|
||||
if ( hunspell.spell( encodedWord ) ) {
|
||||
if ( hunspell.spell( trimmedWord_utf8 ) ) {
|
||||
// Good word -- no spelling suggestions then.
|
||||
finish();
|
||||
return;
|
||||
}
|
||||
|
||||
suggestions = hunspell.suggest( encodedWord );
|
||||
suggestions = hunspell.suggest( trimmedWord_utf8 );
|
||||
if ( !suggestions.empty() ) {
|
||||
// There were some suggestions made for us. Make an appropriate output.
|
||||
|
||||
string result = "<div class=\"gdspellsuggestion\">"
|
||||
+ Html::escape( QCoreApplication::translate( "Hunspell", "Spelling suggestions: " ).toUtf8().data() );
|
||||
|
||||
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
|
||||
|
||||
for ( vector< string >::size_type x = 0; x < suggestions.size(); ++x ) {
|
||||
wstring suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
|
||||
std::u32string suggestion = decodeFromHunspell( hunspell, suggestions[ x ].c_str() );
|
||||
|
||||
if ( Folding::applySimpleCaseOnly( suggestion ) == lowercasedWord ) {
|
||||
// If among suggestions we see the same word just with the different
|
||||
|
@ -265,7 +236,7 @@ void HunspellArticleRequest::run()
|
|||
|
||||
return;
|
||||
}
|
||||
string suggestionUtf8 = Utf8::encode( suggestion );
|
||||
string suggestionUtf8 = Text::toUtf8( suggestion );
|
||||
|
||||
result += "<a href=\"bword:";
|
||||
result += Html::escape( suggestionUtf8 ) + "\">";
|
||||
|
@ -284,17 +255,19 @@ void HunspellArticleRequest::run()
|
|||
}
|
||||
}
|
||||
catch ( Iconv::Ex & e ) {
|
||||
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
|
||||
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Hunspell: error: %s\n", e.what() );
|
||||
qWarning( "Hunspell: error: %s", e.what() );
|
||||
}
|
||||
|
||||
finish();
|
||||
}
|
||||
|
||||
sptr< DataRequest >
|
||||
HunspellDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
|
||||
sptr< DataRequest > HunspellDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const &,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
return std::make_shared< HunspellArticleRequest >( word, getHunspellMutex(), hunspell );
|
||||
|
@ -307,7 +280,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
QMutex & hunspellMutex;
|
||||
Hunspell & hunspell;
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
QFuture< void > f;
|
||||
|
@ -315,7 +288,7 @@ class HunspellHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
HunspellHeadwordsRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
HunspellHeadwordsRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
hunspellMutex( hunspellMutex_ ),
|
||||
hunspell( hunspell_ ),
|
||||
word( word_ )
|
||||
|
@ -347,7 +320,7 @@ void HunspellHeadwordsRequest::run()
|
|||
return;
|
||||
}
|
||||
|
||||
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
|
||||
if ( trimmedWord.size() > 80 ) {
|
||||
// We won't do anything for overly long sentences since that would probably
|
||||
|
@ -357,7 +330,7 @@ void HunspellHeadwordsRequest::run()
|
|||
}
|
||||
|
||||
if ( containsWhitespace( trimmedWord ) ) {
|
||||
vector< wstring > results;
|
||||
vector< std::u32string > results;
|
||||
|
||||
getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell );
|
||||
|
||||
|
@ -367,7 +340,7 @@ void HunspellHeadwordsRequest::run()
|
|||
}
|
||||
}
|
||||
else {
|
||||
QList< wstring > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
|
||||
QList< std::u32string > suggestions = suggest( trimmedWord, hunspellMutex, hunspell );
|
||||
|
||||
if ( !suggestions.empty() ) {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
@ -381,22 +354,18 @@ void HunspellHeadwordsRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hunspell )
|
||||
QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex, Hunspell & hunspell )
|
||||
{
|
||||
QList< wstring > result;
|
||||
|
||||
vector< string > suggestions;
|
||||
QList< std::u32string > result;
|
||||
|
||||
try {
|
||||
QMutexLocker _( &hunspellMutex );
|
||||
|
||||
string encodedWord = encodeToHunspell( hunspell, word );
|
||||
|
||||
suggestions = hunspell.analyze( encodedWord );
|
||||
auto suggestions = hunspell.analyze( Iconv::toUtf8( Text::utf32_le, word ) );
|
||||
if ( !suggestions.empty() ) {
|
||||
// There were some suggestions made for us. Make an appropriate output.
|
||||
|
||||
wstring lowercasedWord = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string lowercasedWord = Folding::applySimpleCaseOnly( word );
|
||||
|
||||
static QRegularExpression cutStem( R"(^\s*st:(((\s+(?!\w{2}:)(?!-)(?!\+))|\S+)+))" );
|
||||
|
||||
|
@ -409,11 +378,11 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
|
|||
suggestion.chop( suggestion.length() - n );
|
||||
}
|
||||
|
||||
GD_DPRINTF( ">>>Sugg: %s\n", suggestion.toLocal8Bit().data() );
|
||||
qDebug( ">>>Sugg: %s", suggestion.toLocal8Bit().data() );
|
||||
|
||||
auto match = cutStem.match( suggestion.trimmed() );
|
||||
if ( match.hasMatch() ) {
|
||||
wstring alt = match.captured( 1 ).toStdU32String();
|
||||
std::u32string alt = match.captured( 1 ).toStdU32String();
|
||||
|
||||
if ( Folding::applySimpleCaseOnly( alt ) != lowercasedWord ) // No point in providing same word
|
||||
{
|
||||
|
@ -424,14 +393,14 @@ QList< wstring > suggest( wstring & word, QMutex & hunspellMutex, Hunspell & hun
|
|||
}
|
||||
}
|
||||
catch ( Iconv::Ex & e ) {
|
||||
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
|
||||
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( wstring const & word )
|
||||
sptr< WordSearchRequest > HunspellDictionary::findHeadwordsForSynonym( std::u32string const & word )
|
||||
|
||||
{
|
||||
return std::make_shared< HunspellHeadwordsRequest >( word, getHunspellMutex(), hunspell );
|
||||
|
@ -445,14 +414,14 @@ class HunspellPrefixMatchRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
QMutex & hunspellMutex;
|
||||
Hunspell & hunspell;
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
QFuture< void > f;
|
||||
|
||||
public:
|
||||
|
||||
HunspellPrefixMatchRequest( wstring const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
HunspellPrefixMatchRequest( std::u32string const & word_, QMutex & hunspellMutex_, Hunspell & hunspell_ ):
|
||||
hunspellMutex( hunspellMutex_ ),
|
||||
hunspell( hunspell_ ),
|
||||
word( word_ )
|
||||
|
@ -485,7 +454,7 @@ void HunspellPrefixMatchRequest::run()
|
|||
}
|
||||
|
||||
try {
|
||||
wstring trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( word );
|
||||
|
||||
if ( trimmedWord.empty() || containsWhitespace( trimmedWord ) ) {
|
||||
// For now we don't analyze whitespace-containing phrases
|
||||
|
@ -495,9 +464,7 @@ void HunspellPrefixMatchRequest::run()
|
|||
|
||||
QMutexLocker _( &hunspellMutex );
|
||||
|
||||
string encodedWord = encodeToHunspell( hunspell, trimmedWord );
|
||||
|
||||
if ( hunspell.spell( encodedWord ) ) {
|
||||
if ( hunspell.spell( Iconv::toUtf8( Text::utf32_le, trimmedWord ) ) ) {
|
||||
// Known word -- add it to the result
|
||||
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
@ -506,20 +473,20 @@ void HunspellPrefixMatchRequest::run()
|
|||
}
|
||||
}
|
||||
catch ( Iconv::Ex & e ) {
|
||||
gdWarning( "Hunspell: charset conversion error, no processing's done: %s\n", e.what() );
|
||||
qWarning( "Hunspell: charset conversion error, no processing's done: %s", e.what() );
|
||||
}
|
||||
|
||||
finish();
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
|
||||
sptr< WordSearchRequest > HunspellDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
|
||||
|
||||
{
|
||||
return std::make_shared< HunspellPrefixMatchRequest >( word, getHunspellMutex(), hunspell );
|
||||
}
|
||||
|
||||
void getSuggestionsForExpression( wstring const & expression,
|
||||
vector< wstring > & suggestions,
|
||||
void getSuggestionsForExpression( std::u32string const & expression,
|
||||
vector< std::u32string > & suggestions,
|
||||
QMutex & hunspellMutex,
|
||||
Hunspell & hunspell )
|
||||
{
|
||||
|
@ -527,15 +494,15 @@ void getSuggestionsForExpression( wstring const & expression,
|
|||
// This is useful for compound expressions where some words are
|
||||
// in a different form, e.g. "dozing off" -> "doze off".
|
||||
|
||||
wstring trimmedWord = Folding::trimWhitespaceOrPunct( expression );
|
||||
wstring word, punct;
|
||||
QList< wstring > words;
|
||||
std::u32string trimmedWord = Folding::trimWhitespaceOrPunct( expression );
|
||||
std::u32string word, punct;
|
||||
QList< std::u32string > words;
|
||||
|
||||
suggestions.clear();
|
||||
|
||||
// Parse string to separate words
|
||||
|
||||
for ( wchar const * c = trimmedWord.c_str();; ++c ) {
|
||||
for ( char32_t const * c = trimmedWord.c_str();; ++c ) {
|
||||
if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( *c ) ) {
|
||||
if ( word.size() ) {
|
||||
words.push_back( word );
|
||||
|
@ -566,7 +533,7 @@ void getSuggestionsForExpression( wstring const & expression,
|
|||
|
||||
// Combine result strings from suggestions
|
||||
|
||||
QList< wstring > results;
|
||||
QList< std::u32string > results;
|
||||
|
||||
for ( const auto & i : words ) {
|
||||
word = i;
|
||||
|
@ -576,13 +543,13 @@ void getSuggestionsForExpression( wstring const & expression,
|
|||
}
|
||||
}
|
||||
else {
|
||||
QList< wstring > sugg = suggest( word, hunspellMutex, hunspell );
|
||||
QList< std::u32string > sugg = suggest( word, hunspellMutex, hunspell );
|
||||
int suggNum = sugg.size() + 1;
|
||||
if ( suggNum > 3 ) {
|
||||
suggNum = 3;
|
||||
}
|
||||
int resNum = results.size();
|
||||
wstring resultStr;
|
||||
std::u32string resultStr;
|
||||
|
||||
if ( resNum == 0 ) {
|
||||
for ( int k = 0; k < suggNum; k++ ) {
|
||||
|
@ -612,35 +579,17 @@ void getSuggestionsForExpression( wstring const & expression,
|
|||
}
|
||||
}
|
||||
|
||||
string encodeToHunspell( Hunspell & hunspell, wstring const & str )
|
||||
{
|
||||
Iconv conv( Iconv::GdWchar );
|
||||
|
||||
void const * in = str.data();
|
||||
size_t inLeft = str.size() * sizeof( wchar );
|
||||
|
||||
vector< char > result( str.size() * 4 + 1 ); // +1 isn't actually needed,
|
||||
// but then iconv complains on empty
|
||||
// words
|
||||
|
||||
void * out = &result.front();
|
||||
size_t outLeft = result.size();
|
||||
|
||||
QString convStr = conv.convert( in, inLeft );
|
||||
return convStr.toStdString();
|
||||
}
|
||||
|
||||
wstring decodeFromHunspell( Hunspell & hunspell, char const * str )
|
||||
std::u32string decodeFromHunspell( Hunspell & hunspell, char const * str )
|
||||
{
|
||||
Iconv conv( hunspell.get_dic_encoding() );
|
||||
|
||||
void const * in = str;
|
||||
size_t inLeft = strlen( str );
|
||||
|
||||
vector< wchar > result( inLeft + 1 ); // +1 isn't needed, but see above
|
||||
vector< char32_t > result( inLeft + 1 ); // +1 isn't needed, but see above
|
||||
|
||||
void * out = &result.front();
|
||||
size_t outLeft = result.size() * sizeof( wchar );
|
||||
size_t outLeft = result.size() * sizeof( char32_t );
|
||||
|
||||
QString convStr = conv.convert( in, inLeft );
|
||||
return convStr.toStdU32String();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include "lingualibre.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "audiolink.hh"
|
||||
|
||||
#include <QJsonArray>
|
||||
|
@ -40,8 +40,8 @@ class LinguaArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
LinguaArticleRequest( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
LinguaArticleRequest( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
QString const & languageCode_,
|
||||
QString const & langWikipediaID_,
|
||||
string const & dictionaryId_,
|
||||
|
@ -51,7 +51,7 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
|
||||
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
|
||||
|
||||
private slots:
|
||||
virtual void requestFinished( QNetworkReply * );
|
||||
|
@ -60,7 +60,6 @@ private slots:
|
|||
class LinguaDictionary: public Dictionary::Class
|
||||
{
|
||||
|
||||
string name;
|
||||
QString languageCode;
|
||||
QString langWikipediaID;
|
||||
QNetworkAccessManager & netMgr;
|
||||
|
@ -68,10 +67,10 @@ class LinguaDictionary: public Dictionary::Class
|
|||
public:
|
||||
LinguaDictionary( string const & id, string name_, QString languageCode_, QNetworkAccessManager & netMgr_ ):
|
||||
Dictionary::Class( id, vector< string >() ),
|
||||
name( std::move( name_ ) ),
|
||||
languageCode( std::move( languageCode_ ) ),
|
||||
netMgr( netMgr_ )
|
||||
{
|
||||
dictionaryName = name_;
|
||||
/* map of iso lang code to wikipedia lang id
|
||||
|
||||
Data was obtained by this query on https://commons-query.wikimedia.org/
|
||||
|
@ -166,16 +165,6 @@ WHERE {
|
|||
}
|
||||
}
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return 0;
|
||||
|
@ -186,7 +175,7 @@ WHERE {
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ ) override
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const & /*word*/, unsigned long /*maxResults*/ ) override
|
||||
{
|
||||
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
|
||||
|
||||
|
@ -195,7 +184,10 @@ WHERE {
|
|||
return sr;
|
||||
}
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ) override
|
||||
sptr< DataRequest > getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ) override
|
||||
{
|
||||
if ( word.size() < 50 ) {
|
||||
return std::make_shared< LinguaArticleRequest >( word, alts, languageCode, langWikipediaID, getId(), netMgr );
|
||||
|
@ -242,8 +234,8 @@ void LinguaArticleRequest::cancel()
|
|||
finish();
|
||||
}
|
||||
|
||||
LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
|
||||
const vector< wstring > & alts,
|
||||
LinguaArticleRequest::LinguaArticleRequest( const std::u32string & str,
|
||||
const vector< std::u32string > & alts,
|
||||
const QString & languageCode_,
|
||||
const QString & langWikipediaID,
|
||||
const string & dictionaryId_,
|
||||
|
@ -256,7 +248,7 @@ LinguaArticleRequest::LinguaArticleRequest( const wstring & str,
|
|||
addQuery( mgr, str );
|
||||
}
|
||||
|
||||
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring & word )
|
||||
void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const std::u32string & word )
|
||||
{
|
||||
|
||||
// Doc of the <https://www.mediawiki.org/wiki/API:Query>
|
||||
|
@ -284,7 +276,7 @@ void LinguaArticleRequest::addQuery( QNetworkAccessManager & mgr, const wstring
|
|||
auto netReply = std::shared_ptr< QNetworkReply >( mgr.get( netRequest ) );
|
||||
|
||||
|
||||
netReplies.emplace_back( netReply, Utf8::encode( word ) );
|
||||
netReplies.emplace_back( netReply, Text::toUtf8( word ) );
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -11,17 +11,10 @@
|
|||
#include "dict/sounddir.hh"
|
||||
#include "dict/hunspell.hh"
|
||||
#include "dictdfiles.hh"
|
||||
#include "dict/romaji.hh"
|
||||
#include "dict/customtransliteration.hh"
|
||||
#include "dict/russiantranslit.hh"
|
||||
#include "dict/german.hh"
|
||||
#include "dict/greektranslit.hh"
|
||||
#include "dict/belarusiantranslit.hh"
|
||||
#include "dict/website.hh"
|
||||
#include "dict/forvo.hh"
|
||||
#include "dict/programs.hh"
|
||||
#include "dict/voiceengines.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "dict/xdxf.hh"
|
||||
#include "dict/sdict.hh"
|
||||
#include "dict/aard.hh"
|
||||
|
@ -34,12 +27,19 @@
|
|||
#include "dict/lingualibre.hh"
|
||||
#include "metadata.hh"
|
||||
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#include "dict/transliteration/belarusian.hh"
|
||||
#include "dict/transliteration/custom.hh"
|
||||
#include "dict/transliteration/german.hh"
|
||||
#include "dict/transliteration/greek.hh"
|
||||
#include "dict/transliteration/romaji.hh"
|
||||
#include "dict/transliteration/russian.hh"
|
||||
|
||||
#ifdef EPWING_SUPPORT
|
||||
#include "dict/epwing.hh"
|
||||
#endif
|
||||
|
||||
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
|
||||
#include "dict/chinese.hh"
|
||||
#include "dict/transliteration/chinese.hh"
|
||||
#endif
|
||||
|
||||
#include <QMessageBox>
|
||||
|
@ -83,7 +83,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
|
|||
<< "*.zim"
|
||||
<< "*.zimaa"
|
||||
#endif
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#ifdef EPWING_SUPPORT
|
||||
<< "*catalogs"
|
||||
#endif
|
||||
;
|
||||
|
@ -181,7 +181,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
|||
#ifdef MAKE_ZIM_SUPPORT
|
||||
addDicts( Zim::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this, maxHeadwordToExpand ) );
|
||||
#endif
|
||||
#ifndef NO_EPWING_SUPPORT
|
||||
#ifdef EPWING_SUPPORT
|
||||
addDicts( Epwing::makeDictionaries( allFiles, Config::getIndexDir().toStdString(), *this ) );
|
||||
#endif
|
||||
}
|
||||
|
@ -243,10 +243,10 @@ void loadDictionaries( QWidget * parent,
|
|||
///// We create transliterations synchronously since they are very simple
|
||||
|
||||
#ifdef MAKE_CHINESE_CONVERSION_SUPPORT
|
||||
addDicts( Chinese::makeDictionaries( cfg.transliteration.chinese ) );
|
||||
addDicts( ChineseTranslit::makeDictionaries( cfg.transliteration.chinese ) );
|
||||
#endif
|
||||
|
||||
addDicts( Romaji::makeDictionaries( cfg.transliteration.romaji ) );
|
||||
addDicts( RomajiTranslit::makeDictionaries( cfg.transliteration.romaji ) );
|
||||
addDicts( CustomTranslit::makeDictionaries( cfg.transliteration.customTrans ) );
|
||||
|
||||
// Make Russian transliteration
|
||||
|
@ -274,13 +274,13 @@ void loadDictionaries( QWidget * parent,
|
|||
addDicts( Forvo::makeDictionaries( loadDicts, cfg.forvo, dictNetMgr ) );
|
||||
addDicts( Lingua::makeDictionaries( loadDicts, cfg.lingua, dictNetMgr ) );
|
||||
addDicts( Programs::makeDictionaries( cfg.programs ) );
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
addDicts( VoiceEngines::makeDictionaries( cfg.voiceEngines ) );
|
||||
#endif
|
||||
addDicts( DictServer::makeDictionaries( cfg.dictServers ) );
|
||||
|
||||
|
||||
GD_DPRINTF( "Load done\n" );
|
||||
qDebug( "Load done" );
|
||||
|
||||
// Remove any stale index files
|
||||
|
||||
|
@ -290,12 +290,12 @@ void loadDictionaries( QWidget * parent,
|
|||
for ( unsigned x = dictionaries.size(); x--; ) {
|
||||
ret = ids.insert( dictionaries[ x ]->getId() );
|
||||
if ( !ret.second ) {
|
||||
gdWarning( R"(Duplicate dictionary ID found: ID=%s, name="%s", path="%s")",
|
||||
dictionaries[ x ]->getId().c_str(),
|
||||
dictionaries[ x ]->getName().c_str(),
|
||||
dictionaries[ x ]->getDictionaryFilenames().empty() ?
|
||||
"" :
|
||||
dictionaries[ x ]->getDictionaryFilenames()[ 0 ].c_str() );
|
||||
qWarning( R"(Duplicate dictionary ID found: ID=%s, name="%s", path="%s")",
|
||||
dictionaries[ x ]->getId().c_str(),
|
||||
dictionaries[ x ]->getName().c_str(),
|
||||
dictionaries[ x ]->getDictionaryFilenames().empty() ?
|
||||
"" :
|
||||
dictionaries[ x ]->getDictionaryFilenames()[ 0 ].c_str() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,19 +5,14 @@
|
|||
#include "dictfile.hh"
|
||||
#include "iconv.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "btreeidx.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#define OV_EXCLUDE_STATIC_CALLBACKS
|
||||
#include <vorbis/vorbisfile.h>
|
||||
#include <QDir>
|
||||
|
@ -29,7 +24,6 @@
|
|||
namespace Lsa {
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::map;
|
||||
using std::multimap;
|
||||
using std::set;
|
||||
|
@ -65,7 +59,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -149,7 +143,7 @@ Entry::Entry( File::Index & f )
|
|||
// Read the size of the recording, in samples
|
||||
samplesLength = f.read< uint32_t >();
|
||||
|
||||
name = Iconv::toUtf8( Iconv::Utf16Le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
|
||||
name = Iconv::toUtf8( Text::utf16_le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
|
||||
}
|
||||
|
||||
class LsaDictionary: public BtreeIndexing::BtreeDictionary
|
||||
|
@ -164,11 +158,6 @@ public:
|
|||
|
||||
string getName() noexcept override;
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.soundsCount;
|
||||
|
@ -179,8 +168,10 @@ public:
|
|||
return getArticleCount();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -201,7 +192,7 @@ string LsaDictionary::getName() noexcept
|
|||
|
||||
LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() )
|
||||
{
|
||||
// Initialize the index
|
||||
|
@ -209,9 +200,9 @@ LsaDictionary::LsaDictionary( string const & id, string const & indexFile, vecto
|
|||
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > LsaDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -225,13 +216,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, string > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, string > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonyms make it so that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -246,12 +237,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, string > & mapToUse = ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
multimap< std::u32string, string > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.word ) );
|
||||
|
||||
|
@ -264,7 +256,7 @@ sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, string >::const_iterator i;
|
||||
multimap< std::u32string, string >::const_iterator i;
|
||||
|
||||
result += "<table class=\"lsa_play\">";
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
|
@ -399,13 +391,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
|
|||
|
||||
string strippedName = Utils::endsWithIgnoreCase( name, ".wav" ) ? string( name, 0, name.size() - 4 ) : name;
|
||||
|
||||
vector< WordArticleLink > chain = findArticles( Utf8::decode( strippedName ) );
|
||||
vector< WordArticleLink > chain = findArticles( Text::toUtf32( strippedName ) );
|
||||
|
||||
if ( chain.empty() ) {
|
||||
return std::make_shared< Dictionary::DataRequestInstant >( false ); // No such resource
|
||||
}
|
||||
|
||||
File::Index f( getDictionaryFilenames()[ 0 ], "rb" );
|
||||
File::Index f( getDictionaryFilenames()[ 0 ], QIODevice::ReadOnly );
|
||||
|
||||
f.seek( chain[ 0 ].articleOffset );
|
||||
Entry e( f );
|
||||
|
@ -468,13 +460,13 @@ sptr< Dictionary::DataRequest > LsaDictionary::getResource( string const & name
|
|||
long result = ov_read( &vf, ptr, left, 0, 2, 1, &bitstream );
|
||||
|
||||
if ( result <= 0 ) {
|
||||
gdWarning( "Failed to read Vorbis data (code = %ld)\n", result );
|
||||
qWarning( "Failed to read Vorbis data (code = %ld)", result );
|
||||
memset( ptr, 0, left );
|
||||
break;
|
||||
}
|
||||
|
||||
if ( result > left ) {
|
||||
GD_FDPRINTF( stderr, "Warning: Vorbis decode returned more data than requested.\n" );
|
||||
qWarning( "Warning: Vorbis decode returned more data than requested." );
|
||||
|
||||
result = left;
|
||||
}
|
||||
|
@ -522,7 +514,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
try {
|
||||
File::Index f( *i, "rb" );
|
||||
File::Index f( *i, QIODevice::ReadOnly );
|
||||
|
||||
/// Check the signature
|
||||
|
||||
|
@ -543,11 +535,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
// Building the index
|
||||
|
||||
gdDebug( "Lsa: Building the index for dictionary: %s\n", i->c_str() );
|
||||
qDebug( "Lsa: Building the index for dictionary: %s", i->c_str() );
|
||||
|
||||
initializing.indexingDictionary( Utils::Fs::basename( *i ) );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -563,7 +555,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
/// XXX handle big-endian machines here!
|
||||
auto entriesCount = f.read< uint32_t >();
|
||||
|
||||
GD_DPRINTF( "%s: %u entries\n", i->c_str(), entriesCount );
|
||||
qDebug( "%s: %u entries", i->c_str(), entriesCount );
|
||||
|
||||
idxHeader.soundsCount = entriesCount;
|
||||
|
||||
|
@ -578,11 +570,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// Remove the extension, no need for that in the index
|
||||
e.name = stripExtension( e.name );
|
||||
|
||||
GD_DPRINTF( "Read filename %s (%u at %u)<\n", e.name.c_str(), e.samplesLength, e.samplesOffset );
|
||||
qDebug( "Read filename %s (%u at %u)<", e.name.c_str(), e.samplesLength, e.samplesOffset );
|
||||
|
||||
// Insert new entry into an index
|
||||
|
||||
indexedWords.addWord( Utf8::decode( e.name ), offset );
|
||||
indexedWords.addWord( Text::toUtf32( e.name ), offset );
|
||||
}
|
||||
|
||||
idxHeader.vorbisOffset = f.tell();
|
||||
|
@ -617,7 +609,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< LsaDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Lingvo's LSA reading failed: %s, error: %s\n", i->c_str(), e.what() );
|
||||
qWarning( "Lingvo's LSA reading failed: %s, error: %s", i->c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,6 @@
|
|||
#include <QtCore5Compat/QTextCodec>
|
||||
|
||||
#include "decompress.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "ripemd.hh"
|
||||
#include "utils.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
@ -118,7 +117,7 @@ bool MdictParser::open( const char * filename )
|
|||
filename_ = QString::fromUtf8( filename );
|
||||
file_ = new QFile( filename_ );
|
||||
|
||||
gdDebug( "MdictParser: open %s", filename );
|
||||
qDebug( "MdictParser: open %s", filename );
|
||||
|
||||
if ( file_.isNull() || !file_->exists() ) {
|
||||
return false;
|
||||
|
@ -233,7 +232,7 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
|
|||
case 0x00000000:
|
||||
// No compression
|
||||
if ( !checkAdler32( buf, size, checksum ) ) {
|
||||
gdWarning( "MDict: parseCompressedBlock: plain: checksum not match" );
|
||||
qWarning( "MDict: parseCompressedBlock: plain: checksum not match" );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -248,13 +247,13 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
|
|||
result = lzo1x_decompress_safe( (const uchar *)buf, size, (uchar *)decompressedBlock.data(), &blockSize, NULL );
|
||||
|
||||
if ( result != LZO_E_OK || blockSize != (lzo_uint)decompressedBlockSize ) {
|
||||
gdWarning( "MDict: parseCompressedBlock: decompression failed" );
|
||||
qWarning( "MDict: parseCompressedBlock: decompression failed" );
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( checksum
|
||||
!= lzo_adler32( lzo_adler32( 0, NULL, 0 ), (const uchar *)decompressedBlock.constData(), blockSize ) ) {
|
||||
gdWarning( "MDict: parseCompressedBlock: lzo: checksum does not match" );
|
||||
qWarning( "MDict: parseCompressedBlock: lzo: checksum does not match" );
|
||||
return false;
|
||||
}
|
||||
} break;
|
||||
|
@ -263,12 +262,12 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
|
|||
// zlib compression
|
||||
decompressedBlock = zlibDecompress( buf, size, checksum );
|
||||
if ( decompressedBlock.isEmpty() ) {
|
||||
gdWarning( "MDict: parseCompressedBlock: zlib: failed to decompress or checksum does not match" );
|
||||
qWarning( "MDict: parseCompressedBlock: zlib: failed to decompress or checksum does not match" );
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
gdWarning( "MDict: parseCompressedBlock: unknown type" );
|
||||
qWarning( "MDict: parseCompressedBlock: unknown type" );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -320,7 +319,7 @@ bool MdictParser::readHeader( QDataStream & in )
|
|||
in.setByteOrder( QDataStream::LittleEndian );
|
||||
in >> checksum;
|
||||
if ( !checkAdler32( headerTextUtf16.constData(), headerTextUtf16.size(), checksum ) ) {
|
||||
gdWarning( "MDict: readHeader: checksum does not match" );
|
||||
qWarning( "MDict: readHeader: checksum does not match" );
|
||||
return false;
|
||||
}
|
||||
headerTextUtf16.clear();
|
||||
|
|
121
src/dict/mdx.cc
121
src/dict/mdx.cc
|
@ -4,29 +4,21 @@
|
|||
#include "mdx.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "dictfile.hh"
|
||||
#include "wstring.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "langcoder.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
#include "ex.hh"
|
||||
#include "mdictparser.hh"
|
||||
#include "filetype.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <list>
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include "globalregex.hh"
|
||||
#include "tiff.hh"
|
||||
#include "utils.hh"
|
||||
|
@ -35,16 +27,15 @@
|
|||
#include <QDir>
|
||||
#include <QRegularExpression>
|
||||
#include <QString>
|
||||
#include <QStringBuilder>
|
||||
#include <QThreadPool>
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
|
||||
namespace Mdx {
|
||||
|
||||
using std::map;
|
||||
using std::multimap;
|
||||
using std::set;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
using std::list;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
|
@ -135,7 +126,7 @@ public:
|
|||
|
||||
/// Checks whether the given file exists in the mdd file or not.
|
||||
/// Note that this function is thread-safe, since it does not access mdd file.
|
||||
bool hasFile( gd::wstring const & name )
|
||||
bool hasFile( std::u32string const & name )
|
||||
{
|
||||
if ( !isFileOpen ) {
|
||||
return false;
|
||||
|
@ -146,7 +137,7 @@ public:
|
|||
|
||||
/// Attempts loading the given file into the given vector. Returns true on
|
||||
/// success, false otherwise.
|
||||
bool loadFile( gd::wstring const & name, std::vector< char > & result )
|
||||
bool loadFile( std::u32string const & name, std::vector< char > & result )
|
||||
{
|
||||
if ( !isFileOpen ) {
|
||||
return false;
|
||||
|
@ -218,16 +209,6 @@ public:
|
|||
|
||||
void deferredInit() override;
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -248,8 +229,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
QString const & getDescription() override;
|
||||
|
||||
|
@ -297,12 +280,12 @@ private:
|
|||
|
||||
friend class MdxArticleRequest;
|
||||
friend class MddResourceRequest;
|
||||
void loadResourceFile( const wstring & resourceName, vector< char > & data );
|
||||
void loadResourceFile( const std::u32string & resourceName, vector< char > & data );
|
||||
};
|
||||
|
||||
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxFileName( indexFile ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset ),
|
||||
|
@ -310,12 +293,7 @@ MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vecto
|
|||
{
|
||||
// Read the dictionary's name
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
size_t len = idx.read< uint32_t >();
|
||||
vector< char > buf( len );
|
||||
if ( len > 0 ) {
|
||||
idx.read( &buf.front(), len );
|
||||
dictionaryName = string( &buf.front(), len );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
//fallback, use filename as dictionary name
|
||||
if ( dictionaryName.empty() ) {
|
||||
|
@ -324,12 +302,7 @@ MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vecto
|
|||
}
|
||||
|
||||
// then read the dictionary's encoding
|
||||
len = idx.read< uint32_t >();
|
||||
if ( len > 0 ) {
|
||||
buf.resize( len );
|
||||
idx.read( &buf.front(), len );
|
||||
encoding = string( &buf.front(), len );
|
||||
}
|
||||
idx.readU32SizeAndData<>( encoding );
|
||||
|
||||
dictFile.setFileName( QString::fromUtf8( dictionaryFiles[ 0 ].c_str() ) );
|
||||
dictFile.open( QIODevice::ReadOnly );
|
||||
|
@ -467,7 +440,7 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
// return;
|
||||
|
||||
|
||||
gdDebug( "MDict: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
qDebug( "MDict: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
auto _dict = std::make_shared< MdxDictionary >( this->getId(), idxFileName, this->getDictionaryFilenames() );
|
||||
|
@ -478,7 +451,7 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "MDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
qWarning( "MDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -493,7 +466,7 @@ void MdxDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "MDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
qWarning( "MDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -514,8 +487,8 @@ sptr< Dictionary::DataRequest > MdxDictionary::getSearchResults( QString const &
|
|||
|
||||
class MdxArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
MdxDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -524,8 +497,8 @@ class MdxArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
MdxArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
MdxArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
MdxDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -628,8 +601,8 @@ void MdxArticleRequest::run()
|
|||
|
||||
// Handle internal redirects
|
||||
if ( strncmp( articleBody.c_str(), "@@@LINK=", 8 ) == 0 ) {
|
||||
wstring target = Utf8::decode( articleBody.c_str() + 8 );
|
||||
target = Folding::trimWhitespace( target );
|
||||
std::u32string target = Text::toUtf32( articleBody.c_str() + 8 );
|
||||
target = Folding::trimWhitespace( target );
|
||||
// Make an additional query for this redirection
|
||||
vector< WordArticleLink > altChain = dict.findArticles( target );
|
||||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
|
@ -652,9 +625,9 @@ void MdxArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
|
||||
const vector< wstring > & alts,
|
||||
const wstring &,
|
||||
sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const std::u32string & word,
|
||||
const vector< std::u32string > & alts,
|
||||
const std::u32string &,
|
||||
bool ignoreDiacritics )
|
||||
{
|
||||
return std::make_shared< MdxArticleRequest >( word, alts, *this, ignoreDiacritics );
|
||||
|
@ -664,7 +637,7 @@ sptr< Dictionary::DataRequest > MdxDictionary::getArticle( const wstring & word,
|
|||
class MddResourceRequest: public Dictionary::DataRequest
|
||||
{
|
||||
MdxDictionary & dict;
|
||||
wstring resourceName;
|
||||
std::u32string resourceName;
|
||||
QAtomicInt isCancelled;
|
||||
QFuture< void > f;
|
||||
|
||||
|
@ -673,7 +646,7 @@ public:
|
|||
MddResourceRequest( MdxDictionary & dict_, string const & resourceName_ ):
|
||||
Dictionary::DataRequest( &dict_ ),
|
||||
dict( dict_ ),
|
||||
resourceName( Utf8::decode( resourceName_ ) )
|
||||
resourceName( Text::toUtf32( resourceName_ ) )
|
||||
{
|
||||
f = QtConcurrent::run( [ this ]() {
|
||||
this->run();
|
||||
|
@ -748,7 +721,7 @@ void MddResourceRequest::run()
|
|||
}
|
||||
|
||||
// In order to prevent recursive internal redirection...
|
||||
set< wstring, std::less<> > resourceIncluded;
|
||||
set< std::u32string, std::less<> > resourceIncluded;
|
||||
|
||||
for ( ;; ) {
|
||||
// Some runnables linger enough that they are cancelled before they start
|
||||
|
@ -756,7 +729,7 @@ void MddResourceRequest::run()
|
|||
finish();
|
||||
return;
|
||||
}
|
||||
string u8ResourceName = Utf8::encode( resourceName );
|
||||
string u8ResourceName = Text::toUtf8( resourceName );
|
||||
if ( !resourceIncluded.insert( resourceName ).second ) {
|
||||
finish();
|
||||
return;
|
||||
|
@ -904,7 +877,8 @@ QString & MdxDictionary::filterResource( QString & article )
|
|||
void MdxDictionary::replaceLinks( QString & id, QString & article )
|
||||
{
|
||||
QString articleNewText;
|
||||
int linkPos = 0;
|
||||
qsizetype linkPos = 0;
|
||||
|
||||
QRegularExpressionMatchIterator it = RX::Mdx::allLinksRe.globalMatch( article );
|
||||
while ( it.hasNext() ) {
|
||||
QRegularExpressionMatch allLinksMatch = it.next();
|
||||
|
@ -980,7 +954,8 @@ void MdxDictionary::replaceLinks( QString & id, QString & article )
|
|||
articleNewText += linkTxt;
|
||||
match = RX::Mdx::closeScriptTagRe.match( article, linkPos );
|
||||
if ( match.hasMatch() ) {
|
||||
articleNewText += article.mid( linkPos, match.capturedEnd() - linkPos );
|
||||
articleNewText += QString( QStringLiteral( "gdOnReady(()=>{%1});</script>" ) )
|
||||
.arg( article.mid( linkPos, match.capturedStart() - linkPos ) );
|
||||
linkPos = match.capturedEnd();
|
||||
}
|
||||
continue;
|
||||
|
@ -1141,7 +1116,7 @@ QString MdxDictionary::getCachedFileName( QString filename )
|
|||
QFileInfo info( cacheDirName );
|
||||
if ( !info.exists() || !info.isDir() ) {
|
||||
if ( !dir.mkdir( cacheDirName ) ) {
|
||||
gdWarning( "Mdx: can't create cache directory \"%s\"", cacheDirName.toUtf8().data() );
|
||||
qWarning( "Mdx: can't create cache directory \"%s\"", cacheDirName.toUtf8().data() );
|
||||
return QString();
|
||||
}
|
||||
}
|
||||
|
@ -1159,7 +1134,7 @@ QString MdxDictionary::getCachedFileName( QString filename )
|
|||
QFileInfo dirInfo( dirName );
|
||||
if ( !dirInfo.exists() ) {
|
||||
if ( !dir.mkdir( dirName ) ) {
|
||||
gdWarning( "Mdx: can't create cache directory \"%s\"", dirName.toUtf8().data() );
|
||||
qWarning( "Mdx: can't create cache directory \"%s\"", dirName.toUtf8().data() );
|
||||
return QString();
|
||||
}
|
||||
}
|
||||
|
@ -1174,14 +1149,14 @@ QString MdxDictionary::getCachedFileName( QString filename )
|
|||
}
|
||||
QFile f( fullName );
|
||||
if ( !f.open( QFile::WriteOnly ) ) {
|
||||
gdWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
|
||||
qWarning( R"(Mdx: file "%s" creating error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
|
||||
return QString();
|
||||
}
|
||||
gd::wstring resourceName = filename.toStdU32String();
|
||||
std::u32string resourceName = filename.toStdU32String();
|
||||
vector< char > data;
|
||||
|
||||
// In order to prevent recursive internal redirection...
|
||||
set< wstring, std::less<> > resourceIncluded;
|
||||
set< std::u32string, std::less<> > resourceIncluded;
|
||||
|
||||
for ( ;; ) {
|
||||
if ( !resourceIncluded.insert( resourceName ).second ) {
|
||||
|
@ -1214,16 +1189,16 @@ QString MdxDictionary::getCachedFileName( QString filename )
|
|||
f.close();
|
||||
|
||||
if ( n < (qint64)data.size() ) {
|
||||
gdWarning( R"(Mdx: file "%s" writing error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
|
||||
qWarning( R"(Mdx: file "%s" writing error: "%s")", fullName.toUtf8().data(), f.errorString().toUtf8().data() );
|
||||
return QString();
|
||||
}
|
||||
return fullName;
|
||||
}
|
||||
|
||||
void MdxDictionary::loadResourceFile( const wstring & resourceName, vector< char > & data )
|
||||
void MdxDictionary::loadResourceFile( const std::u32string & resourceName, vector< char > & data )
|
||||
{
|
||||
wstring newResourceName = resourceName;
|
||||
string u8ResourceName = Utf8::encode( resourceName );
|
||||
std::u32string newResourceName = resourceName;
|
||||
string u8ResourceName = Text::toUtf8( resourceName );
|
||||
|
||||
// Convert to the Windows separator
|
||||
std::replace( newResourceName.begin(), newResourceName.end(), '/', '\\' );
|
||||
|
@ -1307,7 +1282,7 @@ private:
|
|||
|
||||
static bool indexIsOldOrBad( vector< string > const & dictFiles, string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
IdxHeader header;
|
||||
|
||||
return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || header.signature != kSignature
|
||||
|
@ -1362,7 +1337,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( dictFiles, indexFile ) ) {
|
||||
// Building the index
|
||||
|
||||
gdDebug( "MDict: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "MDict: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
MdictParser parser;
|
||||
list< sptr< MdictParser > > mddParsers;
|
||||
|
@ -1378,14 +1353,14 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( File::exists( *mddIter ) ) {
|
||||
sptr< MdictParser > mddParser = std::make_shared< MdictParser >();
|
||||
if ( !mddParser->open( mddIter->c_str() ) ) {
|
||||
gdWarning( "Broken mdd (resource) file: %s\n", mddIter->c_str() );
|
||||
qWarning( "Broken mdd (resource) file: %s", mddIter->c_str() );
|
||||
continue;
|
||||
}
|
||||
mddParsers.push_back( mddParser );
|
||||
}
|
||||
}
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
// We write a dummy header first. At the end of the process the header
|
||||
|
@ -1453,7 +1428,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// Finish with the chunks
|
||||
idxHeader.chunksOffset = chunks.finish();
|
||||
|
||||
GD_DPRINTF( "Writing index...\n" );
|
||||
qDebug( "Writing index..." );
|
||||
|
||||
// Good. Now build the index
|
||||
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
|
||||
|
|
|
@ -2,14 +2,12 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "mediawiki.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include <QNetworkAccessManager>
|
||||
#include <QNetworkReply>
|
||||
#include <QUrl>
|
||||
#include <QtXml>
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include "gddebug.hh"
|
||||
#include "audiolink.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "utils.hh"
|
||||
|
@ -57,11 +55,6 @@ public:
|
|||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return 0;
|
||||
|
@ -72,9 +65,10 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const &, unsigned long maxResults ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const &, unsigned long maxResults ) override;
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
|
||||
|
||||
quint32 getLangFrom() const override
|
||||
{
|
||||
|
@ -139,7 +133,10 @@ class MediaWikiWordSearchRequest: public MediaWikiWordSearchRequestSlots
|
|||
|
||||
public:
|
||||
|
||||
MediaWikiWordSearchRequest( wstring const &, QString const & url, QString const & lang, QNetworkAccessManager & mgr );
|
||||
MediaWikiWordSearchRequest( std::u32string const &,
|
||||
QString const & url,
|
||||
QString const & lang,
|
||||
QNetworkAccessManager & mgr );
|
||||
|
||||
~MediaWikiWordSearchRequest();
|
||||
|
||||
|
@ -150,13 +147,13 @@ private:
|
|||
void downloadFinished() override;
|
||||
};
|
||||
|
||||
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str,
|
||||
MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( std::u32string const & str,
|
||||
QString const & url,
|
||||
QString const & lang,
|
||||
QNetworkAccessManager & mgr ):
|
||||
isCancelling( false )
|
||||
{
|
||||
GD_DPRINTF( "wiki request begin\n" );
|
||||
qDebug( "wiki request begin" );
|
||||
QUrl reqUrl( url + "/api.php?action=query&list=allpages&aplimit=40&format=xml" );
|
||||
|
||||
GlobalBroadcaster::instance()->addWhitelist( reqUrl.host() );
|
||||
|
@ -180,7 +177,7 @@ MediaWikiWordSearchRequest::MediaWikiWordSearchRequest( wstring const & str,
|
|||
|
||||
MediaWikiWordSearchRequest::~MediaWikiWordSearchRequest()
|
||||
{
|
||||
GD_DPRINTF( "request end\n" );
|
||||
qDebug( "request end" );
|
||||
}
|
||||
|
||||
void MediaWikiWordSearchRequest::cancel()
|
||||
|
@ -194,7 +191,7 @@ void MediaWikiWordSearchRequest::cancel()
|
|||
|
||||
finish();
|
||||
|
||||
GD_DPRINTF( "cancel the request" );
|
||||
qDebug( "cancel the request" );
|
||||
}
|
||||
|
||||
void MediaWikiWordSearchRequest::downloadFinished()
|
||||
|
@ -227,7 +224,7 @@ void MediaWikiWordSearchRequest::downloadFinished()
|
|||
}
|
||||
}
|
||||
}
|
||||
GD_DPRINTF( "done.\n" );
|
||||
qDebug( "done." );
|
||||
}
|
||||
else {
|
||||
setErrorString( netReply->errorString() );
|
||||
|
@ -258,11 +255,11 @@ public:
|
|||
|
||||
QDomElement const sectionsElement = parseNode.firstChildElement( "sections" );
|
||||
if ( sectionsElement.isNull() ) {
|
||||
gdWarning( "MediaWiki: empty table of contents and missing sections element." );
|
||||
qWarning( "MediaWiki: empty table of contents and missing sections element." );
|
||||
return;
|
||||
}
|
||||
|
||||
gdDebug( "MediaWiki: generating table of contents from the sections element." );
|
||||
qDebug( "MediaWiki: generating table of contents from the sections element." );
|
||||
MediaWikiSectionsParser parser;
|
||||
parser.generateTableOfContents( sectionsElement );
|
||||
articleString.replace( emptyTocPos, emptyTocIndicator.size(), parser.tableOfContents );
|
||||
|
@ -343,17 +340,17 @@ bool MediaWikiSectionsParser::addListLevel( QString const & levelString )
|
|||
int const level = levelString.toInt( &convertedToInt );
|
||||
|
||||
if ( !convertedToInt ) {
|
||||
gdWarning( "MediaWiki: sections level is not an integer: %s", levelString.toUtf8().constData() );
|
||||
qWarning( "MediaWiki: sections level is not an integer: %s", levelString.toUtf8().constData() );
|
||||
return false;
|
||||
}
|
||||
if ( level <= 0 ) {
|
||||
gdWarning( "MediaWiki: unsupported nonpositive sections level: %s", levelString.toUtf8().constData() );
|
||||
qWarning( "MediaWiki: unsupported nonpositive sections level: %s", levelString.toUtf8().constData() );
|
||||
return false;
|
||||
}
|
||||
if ( level > previousLevel + 1 ) {
|
||||
gdWarning( "MediaWiki: unsupported sections level increase by more than one: from %d to %s",
|
||||
previousLevel,
|
||||
levelString.toUtf8().constData() );
|
||||
qWarning( "MediaWiki: unsupported sections level increase by more than one: from %d to %s",
|
||||
previousLevel,
|
||||
levelString.toUtf8().constData() );
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -396,8 +393,8 @@ class MediaWikiArticleRequest: public MediaWikiDataRequestSlots
|
|||
|
||||
public:
|
||||
|
||||
MediaWikiArticleRequest( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
MediaWikiArticleRequest( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
QString const & url,
|
||||
QString const & lang,
|
||||
QNetworkAccessManager & mgr,
|
||||
|
@ -407,7 +404,7 @@ public:
|
|||
|
||||
private:
|
||||
|
||||
void addQuery( QNetworkAccessManager & mgr, wstring const & word );
|
||||
void addQuery( QNetworkAccessManager & mgr, std::u32string const & word );
|
||||
|
||||
void requestFinished( QNetworkReply * ) override;
|
||||
|
||||
|
@ -441,8 +438,8 @@ void MediaWikiArticleRequest::cancel()
|
|||
finish();
|
||||
}
|
||||
|
||||
MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
|
||||
vector< wstring > const & alts,
|
||||
MediaWikiArticleRequest::MediaWikiArticleRequest( std::u32string const & str,
|
||||
vector< std::u32string > const & alts,
|
||||
QString const & url_,
|
||||
QString const & lang_,
|
||||
QNetworkAccessManager & mgr,
|
||||
|
@ -464,9 +461,9 @@ MediaWikiArticleRequest::MediaWikiArticleRequest( wstring const & str,
|
|||
}
|
||||
}
|
||||
|
||||
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring const & str )
|
||||
void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, std::u32string const & str )
|
||||
{
|
||||
gdDebug( "MediaWiki: requesting article %s\n", QString::fromStdU32String( str ).toUtf8().data() );
|
||||
qDebug( "MediaWiki: requesting article %s", QString::fromStdU32String( str ).toUtf8().data() );
|
||||
|
||||
QUrl reqUrl( url + "/api.php?action=parse&prop=text|revid|sections&format=xml&redirects" );
|
||||
|
||||
|
@ -490,7 +487,7 @@ void MediaWikiArticleRequest::addQuery( QNetworkAccessManager & mgr, wstring con
|
|||
|
||||
void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
|
||||
{
|
||||
GD_DPRINTF( "Finished.\n" );
|
||||
qDebug( "Finished." );
|
||||
|
||||
if ( isFinished() ) { // Was cancelled
|
||||
return;
|
||||
|
@ -693,7 +690,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
|
|||
}
|
||||
}
|
||||
}
|
||||
GD_DPRINTF( "done.\n" );
|
||||
qDebug( "done." );
|
||||
}
|
||||
else {
|
||||
setErrorString( netReply->errorString() );
|
||||
|
@ -711,7 +708,7 @@ void MediaWikiArticleRequest::requestFinished( QNetworkReply * r )
|
|||
}
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word, unsigned long maxResults )
|
||||
sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( std::u32string const & word, unsigned long maxResults )
|
||||
|
||||
{
|
||||
(void)maxResults;
|
||||
|
@ -725,8 +722,10 @@ sptr< WordSearchRequest > MediaWikiDictionary::prefixMatch( wstring const & word
|
|||
}
|
||||
}
|
||||
|
||||
sptr< DataRequest >
|
||||
MediaWikiDictionary::getArticle( wstring const & word, vector< wstring > const & alts, wstring const &, bool )
|
||||
sptr< DataRequest > MediaWikiDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
if ( word.size() > 80 ) {
|
||||
|
|
|
@ -4,8 +4,7 @@
|
|||
#include "programs.hh"
|
||||
#include "audiolink.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "utf8.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
#include "iconv.hh"
|
||||
#include "utils.hh"
|
||||
#include "globalbroadcaster.hh"
|
||||
|
@ -36,11 +35,6 @@ public:
|
|||
return prg.name.toUtf8().data();
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return 0;
|
||||
|
@ -51,16 +45,17 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long maxResults ) override;
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( std::u32string const &, vector< std::u32string > const & alts, std::u32string const &, bool ) override;
|
||||
|
||||
protected:
|
||||
|
||||
void loadIcon() noexcept override;
|
||||
};
|
||||
|
||||
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word, unsigned long /*maxResults*/ )
|
||||
sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( std::u32string const & word, unsigned long /*maxResults*/ )
|
||||
|
||||
{
|
||||
if ( prg.type == Config::Program::PrefixMatch ) {
|
||||
|
@ -75,8 +70,10 @@ sptr< WordSearchRequest > ProgramsDictionary::prefixMatch( wstring const & word,
|
|||
}
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
|
||||
sptr< Dictionary::DataRequest > ProgramsDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const &,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
switch ( prg.type ) {
|
||||
|
@ -84,7 +81,7 @@ ProgramsDictionary::getArticle( wstring const & word, vector< wstring > const &,
|
|||
// Audio results are instantaneous
|
||||
string result;
|
||||
|
||||
string wordUtf8( Utf8::encode( word ) );
|
||||
string wordUtf8( Text::toUtf8( word ) );
|
||||
|
||||
result += "<table class=\"programs_play\"><tr>";
|
||||
|
||||
|
|
|
@ -6,14 +6,13 @@
|
|||
#include <QProcess>
|
||||
#include "dictionary.hh"
|
||||
#include "config.hh"
|
||||
#include "wstring.hh"
|
||||
#include "text.hh"
|
||||
|
||||
/// Support for arbitrary programs.
|
||||
namespace Programs {
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
|
||||
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Programs const & );
|
||||
|
||||
|
|
|
@ -6,25 +6,18 @@
|
|||
#include "decompress.hh"
|
||||
#include "folding.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "sdict.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include <map>
|
||||
#include <QAtomicInt>
|
||||
#include <QDir>
|
||||
#include <QRegularExpression>
|
||||
#include <QSemaphore>
|
||||
#include <QString>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "utils.hh"
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
namespace Sdict {
|
||||
|
||||
|
@ -33,7 +26,6 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -97,7 +89,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -119,15 +111,6 @@ public:
|
|||
|
||||
~SdictDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -149,8 +132,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
QString const & getDescription() override;
|
||||
|
||||
|
@ -188,19 +173,15 @@ SdictDictionary::SdictDictionary( string const & id,
|
|||
string const & indexFile,
|
||||
vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset ),
|
||||
df( dictionaryFiles[ 0 ], "rb" )
|
||||
df( dictionaryFiles[ 0 ], QIODevice::ReadOnly )
|
||||
{
|
||||
// Read dictionary name
|
||||
|
||||
idx.seek( sizeof( idxHeader ) );
|
||||
vector< char > dName( idx.read< uint32_t >() );
|
||||
if ( dName.size() > 0 ) {
|
||||
idx.read( &dName.front(), dName.size() );
|
||||
dictionaryName = string( &dName.front(), dName.size() );
|
||||
}
|
||||
idx.readU32SizeAndData<>( dictionaryName );
|
||||
|
||||
// Initialize the index
|
||||
|
||||
|
@ -237,7 +218,7 @@ void SdictDictionary::loadIcon() noexcept
|
|||
|
||||
string SdictDictionary::convert( string const & in )
|
||||
{
|
||||
// GD_DPRINTF( "Source>>>>>>>>>>: %s\n\n\n", in.c_str() );
|
||||
// qDebug( "Source>>>>>>>>>>: %s\n\n", in.c_str() );
|
||||
|
||||
string inConverted;
|
||||
|
||||
|
@ -388,14 +369,14 @@ void SdictDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "SDict: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "SDict: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "SDict: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "SDict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -416,7 +397,7 @@ void SdictDictionary::getArticleText( uint32_t articleAddress, QString & headwor
|
|||
}
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "SDict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "SDict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -436,8 +417,8 @@ SdictDictionary::getSearchResults( QString const & searchString, int searchMode,
|
|||
class SdictArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
SdictDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -447,8 +428,8 @@ class SdictArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
SdictArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
SdictArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
SdictDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -492,13 +473,13 @@ void SdictArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -527,12 +508,12 @@ void SdictArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -540,7 +521,7 @@ void SdictArticleRequest::run()
|
|||
articlesIncluded.insert( x.articleOffset );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "SDict: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
|
||||
qWarning( "SDict: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -552,7 +533,7 @@ void SdictArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += dict.isFromLanguageRTL() ? "<h3 dir=\"rtl\">" : "<h3>";
|
||||
|
@ -581,9 +562,9 @@ void SdictArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > SdictDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -651,7 +632,7 @@ QString const & SdictDictionary::getDescription()
|
|||
QObject::tr( "Version: %1%2" ).arg( QString::fromUtf8( str.c_str(), str.size() ) ).arg( "\n\n" );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "SDict: Failed description reading for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "SDict: Failed description reading for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
|
||||
if ( dictionaryDescription.isEmpty() ) {
|
||||
|
@ -687,15 +668,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
try {
|
||||
gdDebug( "SDict: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "SDict: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
File::Index df( fileName, "rb" );
|
||||
File::Index df( fileName, QIODevice::ReadOnly );
|
||||
|
||||
DCT_header dictHeader;
|
||||
|
||||
df.read( &dictHeader, sizeof( dictHeader ) );
|
||||
if ( strncmp( dictHeader.signature, "sdct", 4 ) ) {
|
||||
gdWarning( "File \"%s\" is not valid SDictionary file", fileName.c_str() );
|
||||
qWarning( "File \"%s\" is not valid SDictionary file", fileName.c_str() );
|
||||
continue;
|
||||
}
|
||||
int compression = dictHeader.compression & 0x0F;
|
||||
|
@ -722,7 +703,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
initializing.indexingDictionary( dictName );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
|
||||
|
@ -761,7 +742,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
// Insert new entry
|
||||
|
||||
indexedWords.addWord( Utf8::decode( string( data.data(), size ) ), articleOffset );
|
||||
indexedWords.addWord( Text::toUtf32( string( data.data(), size ) ), articleOffset );
|
||||
|
||||
pos += el.nextWord;
|
||||
}
|
||||
|
@ -795,11 +776,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Sdictionary dictionary indexing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Sdictionary dictionary indexing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
catch ( ... ) {
|
||||
qWarning( "Sdictionary dictionary indexing failed\n" );
|
||||
qWarning( "Sdictionary dictionary indexing failed" );
|
||||
continue;
|
||||
}
|
||||
} // if need to rebuild
|
||||
|
@ -807,7 +788,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< SdictDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Sdictionary dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Sdictionary dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
return dictionaries;
|
||||
|
|
|
@ -6,33 +6,25 @@
|
|||
#include "btreeidx.hh"
|
||||
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "decompress.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "filetype.hh"
|
||||
#include "tiff.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include "iconv.hh"
|
||||
|
||||
#include <QString>
|
||||
#include <QStringBuilder>
|
||||
#include <QFile>
|
||||
#include <QFileInfo>
|
||||
#include <QDir>
|
||||
#include <QMap>
|
||||
#include <QProcess>
|
||||
#include <QList>
|
||||
|
||||
#include <QtEndian>
|
||||
#include <QRegularExpression>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
@ -48,7 +40,6 @@ using std::vector;
|
|||
using std::multimap;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using gd::wstring;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -97,7 +88,7 @@ struct RefEntry
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -618,16 +609,6 @@ public:
|
|||
|
||||
~SlobDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -648,8 +629,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -702,7 +685,7 @@ private:
|
|||
SlobDictionary::SlobDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idxFileName( indexFile ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() )
|
||||
{
|
||||
// Open data file
|
||||
|
@ -871,7 +854,7 @@ void SlobDictionary::loadResource( std::string & resourceName, string & data )
|
|||
vector< WordArticleLink > link;
|
||||
RefEntry entry;
|
||||
|
||||
link = resourceIndex.findArticles( Utf8::decode( resourceName ) );
|
||||
link = resourceIndex.findArticles( Text::toUtf32( resourceName ) );
|
||||
|
||||
if ( link.empty() ) {
|
||||
return;
|
||||
|
@ -947,7 +930,7 @@ void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Slob: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Slob: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
const auto slob_dic = std::make_unique< SlobDictionary >( getId(), idxFileName, getDictionaryFilenames() );
|
||||
|
@ -955,7 +938,7 @@ void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -985,7 +968,7 @@ void SlobDictionary::getArticleText( uint32_t articleAddress, QString & headword
|
|||
}
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Slob: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Slob: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1007,8 +990,8 @@ SlobDictionary::getSearchResults( QString const & searchString, int searchMode,
|
|||
class SlobArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
SlobDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -1017,8 +1000,8 @@ class SlobArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
SlobArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
SlobArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
SlobDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -1063,13 +1046,13 @@ void SlobArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< quint64 > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -1102,12 +1085,12 @@ void SlobArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -1123,7 +1106,7 @@ void SlobArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
result += R"(<div class="slobdict"><h3 class="slobdict_headword">)";
|
||||
|
@ -1146,9 +1129,9 @@ void SlobArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > SlobDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -1233,10 +1216,10 @@ void SlobResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -1279,13 +1262,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
SlobFile sf;
|
||||
|
||||
gdDebug( "Slob: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Slob: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
sf.open( firstName );
|
||||
|
||||
initializing.indexingDictionary( sf.getDictionaryName().toUtf8().constData() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
|
||||
|
@ -1373,11 +1356,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< SlobDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Slob dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Slob dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
catch ( ... ) {
|
||||
qWarning( "Slob dictionary initializing failed\n" );
|
||||
qWarning( "Slob dictionary initializing failed" );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,13 +3,12 @@
|
|||
|
||||
#include "sounddir.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "filetype.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "audiolink.hh"
|
||||
#include "wstring_qt.hh"
|
||||
|
||||
#include "utils.hh"
|
||||
|
||||
|
@ -21,7 +20,6 @@
|
|||
namespace SoundDir {
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::map;
|
||||
using std::multimap;
|
||||
using std::set;
|
||||
|
@ -51,7 +49,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -61,7 +59,6 @@ bool indexIsOldOrBad( string const & indexFile )
|
|||
|
||||
class SoundDirDictionary: public BtreeIndexing::BtreeDictionary
|
||||
{
|
||||
string name;
|
||||
QMutex idxMutex;
|
||||
File::Index idx;
|
||||
IdxHeader idxHeader;
|
||||
|
@ -76,16 +73,6 @@ public:
|
|||
vector< string > const & dictionaryFiles,
|
||||
QString const & iconFilename_ );
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.soundsCount;
|
||||
|
@ -96,8 +83,10 @@ public:
|
|||
return getArticleCount();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -113,20 +102,21 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
|
|||
vector< string > const & dictionaryFiles,
|
||||
QString const & iconFilename_ ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
name( name_ ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
chunks( idx, idxHeader.chunksOffset ),
|
||||
iconFilename( iconFilename_ )
|
||||
{
|
||||
dictionaryName = name_;
|
||||
|
||||
// Initialize the index
|
||||
|
||||
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex );
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
{
|
||||
vector< WordArticleLink > chain = findArticles( word, ignoreDiacritics );
|
||||
|
@ -140,13 +130,13 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
|
|||
}
|
||||
|
||||
// maps to the chain number
|
||||
multimap< wstring, unsigned > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, unsigned > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -161,12 +151,12 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( chain[ x ].word );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, unsigned > & mapToUse =
|
||||
multimap< std::u32string, unsigned > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( chain[ x ].word ), x ) );
|
||||
|
@ -180,7 +170,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const &
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, uint32_t >::const_iterator i;
|
||||
multimap< std::u32string, uint32_t >::const_iterator i;
|
||||
|
||||
string displayedName;
|
||||
vector< char > chunk;
|
||||
|
@ -370,7 +360,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const &
|
|||
// Now try loading that file
|
||||
|
||||
try {
|
||||
File::Index f( fileName.toStdString(), "rb" );
|
||||
File::Index f( fileName.toStdString(), QIODevice::ReadOnly );
|
||||
|
||||
sptr< Dictionary::DataRequestInstant > dr = std::make_shared< Dictionary::DataRequestInstant >( true );
|
||||
|
||||
|
@ -409,11 +399,11 @@ void addDir( QDir const & baseDir,
|
|||
const uint32_t articleOffset = chunks.startNewBlock();
|
||||
chunks.addToBlock( fileName.c_str(), fileName.size() + 1 );
|
||||
|
||||
wstring name = i->fileName().toStdU32String();
|
||||
std::u32string name = i->fileName().toStdU32String();
|
||||
|
||||
const wstring::size_type pos = name.rfind( L'.' );
|
||||
const std::u32string::size_type pos = name.rfind( L'.' );
|
||||
|
||||
if ( pos != wstring::npos ) {
|
||||
if ( pos != std::u32string::npos ) {
|
||||
name.erase( pos );
|
||||
}
|
||||
|
||||
|
@ -479,7 +469,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const &
|
|||
|
||||
initializing.indexingDictionary( soundDir.name.toUtf8().data() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
|
|
@ -4,46 +4,36 @@
|
|||
#include "stardict.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "xdxf2html.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
#include "filetype.hh"
|
||||
#include "indexedzip.hh"
|
||||
#include "tiff.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "audiolink.hh"
|
||||
|
||||
#include <zlib.h>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <QString>
|
||||
#include <QAtomicInt>
|
||||
#include <QDomDocument>
|
||||
#include "ufile.hh"
|
||||
#include "utils.hh"
|
||||
#include <QRegularExpression>
|
||||
#include "globalregex.hh"
|
||||
#include <QDir>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef Q_OS_WIN
|
||||
#include <arpa/inet.h>
|
||||
#else
|
||||
#include <winsock.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QString>
|
||||
#include <QSemaphore>
|
||||
#include <QAtomicInt>
|
||||
#include <QStringList>
|
||||
#include <QDomDocument>
|
||||
#include "ufile.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
#include <QRegularExpression>
|
||||
#include "globalregex.hh"
|
||||
|
||||
namespace Stardict {
|
||||
|
||||
|
@ -52,7 +42,6 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -76,13 +65,15 @@ DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionar
|
|||
/// Contents of an ifo file
|
||||
struct Ifo
|
||||
{
|
||||
string version;
|
||||
string bookname;
|
||||
uint32_t wordcount, synwordcount, idxfilesize, idxoffsetbits;
|
||||
uint32_t wordcount = 0;
|
||||
uint32_t synwordcount = 0;
|
||||
uint32_t idxfilesize = 0;
|
||||
uint32_t idxoffsetbits = 32;
|
||||
string sametypesequence, dicttype, description;
|
||||
string copyright, author, email, website, date;
|
||||
|
||||
explicit Ifo( File::Index & );
|
||||
explicit Ifo( const QString & fileName );
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -116,7 +107,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -129,9 +120,8 @@ class StardictDictionary: public BtreeIndexing::BtreeDictionary
|
|||
QMutex idxMutex;
|
||||
File::Index idx;
|
||||
IdxHeader idxHeader;
|
||||
string bookName;
|
||||
string sameTypeSequence;
|
||||
ChunkedStorage::Reader chunks;
|
||||
std::unique_ptr< ChunkedStorage::Reader > chunks;
|
||||
QMutex dzMutex;
|
||||
dictData * dz;
|
||||
QMutex resourceZipMutex;
|
||||
|
@ -143,22 +133,6 @@ public:
|
|||
|
||||
~StardictDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return bookName;
|
||||
}
|
||||
|
||||
void setName( string _name ) noexcept override
|
||||
{
|
||||
bookName = _name;
|
||||
}
|
||||
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.wordCount;
|
||||
|
@ -179,10 +153,12 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) override;
|
||||
sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -235,12 +211,14 @@ StardictDictionary::StardictDictionary( string const & id,
|
|||
string const & indexFile,
|
||||
vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
bookName( loadString( idxHeader.bookNameSize ) ),
|
||||
sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
|
||||
chunks( idx, idxHeader.chunksOffset )
|
||||
idx( indexFile, QIODevice::ReadOnly )
|
||||
{
|
||||
// reading headers, note that reading order matters
|
||||
idxHeader = idx.read< IdxHeader >();
|
||||
dictionaryName = loadString( idxHeader.bookNameSize );
|
||||
sameTypeSequence = loadString( idxHeader.sameTypeSequenceSize );
|
||||
chunks = std::make_unique< ChunkedStorage::Reader >( idx, idxHeader.chunksOffset );
|
||||
|
||||
// Open the .dict file
|
||||
|
||||
DZ_ERRORS error;
|
||||
|
@ -321,7 +299,7 @@ void StardictDictionary::getArticleProps( uint32_t articleAddress,
|
|||
|
||||
QMutexLocker _( &idxMutex );
|
||||
|
||||
char * articleData = chunks.getBlock( articleAddress, chunk );
|
||||
char * articleData = chunks->getBlock( articleAddress, chunk );
|
||||
|
||||
memcpy( &offset, articleData, sizeof( uint32_t ) );
|
||||
articleData += sizeof( uint32_t );
|
||||
|
@ -950,9 +928,7 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
entrySize = size;
|
||||
}
|
||||
else if ( !size ) {
|
||||
gdWarning( "Stardict: short entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: short entry for the word %s encountered in \"%s\".", headword.c_str(), getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -965,9 +941,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
}
|
||||
|
||||
if ( size < entrySize ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -985,9 +961,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
|
||||
if ( !entrySizeKnown ) {
|
||||
if ( size < sizeof( uint32_t ) ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1000,9 +976,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
}
|
||||
|
||||
if ( size < entrySize ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1012,10 +988,10 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
size -= entrySize;
|
||||
}
|
||||
else {
|
||||
gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
|
||||
type,
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".",
|
||||
type,
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1028,9 +1004,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
size_t len = strlen( ptr + 1 );
|
||||
|
||||
if ( size < len + 2 ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1042,9 +1018,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
else if ( isupper( *ptr ) ) {
|
||||
// An entry which has its size before its contents
|
||||
if ( size < sizeof( uint32_t ) + 1 ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1055,9 +1031,9 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
entrySize = ntohl( entrySize );
|
||||
|
||||
if ( size < sizeof( uint32_t ) + 1 + entrySize ) {
|
||||
gdWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".\n",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: malformed entry for the word %s encountered in \"%s\".",
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1067,10 +1043,10 @@ void StardictDictionary::loadArticle( uint32_t address, string & headword, strin
|
|||
size -= sizeof( uint32_t ) + 1 + entrySize;
|
||||
}
|
||||
else {
|
||||
gdWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".\n",
|
||||
(unsigned)*ptr,
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
qWarning( "Stardict: non-alpha entry type 0x%x for the word %s encountered in \"%s\".",
|
||||
(unsigned)*ptr,
|
||||
headword.c_str(),
|
||||
getName().c_str() );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1085,40 +1061,36 @@ QString const & StardictDictionary::getDescription()
|
|||
return dictionaryDescription;
|
||||
}
|
||||
|
||||
File::Index ifoFile( getDictionaryFilenames()[ 0 ], "r" );
|
||||
Ifo ifo( ifoFile );
|
||||
Ifo ifo( QString::fromStdString( getDictionaryFilenames()[ 0 ] ) );
|
||||
|
||||
if ( !ifo.copyright.empty() ) {
|
||||
QString copyright = QString::fromUtf8( ifo.copyright.c_str() ).replace( "<br>", "\n", Qt::CaseInsensitive );
|
||||
dictionaryDescription += QObject::tr( "Copyright: %1%2" ).arg( copyright ).arg( "\n\n" );
|
||||
QString copyright = QString::fromUtf8( ifo.copyright.c_str() );
|
||||
dictionaryDescription += QObject::tr( "Copyright: %1%2" ).arg( copyright ).arg( "<br><br>" );
|
||||
}
|
||||
|
||||
if ( !ifo.author.empty() ) {
|
||||
QString author = QString::fromUtf8( ifo.author.c_str() );
|
||||
dictionaryDescription += QObject::tr( "Author: %1%2" ).arg( author ).arg( "\n\n" );
|
||||
dictionaryDescription += QObject::tr( "Author: %1%2" ).arg( author ).arg( "<br><br>" );
|
||||
}
|
||||
|
||||
if ( !ifo.email.empty() ) {
|
||||
QString email = QString::fromUtf8( ifo.email.c_str() );
|
||||
dictionaryDescription += QObject::tr( "E-mail: %1%2" ).arg( email ).arg( "\n\n" );
|
||||
dictionaryDescription += QObject::tr( "E-mail: %1%2" ).arg( email ).arg( "<br><br>" );
|
||||
}
|
||||
|
||||
if ( !ifo.website.empty() ) {
|
||||
QString website = QString::fromUtf8( ifo.website.c_str() );
|
||||
dictionaryDescription += QObject::tr( "Website: %1%2" ).arg( website ).arg( "\n\n" );
|
||||
dictionaryDescription += QObject::tr( "Website: %1%2" ).arg( website ).arg( "<br><br>" );
|
||||
}
|
||||
|
||||
if ( !ifo.date.empty() ) {
|
||||
QString date = QString::fromUtf8( ifo.date.c_str() );
|
||||
dictionaryDescription += QObject::tr( "Date: %1%2" ).arg( date ).arg( "\n\n" );
|
||||
dictionaryDescription += QObject::tr( "Date: %1%2" ).arg( date ).arg( "<br><br>" );
|
||||
}
|
||||
|
||||
if ( !ifo.description.empty() ) {
|
||||
QString desc = QString::fromUtf8( ifo.description.c_str() );
|
||||
desc.replace( "\t", "<br/>" );
|
||||
desc.replace( "\\n", "<br/>" );
|
||||
desc.replace( "<br>", "<br/>", Qt::CaseInsensitive );
|
||||
dictionaryDescription += Html::unescape( desc, Html::HtmlOption::Keep );
|
||||
dictionaryDescription += desc;
|
||||
}
|
||||
|
||||
if ( dictionaryDescription.isEmpty() ) {
|
||||
|
@ -1149,16 +1121,14 @@ void StardictDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Stardict: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Stardict: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Stardict: Failed building full-text search index for \"%s\", reason: %s\n",
|
||||
getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "Stardict: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -1174,7 +1144,7 @@ void StardictDictionary::getArticleText( uint32_t articleAddress, QString & head
|
|||
text = Html::unescape( QString::fromStdString( articleStr ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Stardict: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Stardict: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1195,7 +1165,7 @@ sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString co
|
|||
class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
std::u32string word;
|
||||
StardictDictionary & dict;
|
||||
|
||||
QAtomicInt isCancelled;
|
||||
|
@ -1203,7 +1173,7 @@ class StardictHeadwordsRequest: public Dictionary::WordSearchRequest
|
|||
|
||||
public:
|
||||
|
||||
StardictHeadwordsRequest( wstring const & word_, StardictDictionary & dict_ ):
|
||||
StardictHeadwordsRequest( std::u32string const & word_, StardictDictionary & dict_ ):
|
||||
word( word_ ),
|
||||
dict( dict_ )
|
||||
{
|
||||
|
@ -1238,7 +1208,7 @@ void StardictHeadwordsRequest::run()
|
|||
//limited the synomys to at most 10 entries
|
||||
vector< WordArticleLink > chain = dict.findArticles( word, false, 10 );
|
||||
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string caseFolded = Folding::applySimpleCaseOnly( word );
|
||||
|
||||
for ( auto & x : chain ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||
|
@ -1250,7 +1220,7 @@ void StardictHeadwordsRequest::run()
|
|||
|
||||
dict.loadArticle( x.articleOffset, headword, articleText );
|
||||
|
||||
wstring headwordDecoded = Utf8::decode( headword );
|
||||
std::u32string headwordDecoded = Text::toUtf32( headword );
|
||||
|
||||
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) {
|
||||
// The headword seems to differ from the input word, which makes the
|
||||
|
@ -1268,7 +1238,7 @@ void StardictHeadwordsRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( wstring const & word )
|
||||
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( std::u32string const & word )
|
||||
{
|
||||
return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) :
|
||||
Class::findHeadwordsForSynonym( word );
|
||||
|
@ -1281,8 +1251,8 @@ sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynony
|
|||
class StardictArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
StardictDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -1292,8 +1262,8 @@ class StardictArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
StardictArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
StardictArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
StardictDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -1343,13 +1313,13 @@ void StardictArticleRequest::run()
|
|||
}
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -1376,12 +1346,12 @@ void StardictArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -1397,7 +1367,7 @@ void StardictArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
string cleaner = Utils::Html::getHtmlCleaner();
|
||||
|
||||
|
@ -1440,9 +1410,9 @@ void StardictArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > StardictDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -1457,85 +1427,77 @@ static char const * beginsWith( char const * substr, char const * str )
|
|||
return strncmp( str, substr, len ) == 0 ? str + len : 0;
|
||||
}
|
||||
|
||||
Ifo::Ifo( File::Index & f ):
|
||||
wordcount( 0 ),
|
||||
synwordcount( 0 ),
|
||||
idxfilesize( 0 ),
|
||||
idxoffsetbits( 32 )
|
||||
Ifo::Ifo( const QString & fileName )
|
||||
{
|
||||
static string const versionEq( "version=" );
|
||||
QFile f( fileName );
|
||||
if ( !f.open( QIODevice::ReadOnly ) ) {
|
||||
throw exCantReadFile( "Cannot open IFO file -> " + fileName.toStdString() );
|
||||
};
|
||||
|
||||
static string const booknameEq( "bookname=" );
|
||||
|
||||
//GD_DPRINTF( "%s<\n", f.gets().c_str() );
|
||||
//GD_DPRINTF( "%s<\n", f.gets().c_str() );
|
||||
|
||||
if ( QString::fromUtf8( f.gets().c_str() ) != "StarDict's dict ifo file"
|
||||
|| f.gets().compare( 0, versionEq.size(), versionEq ) ) {
|
||||
if ( !f.readLine().startsWith( "StarDict's dict ifo file" ) || !f.readLine().startsWith( "version=" ) ) {
|
||||
throw exNotAnIfoFile();
|
||||
}
|
||||
|
||||
/// Now go through the file and parse options
|
||||
{
|
||||
while ( !f.atEnd() ) {
|
||||
auto line = f.readLine();
|
||||
auto option = QByteArrayView( line ).trimmed();
|
||||
// Empty lines are allowed in .ifo file
|
||||
|
||||
try {
|
||||
char option[ 16384 ];
|
||||
|
||||
for ( ;; ) {
|
||||
if ( !f.gets( option, sizeof( option ), true ) ) {
|
||||
break;
|
||||
if ( option.isEmpty() ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( char const * val = beginsWith( "bookname=", option ) ) {
|
||||
if ( char const * val = beginsWith( "bookname=", option.data() ) ) {
|
||||
bookname = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "wordcount=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "wordcount=", option.data() ) ) {
|
||||
if ( sscanf( val, "%u", &wordcount ) != 1 ) {
|
||||
throw exBadFieldInIfo( option );
|
||||
throw exBadFieldInIfo( option.data() );
|
||||
}
|
||||
}
|
||||
else if ( char const * val = beginsWith( "synwordcount=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "synwordcount=", option.data() ) ) {
|
||||
if ( sscanf( val, "%u", &synwordcount ) != 1 ) {
|
||||
throw exBadFieldInIfo( option );
|
||||
throw exBadFieldInIfo( option.data() );
|
||||
}
|
||||
}
|
||||
else if ( char const * val = beginsWith( "idxfilesize=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "idxfilesize=", option.data() ) ) {
|
||||
if ( sscanf( val, "%u", &idxfilesize ) != 1 ) {
|
||||
throw exBadFieldInIfo( option );
|
||||
throw exBadFieldInIfo( option.data() );
|
||||
}
|
||||
}
|
||||
else if ( char const * val = beginsWith( "idxoffsetbits=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "idxoffsetbits=", option.data() ) ) {
|
||||
if ( sscanf( val, "%u", &idxoffsetbits ) != 1 || ( idxoffsetbits != 32 && idxoffsetbits != 64 ) ) {
|
||||
throw exBadFieldInIfo( option );
|
||||
throw exBadFieldInIfo( option.data() );
|
||||
}
|
||||
}
|
||||
else if ( char const * val = beginsWith( "sametypesequence=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "sametypesequence=", option.data() ) ) {
|
||||
sametypesequence = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "dicttype=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "dicttype=", option.data() ) ) {
|
||||
dicttype = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "description=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "description=", option.data() ) ) {
|
||||
description = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "copyright=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "copyright=", option.data() ) ) {
|
||||
copyright = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "author=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "author=", option.data() ) ) {
|
||||
author = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "email=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "email=", option.data() ) ) {
|
||||
email = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "website=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "website=", option.data() ) ) {
|
||||
website = val;
|
||||
}
|
||||
else if ( char const * val = beginsWith( "date=", option ) ) {
|
||||
else if ( char const * val = beginsWith( "date=", option.data() ) ) {
|
||||
date = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch ( File::exReadError & ) {
|
||||
}
|
||||
}
|
||||
|
||||
//// StardictDictionary::getResource()
|
||||
|
@ -1595,7 +1557,7 @@ void StardictResourceRequest::run()
|
|||
string n =
|
||||
dict.getContainingFolder().toStdString() + Utils::Fs::separator() + "res" + Utils::Fs::separator() + resourceName;
|
||||
|
||||
GD_DPRINTF( "startdict resource name is %s\n", n.c_str() );
|
||||
qDebug( "startdict resource name is %s", n.c_str() );
|
||||
|
||||
try {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
@ -1608,7 +1570,7 @@ void StardictResourceRequest::run()
|
|||
if ( dict.resourceZip.isOpen() ) {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
||||
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
|
||||
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
|
||||
throw; // Make it fail since we couldn't read the archive
|
||||
}
|
||||
}
|
||||
|
@ -1672,10 +1634,10 @@ void StardictResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Stardict: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "Stardict: Failed loading resource \"%s\" for \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
catch ( ... ) {
|
||||
|
@ -1757,7 +1719,7 @@ static void handleIdxSynFile( string const & fileName,
|
|||
size_t wordLen = strlen( ptr );
|
||||
|
||||
if ( ptr + wordLen + 1 + ( isSynFile ? sizeof( uint32_t ) : sizeof( uint32_t ) * 2 ) > &image.back() ) {
|
||||
GD_FDPRINTF( stderr, "Warning: sudden end of file %s\n", fileName.c_str() );
|
||||
qWarning( "Warning: sudden end of file %s", fileName.c_str() );
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1840,14 +1802,14 @@ static void handleIdxSynFile( string const & fileName,
|
|||
// Insert new entry into an index
|
||||
|
||||
if ( parseHeadwords ) {
|
||||
indexedWords.addWord( Utf8::decode( word ), offset );
|
||||
indexedWords.addWord( Text::toUtf32( word ), offset );
|
||||
}
|
||||
else {
|
||||
indexedWords.addSingleWord( Utf8::decode( word ), offset );
|
||||
indexedWords.addSingleWord( Text::toUtf32( word ), offset );
|
||||
}
|
||||
}
|
||||
|
||||
GD_DPRINTF( "%u entires made\n", (unsigned)indexedWords.size() );
|
||||
qDebug( "%u entires made", (unsigned)indexedWords.size() );
|
||||
}
|
||||
|
||||
|
||||
|
@ -1897,11 +1859,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
// Building the index
|
||||
|
||||
File::Index ifoFile( fileName, "r" );
|
||||
Ifo ifo( QString::fromStdString( fileName ) );
|
||||
|
||||
Ifo ifo( ifoFile );
|
||||
|
||||
gdDebug( "Stardict: Building the index for dictionary: %s\n", ifo.bookname.c_str() );
|
||||
qDebug( "Stardict: Building the index for dictionary: %s", ifo.bookname.c_str() );
|
||||
|
||||
if ( ifo.idxoffsetbits == 64 ) {
|
||||
throw ex64BitsNotSupported();
|
||||
|
@ -1913,24 +1873,24 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
if ( synFileName.empty() ) {
|
||||
if ( ifo.synwordcount ) {
|
||||
GD_DPRINTF(
|
||||
qDebug(
|
||||
"Warning: dictionary has synwordcount specified, but no "
|
||||
"corresponding .syn file was found\n" );
|
||||
ifo.synwordcount = 0; // Pretend it wasn't there
|
||||
}
|
||||
}
|
||||
else if ( !ifo.synwordcount ) {
|
||||
GD_DPRINTF( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified\n",
|
||||
synFileName.c_str() );
|
||||
qDebug( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified",
|
||||
synFileName.c_str() );
|
||||
}
|
||||
|
||||
|
||||
GD_DPRINTF( "bookname = %s\n", ifo.bookname.c_str() );
|
||||
GD_DPRINTF( "wordcount = %u\n", ifo.wordcount );
|
||||
qDebug( "bookname = %s", ifo.bookname.c_str() );
|
||||
qDebug( "wordcount = %u", ifo.wordcount );
|
||||
|
||||
initializing.indexingDictionary( ifo.bookname );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
|
||||
|
@ -2011,7 +1971,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// If there was a zip file, index it too
|
||||
|
||||
if ( zipFileName.size() ) {
|
||||
GD_DPRINTF( "Indexing zip file\n" );
|
||||
qDebug( "Indexing zip file" );
|
||||
|
||||
idxHeader.hasZipFile = 1;
|
||||
|
||||
|
@ -2050,7 +2010,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< StardictDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Stardict dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Stardict dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
1
src/dict/transliteration/README.md
Normal file
1
src/dict/transliteration/README.md
Normal file
|
@ -0,0 +1 @@
|
|||
Translit
|
|
@ -1,7 +1,7 @@
|
|||
/* This file is (c) 2013 Maksim Tamkovicz <quendimax@gmail.com>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "belarusiantranslit.hh"
|
||||
#include "belarusian.hh"
|
||||
#include "transliteration.hh"
|
||||
#include <QCoreApplication>
|
||||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "dictionary.hh"
|
||||
|
||||
// Support for Belarusian transliteration
|
|
@ -4,16 +4,12 @@
|
|||
#include "chinese.hh"
|
||||
#include <stdexcept>
|
||||
#include <QCoreApplication>
|
||||
// #ifdef Q_OS_MAC
|
||||
#include <opencc/opencc.h>
|
||||
// #endif
|
||||
// #include <opencc/SimpleConverter.hpp>
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "transliteration.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
|
||||
namespace Chinese {
|
||||
namespace ChineseTranslit {
|
||||
|
||||
class CharacterConversionDictionary: public Transliteration::BaseTransliterationDictionary
|
||||
{
|
||||
|
@ -31,7 +27,7 @@ public:
|
|||
QString const & openccConfig );
|
||||
~CharacterConversionDictionary();
|
||||
|
||||
std::vector< gd::wstring > getAlternateWritings( gd::wstring const & ) noexcept override;
|
||||
std::vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept override;
|
||||
};
|
||||
|
||||
CharacterConversionDictionary::CharacterConversionDictionary( std::string const & id,
|
||||
|
@ -45,18 +41,18 @@ CharacterConversionDictionary::CharacterConversionDictionary( std::string const
|
|||
// #ifdef Q_OS_MAC
|
||||
converter = opencc_open( openccConfig.toLocal8Bit().constData() );
|
||||
if ( converter == reinterpret_cast< opencc_t >( -1 ) ) {
|
||||
gdWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s\n",
|
||||
openccConfig.toLocal8Bit().constData(),
|
||||
opencc_error() );
|
||||
qWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s",
|
||||
openccConfig.toLocal8Bit().constData(),
|
||||
opencc_error() );
|
||||
}
|
||||
// #else
|
||||
// converter = new opencc::SimpleConverter( openccConfig.toLocal8Bit().constData() );
|
||||
// #endif
|
||||
}
|
||||
catch ( std::runtime_error & e ) {
|
||||
gdWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s\n",
|
||||
openccConfig.toLocal8Bit().constData(),
|
||||
e.what() );
|
||||
qWarning( "CharacterConversionDictionary: failed to initialize OpenCC from config %s: %s",
|
||||
openccConfig.toLocal8Bit().constData(),
|
||||
e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,15 +68,15 @@ CharacterConversionDictionary::~CharacterConversionDictionary()
|
|||
// #endif
|
||||
}
|
||||
|
||||
std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings( gd::wstring const & str ) noexcept
|
||||
std::vector< std::u32string > CharacterConversionDictionary::getAlternateWritings( std::u32string const & str ) noexcept
|
||||
{
|
||||
std::vector< gd::wstring > results;
|
||||
std::vector< std::u32string > results;
|
||||
|
||||
if ( converter != NULL ) {
|
||||
gd::wstring folded = Folding::applySimpleCaseOnly( str );
|
||||
std::string input = Utf8::encode( folded );
|
||||
std::u32string folded = Folding::applySimpleCaseOnly( str );
|
||||
std::string input = Text::toUtf8( folded );
|
||||
std::string output;
|
||||
gd::wstring result;
|
||||
std::u32string result;
|
||||
|
||||
try {
|
||||
// #ifdef Q_OS_MAC
|
||||
|
@ -91,16 +87,16 @@ std::vector< gd::wstring > CharacterConversionDictionary::getAlternateWritings(
|
|||
opencc_convert_utf8_free( tmp );
|
||||
}
|
||||
else {
|
||||
gdWarning( "OpenCC: conversion failed %s\n", opencc_error() );
|
||||
qWarning( "OpenCC: conversion failed %s", opencc_error() );
|
||||
}
|
||||
}
|
||||
// #else
|
||||
// output = converter->Convert( input );
|
||||
// #endif
|
||||
result = Utf8::decode( output );
|
||||
result = Text::toUtf32( output );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "OpenCC: conversion failed %s\n", ex.what() );
|
||||
qWarning( "OpenCC: conversion failed %s", ex.what() );
|
||||
}
|
||||
|
||||
if ( !result.empty() && result != folded ) {
|
||||
|
@ -162,4 +158,4 @@ std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const
|
|||
return result;
|
||||
}
|
||||
|
||||
} // namespace Chinese
|
||||
} // namespace ChineseTranslit
|
|
@ -3,12 +3,10 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include "config.hh"
|
||||
#include "dictionary.hh"
|
||||
|
||||
/// Chinese character conversion support.
|
||||
namespace Chinese {
|
||||
namespace ChineseTranslit {
|
||||
|
||||
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const & );
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
#include "customtransliteration.hh"
|
||||
#include "custom.hh"
|
||||
#include "dictionary.hh"
|
||||
#include <QCoreApplication>
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
|
||||
#include <vector>
|
||||
#include "transliteration.hh"
|
||||
|
||||
// Support for Belarusian transliteration
|
|
@ -1,7 +1,7 @@
|
|||
/* This file is (c) 2010 Jennie Petoumenou <epetoumenou@gmail.com>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "greektranslit.hh"
|
||||
#include "greek.hh"
|
||||
#include "transliteration.hh"
|
||||
#include <QCoreApplication>
|
||||
|
|
@ -1,7 +1,9 @@
|
|||
#include "romaji.hh"
|
||||
#include <QCoreApplication>
|
||||
|
||||
namespace Romaji {
|
||||
namespace RomajiTranslit {
|
||||
|
||||
using std::vector;
|
||||
|
||||
class HepburnHiragana: public Transliteration::Table
|
||||
{
|
||||
|
@ -375,4 +377,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & r )
|
|||
return result;
|
||||
}
|
||||
|
||||
} // namespace Romaji
|
||||
} // namespace RomajiTranslit
|
|
@ -4,12 +4,9 @@
|
|||
#pragma once
|
||||
|
||||
#include "transliteration.hh"
|
||||
#include "config.hh"
|
||||
|
||||
/// Japanese romanization (Romaji) support.
|
||||
namespace Romaji {
|
||||
namespace RomajiTranslit {
|
||||
|
||||
using std::vector;
|
||||
|
||||
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & );
|
||||
} // namespace Romaji
|
||||
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & );
|
||||
} // namespace RomajiTranslit
|
|
@ -1,7 +1,7 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "russiantranslit.hh"
|
||||
#include "russian.hh"
|
||||
#include "transliteration.hh"
|
||||
#include <QCoreApplication>
|
||||
|
|
@ -2,13 +2,11 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "transliteration.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
namespace Transliteration {
|
||||
|
||||
using gd::wchar;
|
||||
|
||||
BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id,
|
||||
string const & name_,
|
||||
|
@ -27,11 +25,6 @@ string BaseTransliterationDictionary::getName() noexcept
|
|||
return name;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() noexcept
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long BaseTransliterationDictionary::getArticleCount() noexcept
|
||||
{
|
||||
return 0;
|
||||
|
@ -42,26 +35,30 @@ unsigned long BaseTransliterationDictionary::getWordCount() noexcept
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( wstring const &, unsigned long )
|
||||
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( std::u32string const &,
|
||||
unsigned long )
|
||||
{
|
||||
return std::make_shared< Dictionary::WordSearchRequestInstant >();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
BaseTransliterationDictionary::getArticle( wstring const &, vector< wstring > const &, wstring const &, bool )
|
||||
sptr< Dictionary::DataRequest > BaseTransliterationDictionary::getArticle( std::u32string const &,
|
||||
vector< std::u32string > const &,
|
||||
std::u32string const &,
|
||||
bool )
|
||||
|
||||
{
|
||||
return std::make_shared< Dictionary::DataRequestInstant >( false );
|
||||
}
|
||||
|
||||
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
|
||||
sptr< Dictionary::WordSearchRequest >
|
||||
BaseTransliterationDictionary::findHeadwordsForSynonym( std::u32string const & str )
|
||||
|
||||
{
|
||||
sptr< Dictionary::WordSearchRequestInstant > result = std::make_shared< Dictionary::WordSearchRequestInstant >();
|
||||
|
||||
vector< wstring > alts = getAlternateWritings( str );
|
||||
vector< std::u32string > alts = getAlternateWritings( str );
|
||||
|
||||
GD_DPRINTF( "alts = %u\n", (unsigned)alts.size() );
|
||||
qDebug( "alts = %u", (unsigned)alts.size() );
|
||||
|
||||
for ( const auto & alt : alts ) {
|
||||
result->getMatches().push_back( alt );
|
||||
|
@ -73,13 +70,13 @@ sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwor
|
|||
|
||||
void Table::ins( char const * from, char const * to )
|
||||
{
|
||||
wstring fr = Utf8::decode( std::string( from ) );
|
||||
std::u32string fr = Text::toUtf32( std::string( from ) );
|
||||
|
||||
if ( fr.size() > maxEntrySize ) {
|
||||
maxEntrySize = fr.size();
|
||||
}
|
||||
|
||||
insert( std::pair< wstring, wstring >( fr, Utf8::decode( std::string( to ) ) ) );
|
||||
insert( std::pair< std::u32string, std::u32string >( fr, Text::toUtf32( std::string( to ) ) ) );
|
||||
}
|
||||
|
||||
|
||||
|
@ -90,12 +87,12 @@ TransliterationDictionary::TransliterationDictionary(
|
|||
{
|
||||
}
|
||||
|
||||
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str ) noexcept
|
||||
vector< std::u32string > TransliterationDictionary::getAlternateWritings( std::u32string const & str ) noexcept
|
||||
{
|
||||
vector< wstring > results;
|
||||
vector< std::u32string > results;
|
||||
|
||||
wstring result, folded;
|
||||
wstring const * target;
|
||||
std::u32string result, folded;
|
||||
std::u32string const * target;
|
||||
|
||||
if ( caseSensitive ) {
|
||||
// Don't do any transform -- the transliteration is case-sensitive
|
||||
|
@ -106,8 +103,8 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
|
|||
target = &folded;
|
||||
}
|
||||
|
||||
wchar const * ptr = target->c_str();
|
||||
size_t left = target->size();
|
||||
char32_t const * ptr = target->c_str();
|
||||
size_t left = target->size();
|
||||
|
||||
Table::const_iterator i;
|
||||
|
||||
|
@ -116,7 +113,7 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
|
|||
|
||||
for ( x = table.getMaxEntrySize(); x >= 1; --x ) {
|
||||
if ( left >= x ) {
|
||||
i = table.find( wstring( ptr, x ) );
|
||||
i = table.find( std::u32string( ptr, x ) );
|
||||
|
||||
if ( i != table.end() ) {
|
||||
result.append( i->second );
|
|
@ -9,7 +9,6 @@
|
|||
namespace Transliteration {
|
||||
|
||||
using std::map;
|
||||
using gd::wstring;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
|
@ -28,24 +27,22 @@ public:
|
|||
|
||||
virtual string getName() noexcept;
|
||||
|
||||
virtual map< Dictionary::Property, string > getProperties() noexcept;
|
||||
|
||||
virtual unsigned long getArticleCount() noexcept;
|
||||
|
||||
virtual unsigned long getWordCount() noexcept;
|
||||
|
||||
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept = 0;
|
||||
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept = 0;
|
||||
|
||||
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & );
|
||||
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( std::u32string const & );
|
||||
|
||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, unsigned long );
|
||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( std::u32string const &, unsigned long );
|
||||
|
||||
virtual sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const &, wstring const &, bool );
|
||||
getArticle( std::u32string const &, vector< std::u32string > const &, std::u32string const &, bool );
|
||||
};
|
||||
|
||||
|
||||
class Table: public map< wstring, wstring >
|
||||
class Table: public map< std::u32string, std::u32string >
|
||||
{
|
||||
unsigned maxEntrySize;
|
||||
|
||||
|
@ -79,7 +76,7 @@ public:
|
|||
TransliterationDictionary(
|
||||
string const & id, string const & name, QIcon icon, Table const & table, bool caseSensitive = true );
|
||||
|
||||
virtual vector< wstring > getAlternateWritings( wstring const & ) noexcept;
|
||||
virtual vector< std::u32string > getAlternateWritings( std::u32string const & ) noexcept;
|
||||
};
|
||||
|
||||
} // namespace Transliteration
|
|
@ -37,50 +37,18 @@ bool tryPossibleZipName( std::string const & name, std::string & copyTo )
|
|||
|
||||
void loadFromFile( std::string const & filename, std::vector< char > & data )
|
||||
{
|
||||
File::Index f( filename, "rb" );
|
||||
File::Index f( filename, QIODevice::ReadOnly );
|
||||
auto size = f.file().size(); // QFile::size() obtains size via statx on Linux
|
||||
data.resize( size );
|
||||
f.read( data.data(), size );
|
||||
}
|
||||
|
||||
void Index::open( char const * mode )
|
||||
{
|
||||
QFile::OpenMode openMode = QIODevice::Text;
|
||||
|
||||
const char * pch = mode;
|
||||
while ( *pch ) {
|
||||
switch ( *pch ) {
|
||||
case 'r':
|
||||
openMode |= QIODevice::ReadOnly;
|
||||
break;
|
||||
case 'w':
|
||||
openMode |= QIODevice::WriteOnly;
|
||||
break;
|
||||
case '+':
|
||||
openMode &= ~( QIODevice::ReadOnly | QIODevice::WriteOnly );
|
||||
openMode |= QIODevice::ReadWrite;
|
||||
break;
|
||||
case 'a':
|
||||
openMode |= QIODevice::Append;
|
||||
break;
|
||||
case 'b':
|
||||
openMode &= ~QIODevice::Text;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
++pch;
|
||||
}
|
||||
|
||||
if ( !f.open( openMode ) ) {
|
||||
throw exCantOpen( f.fileName().toStdString() + ": " + f.errorString().toUtf8().data() );
|
||||
}
|
||||
}
|
||||
|
||||
Index::Index( std::string_view filename, char const * mode )
|
||||
Index::Index( std::string_view filename, QIODevice::OpenMode mode )
|
||||
{
|
||||
f.setFileName( QString::fromUtf8( filename.data(), filename.size() ) );
|
||||
open( mode );
|
||||
if ( !f.open( mode ) ) {
|
||||
throw exCantOpen( ( f.fileName() + ": " + f.errorString() ).toStdString() );
|
||||
}
|
||||
}
|
||||
|
||||
void Index::read( void * buf, qint64 size )
|
||||
|
|
|
@ -43,7 +43,7 @@ public:
|
|||
QMutex lock;
|
||||
|
||||
// Create QFile Object and open() it.
|
||||
Index( std::string_view filename, char const * mode );
|
||||
Index( std::string_view filename, QIODevice::OpenMode mode );
|
||||
|
||||
/// QFile::read & QFile::write , but with exception throwing
|
||||
void read( void * buf, qint64 size );
|
||||
|
@ -81,6 +81,18 @@ public:
|
|||
/// Like the above, but uses its own local internal buffer and strips newlines by default.
|
||||
std::string gets( bool stripNl = true );
|
||||
|
||||
/// Read 32bit as uint, then reading the subsequent data into a container
|
||||
template< typename T >
|
||||
void readU32SizeAndData( T & container )
|
||||
{
|
||||
uint32_t size = 0;
|
||||
read( &size, sizeof( uint32_t ) );
|
||||
if ( size > 0 ) {
|
||||
container.resize( size );
|
||||
read( container.data(), size );
|
||||
}
|
||||
};
|
||||
|
||||
/// export QFile::readall
|
||||
QByteArray readall();
|
||||
|
||||
|
@ -113,8 +125,6 @@ public:
|
|||
~Index() noexcept;
|
||||
|
||||
private:
|
||||
// QFile::open but with fopen-like mode settings.
|
||||
void open( char const * mode );
|
||||
|
||||
template< typename T >
|
||||
void readType( T & value )
|
||||
|
|
|
@ -4,10 +4,8 @@
|
|||
#include "indexedzip.hh"
|
||||
#include "zipfile.hh"
|
||||
#include <zlib.h>
|
||||
#include "gddebug.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "iconv.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include <QtCore5Compat/QTextCodec>
|
||||
|
||||
#include <QMutexLocker>
|
||||
|
@ -24,7 +22,7 @@ bool IndexedZip::openZipFile( QString const & name )
|
|||
return zipIsOpen;
|
||||
}
|
||||
|
||||
bool IndexedZip::hasFile( gd::wstring const & name )
|
||||
bool IndexedZip::hasFile( std::u32string const & name )
|
||||
{
|
||||
if ( !zipIsOpen ) {
|
||||
return false;
|
||||
|
@ -35,7 +33,7 @@ bool IndexedZip::hasFile( gd::wstring const & name )
|
|||
return !links.empty();
|
||||
}
|
||||
|
||||
bool IndexedZip::loadFile( gd::wstring const & name, vector< char > & data )
|
||||
bool IndexedZip::loadFile( std::u32string const & name, vector< char > & data )
|
||||
{
|
||||
if ( !zipIsOpen ) {
|
||||
return false;
|
||||
|
@ -67,7 +65,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
|
|||
if ( !ZipFile::readLocalHeader( zip, header ) ) {
|
||||
vector< string > zipFileNames;
|
||||
zip.getFilenames( zipFileNames );
|
||||
GD_DPRINTF( "Failed to load header" );
|
||||
qDebug( "Failed to load header" );
|
||||
string filename;
|
||||
if ( zip.getCurrentFile() < zipFileNames.size() ) {
|
||||
filename = zipFileNames.at( zip.getCurrentFile() );
|
||||
|
@ -81,7 +79,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
|
|||
|
||||
switch ( header.compressionMethod ) {
|
||||
case ZipFile::Uncompressed:
|
||||
GD_DPRINTF( "Uncompressed" );
|
||||
qDebug( "Uncompressed" );
|
||||
data.resize( header.uncompressedSize );
|
||||
return (size_t)zip.read( &data.front(), data.size() ) == data.size();
|
||||
|
||||
|
@ -111,7 +109,7 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )
|
|||
}
|
||||
|
||||
if ( inflate( &stream, Z_FINISH ) != Z_STREAM_END ) {
|
||||
GD_DPRINTF( "Not zstream end!" );
|
||||
qDebug( "Not zstream end!" );
|
||||
|
||||
data.clear();
|
||||
|
||||
|
@ -156,7 +154,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
|
||||
while ( ZipFile::readNextEntry( zip, entry ) ) {
|
||||
if ( entry.compressionMethod == ZipFile::Unsupported ) {
|
||||
qWarning( "Zip warning: compression method unsupported -- skipping file \"%s\"\n", entry.fileName.data() );
|
||||
qWarning( "Zip warning: compression method unsupported -- skipping file \"%s\"", entry.fileName.data() );
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -181,7 +179,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
if ( !hasNonAscii ) {
|
||||
// Add entry as is
|
||||
|
||||
zipFileNames.addSingleWord( Utf8::decode( entry.fileName.data() ), entry.localHeaderOffset );
|
||||
zipFileNames.addSingleWord( Text::toUtf32( entry.fileName.data() ), entry.localHeaderOffset );
|
||||
if ( filesCount ) {
|
||||
*filesCount += 1;
|
||||
}
|
||||
|
@ -193,7 +191,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
|
||||
// Utf8
|
||||
try {
|
||||
wstring decoded = Utf8::decode( entry.fileName.constData() );
|
||||
std::u32string decoded = Text::toUtf32( entry.fileName.constData() );
|
||||
|
||||
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
|
||||
if ( filesCount != 0 && !alreadyCounted ) {
|
||||
|
@ -201,12 +199,12 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
alreadyCounted = true;
|
||||
}
|
||||
}
|
||||
catch ( Utf8::exCantDecode & ) {
|
||||
catch ( Text::exCantDecode & ) {
|
||||
// Failed to decode
|
||||
}
|
||||
|
||||
if ( !entry.fileNameInUTF8 ) {
|
||||
wstring nameInSystemLocale;
|
||||
std::u32string nameInSystemLocale;
|
||||
|
||||
// System locale
|
||||
if ( localeCodec ) {
|
||||
|
@ -225,7 +223,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
|
||||
// CP866
|
||||
try {
|
||||
wstring decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
|
||||
std::u32string decoded = Iconv::toWstring( "CP866", entry.fileName.constData(), entry.fileName.size() );
|
||||
|
||||
if ( nameInSystemLocale != decoded ) {
|
||||
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
|
||||
|
@ -242,7 +240,7 @@ bool IndexedZip::indexFile( BtreeIndexing::IndexedWords & zipFileNames, quint32
|
|||
|
||||
// CP1251
|
||||
try {
|
||||
wstring decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
|
||||
std::u32string decoded = Iconv::toWstring( "CP1251", entry.fileName.constData(), entry.fileName.size() );
|
||||
|
||||
if ( nameInSystemLocale != decoded ) {
|
||||
zipFileNames.addSingleWord( decoded, entry.localHeaderOffset );
|
||||
|
|
|
@ -37,11 +37,11 @@ public:
|
|||
|
||||
/// Checks whether the given file exists in the zip file or not.
|
||||
/// Note that this function is thread-safe, since it does not access zip file.
|
||||
bool hasFile( gd::wstring const & name );
|
||||
bool hasFile( std::u32string const & name );
|
||||
|
||||
/// Attempts loading the given file into the given vector. Returns true on
|
||||
/// success, false otherwise.
|
||||
bool loadFile( gd::wstring const & name, std::vector< char > & );
|
||||
bool loadFile( std::u32string const & name, std::vector< char > & );
|
||||
bool loadFile( uint32_t offset, std::vector< char > & );
|
||||
|
||||
/// Index compressed files in zip file
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
|
||||
#include "voiceengines.hh"
|
||||
#include "audiolink.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "utf8.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
@ -21,6 +20,7 @@ namespace VoiceEngines {
|
|||
|
||||
using namespace Dictionary;
|
||||
using std::string;
|
||||
using std::u32string;
|
||||
using std::map;
|
||||
|
||||
inline string toMd5( QByteArray const & b )
|
||||
|
@ -47,10 +47,6 @@ public:
|
|||
return voiceEngine.name.toUtf8().data();
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -62,16 +58,18 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long maxResults ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( u32string const & word, unsigned long maxResults ) override;
|
||||
|
||||
sptr< DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ) override;
|
||||
sptr< DataRequest >
|
||||
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ) override;
|
||||
|
||||
protected:
|
||||
|
||||
void loadIcon() noexcept override;
|
||||
};
|
||||
|
||||
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /*word*/, unsigned long /*maxResults*/ )
|
||||
sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( u32string const & /*word*/,
|
||||
unsigned long /*maxResults*/ )
|
||||
|
||||
{
|
||||
WordSearchRequestInstant * sr = new WordSearchRequestInstant();
|
||||
|
@ -80,11 +78,11 @@ sptr< WordSearchRequest > VoiceEnginesDictionary::prefixMatch( wstring const & /
|
|||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
VoiceEnginesDictionary::getArticle( wstring const & word, vector< wstring > const &, wstring const &, bool )
|
||||
VoiceEnginesDictionary::getArticle( u32string const & word, vector< u32string > const &, u32string const &, bool )
|
||||
|
||||
{
|
||||
string result;
|
||||
string wordUtf8( Utf8::encode( word ) );
|
||||
string wordUtf8( Text::toUtf8( word ) );
|
||||
|
||||
result += "<table class=\"voiceengines_play\"><tr>";
|
||||
|
||||
|
@ -139,4 +137,4 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const
|
|||
|
||||
} // namespace VoiceEngines
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
/* This file is (c) 2013 Timon Wong <timon86.wang@gmail.com>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#pragma once
|
||||
#ifndef NO_TTS_SUPPORT
|
||||
#ifdef TTS_SUPPORT
|
||||
|
||||
#include "dictionary.hh"
|
||||
#include "config.hh"
|
||||
#include "wstring.hh"
|
||||
|
||||
#include "text.hh"
|
||||
#include <QCryptographicHash>
|
||||
|
||||
|
||||
namespace VoiceEngines {
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
|
||||
vector< sptr< Dictionary::Class > > makeDictionaries( Config::VoiceEngines const & voiceEngines );
|
||||
|
||||
|
|
|
@ -2,13 +2,11 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "website.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include <QUrl>
|
||||
#include <QTextCodec>
|
||||
#include <QDir>
|
||||
#include <QFileInfo>
|
||||
#include "gddebug.hh"
|
||||
#include "globalbroadcaster.hh"
|
||||
#include "fmt/compile.h"
|
||||
|
||||
|
@ -22,7 +20,6 @@ namespace {
|
|||
|
||||
class WebSiteDictionary: public Dictionary::Class
|
||||
{
|
||||
string name;
|
||||
QByteArray urlTemplate;
|
||||
bool experimentalIframe;
|
||||
QString iconFilename;
|
||||
|
@ -38,12 +35,13 @@ public:
|
|||
bool inside_iframe_,
|
||||
QNetworkAccessManager & netMgr_ ):
|
||||
Dictionary::Class( id, vector< string >() ),
|
||||
name( name_ ),
|
||||
iconFilename( iconFilename_ ),
|
||||
inside_iframe( inside_iframe_ ),
|
||||
netMgr( netMgr_ ),
|
||||
experimentalIframe( false )
|
||||
{
|
||||
dictionaryName = name_;
|
||||
|
||||
if ( urlTemplate_.startsWith( "http://" ) || urlTemplate_.startsWith( "https://" ) ) {
|
||||
experimentalIframe = true;
|
||||
}
|
||||
|
@ -53,16 +51,6 @@ public:
|
|||
dictionaryDescription = urlTemplate_;
|
||||
}
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
map< Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return 0;
|
||||
|
@ -73,10 +61,12 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
sptr< WordSearchRequest > prefixMatch( wstring const & word, unsigned long ) override;
|
||||
sptr< WordSearchRequest > prefixMatch( std::u32string const & word, unsigned long ) override;
|
||||
|
||||
sptr< DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const & context, bool ) override;
|
||||
sptr< DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const & context,
|
||||
bool ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -101,7 +91,7 @@ protected slots:
|
|||
virtual void requestFinished( QNetworkReply * ) {}
|
||||
};
|
||||
|
||||
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( wstring const & /*word*/, unsigned long )
|
||||
sptr< WordSearchRequest > WebSiteDictionary::prefixMatch( std::u32string const & /*word*/, unsigned long )
|
||||
{
|
||||
sptr< WordSearchRequestInstant > sr = std::make_shared< WordSearchRequestInstant >();
|
||||
|
||||
|
@ -304,9 +294,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
|
|||
}
|
||||
else {
|
||||
if ( netReply->url().scheme() == "file" ) {
|
||||
gdWarning( "WebSites: Failed loading article from \"%s\", reason: %s\n",
|
||||
dictPtr->getName().c_str(),
|
||||
netReply->errorString().toUtf8().data() );
|
||||
qWarning( "WebSites: Failed loading article from \"%s\", reason: %s",
|
||||
dictPtr->getName().c_str(),
|
||||
netReply->errorString().toUtf8().data() );
|
||||
}
|
||||
else {
|
||||
setErrorString( netReply->errorString() );
|
||||
|
@ -319,9 +309,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< DataRequest > WebSiteDictionary::getArticle( wstring const & str,
|
||||
vector< wstring > const & /*alts*/,
|
||||
wstring const & context,
|
||||
sptr< DataRequest > WebSiteDictionary::getArticle( std::u32string const & str,
|
||||
vector< std::u32string > const & /*alts*/,
|
||||
std::u32string const & context,
|
||||
bool /*ignoreDiacritics*/ )
|
||||
{
|
||||
QString urlString = Utils::WebSite::urlReplaceWord( QString( urlTemplate ), QString::fromStdU32String( str ) );
|
||||
|
@ -478,7 +468,8 @@ void WebSiteDictionary::loadIcon() noexcept
|
|||
loadIconFromFile( fInfo.absoluteFilePath(), true );
|
||||
}
|
||||
}
|
||||
if ( dictionaryIcon.isNull() && !loadIconFromText( ":/icons/webdict.svg", QString::fromStdString( name ) ) ) {
|
||||
if ( dictionaryIcon.isNull()
|
||||
&& !loadIconFromText( ":/icons/webdict.svg", QString::fromStdString( dictionaryName ) ) ) {
|
||||
dictionaryIcon = QIcon( ":/icons/webdict.svg" );
|
||||
}
|
||||
dictionaryIconLoaded = true;
|
||||
|
|
109
src/dict/xdxf.cc
109
src/dict/xdxf.cc
|
@ -4,11 +4,10 @@
|
|||
#include "xdxf.hh"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
@ -16,30 +15,19 @@
|
|||
#include <list>
|
||||
#include <wctype.h>
|
||||
#include <stdlib.h>
|
||||
#include "gddebug.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "xdxf2html.hh"
|
||||
#include "ufile.hh"
|
||||
#include "dictzip.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "indexedzip.hh"
|
||||
#include "filetype.hh"
|
||||
#include "tiff.hh"
|
||||
#include "ftshelpers.hh"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QIODevice>
|
||||
#include <QXmlStreamReader>
|
||||
#include <QTextDocument>
|
||||
#include <QFileInfo>
|
||||
#include <QDir>
|
||||
#include <QPainter>
|
||||
#include <QRegularExpression>
|
||||
#include <QSemaphore>
|
||||
#include <QThreadPool>
|
||||
#include <QAtomicInt>
|
||||
|
||||
#include "utils.hh"
|
||||
|
@ -51,7 +39,6 @@ using std::multimap;
|
|||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::vector;
|
||||
using std::list;
|
||||
|
||||
|
@ -125,7 +112,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -152,16 +139,6 @@ public:
|
|||
|
||||
~XdxfDictionary();
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
return idxHeader.articleCount;
|
||||
|
@ -182,8 +159,10 @@ public:
|
|||
return idxHeader.langTo;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -228,7 +207,7 @@ private:
|
|||
|
||||
XdxfDictionary::XdxfDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() )
|
||||
{
|
||||
// Read the dictionary name
|
||||
|
@ -392,14 +371,14 @@ void XdxfDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
}
|
||||
|
||||
|
||||
gdDebug( "Xdxf: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Xdxf: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Xdxf: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Xdxf: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -413,7 +392,7 @@ void XdxfDictionary::getArticleText( uint32_t articleAddress, QString & headword
|
|||
text = Html::unescape( QString::fromStdString( articleStr ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Xdxf: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Xdxf: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -433,8 +412,8 @@ XdxfDictionary::getSearchResults( QString const & searchString, int searchMode,
|
|||
class XdxfArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
std::u32string word;
|
||||
vector< std::u32string > alts;
|
||||
XdxfDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -443,8 +422,8 @@ class XdxfArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
XdxfArticleRequest( wstring const & word_,
|
||||
vector< wstring > const & alts_,
|
||||
XdxfArticleRequest( std::u32string const & word_,
|
||||
vector< std::u32string > const & alts_,
|
||||
XdxfDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( word_ ),
|
||||
|
@ -489,13 +468,13 @@ void XdxfArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -524,12 +503,12 @@ void XdxfArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< std::u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -537,7 +516,7 @@ void XdxfArticleRequest::run()
|
|||
articlesIncluded.insert( x.articleOffset );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "XDXF: Failed loading article from \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
|
||||
qWarning( "XDXF: Failed loading article from \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -549,7 +528,7 @@ void XdxfArticleRequest::run()
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< std::u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
string cleaner = Utils::Html::getHtmlCleaner();
|
||||
|
||||
|
@ -576,9 +555,9 @@ void XdxfArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > XdxfDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -878,7 +857,7 @@ void indexArticle( GzippedFile & gzFile,
|
|||
|
||||
if ( words.empty() ) {
|
||||
// Nothing to index, this article didn't have any tags
|
||||
gdWarning( "No <k> tags found in an article at offset 0x%x, article skipped.\n", (unsigned)articleOffset );
|
||||
qWarning( "No <k> tags found in an article at offset 0x%x, article skipped.", (unsigned)articleOffset );
|
||||
}
|
||||
else {
|
||||
// Add an entry
|
||||
|
@ -898,7 +877,7 @@ void indexArticle( GzippedFile & gzFile,
|
|||
// Add also first header - it's needed for full-text search
|
||||
chunks.addToBlock( words.begin()->toUtf8().data(), words.begin()->toUtf8().length() + 1 );
|
||||
|
||||
// GD_DPRINTF( "%x: %s\n", articleOffset, words.begin()->toUtf8().data() );
|
||||
// qDebug( "%x: %s", articleOffset, words.begin()->toUtf8().data() );
|
||||
|
||||
// Add words to index
|
||||
|
||||
|
@ -973,7 +952,7 @@ void XdxfResourceRequest::run()
|
|||
|
||||
string n = dict.getContainingFolder().toStdString() + Utils::Fs::separator() + resourceName;
|
||||
|
||||
GD_DPRINTF( "xdxf resource name is %s\n", n.c_str() );
|
||||
qDebug( "xdxf resource name is %s", n.c_str() );
|
||||
|
||||
try {
|
||||
try {
|
||||
|
@ -995,7 +974,7 @@ void XdxfResourceRequest::run()
|
|||
if ( dict.resourceZip.isOpen() ) {
|
||||
QMutexLocker _( &dataMutex );
|
||||
|
||||
if ( !dict.resourceZip.loadFile( Utf8::decode( resourceName ), data ) ) {
|
||||
if ( !dict.resourceZip.loadFile( Text::toUtf32( resourceName ), data ) ) {
|
||||
throw; // Make it fail since we couldn't read the archive
|
||||
}
|
||||
}
|
||||
|
@ -1016,10 +995,10 @@ void XdxfResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "XDXF: Failed loading resource \"%s\" for \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "XDXF: Failed loading resource \"%s\" for \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -1073,11 +1052,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
// Building the index
|
||||
|
||||
gdDebug( "Xdxf: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Xdxf: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
//initializing.indexingDictionary( nameFromFileName( dictFiles[ 0 ] ) );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
|
||||
IdxHeader idxHeader;
|
||||
map< string, string > abrv;
|
||||
|
@ -1162,7 +1141,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
chunks.addToBlock( n.data(), n.size() );
|
||||
}
|
||||
else {
|
||||
GD_DPRINTF( "Warning: duplicate full_name in %s\n", dictFiles[ 0 ].c_str() );
|
||||
qDebug( "Warning: duplicate full_name in %s", dictFiles[ 0 ].c_str() );
|
||||
}
|
||||
}
|
||||
else if ( stream.name() == u"description" ) {
|
||||
|
@ -1186,7 +1165,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
chunks.addToBlock( n.data(), n.size() );
|
||||
}
|
||||
else {
|
||||
GD_DPRINTF( "Warning: duplicate description in %s\n", dictFiles[ 0 ].c_str() );
|
||||
qDebug( "Warning: duplicate description in %s", dictFiles[ 0 ].c_str() );
|
||||
}
|
||||
}
|
||||
else if ( stream.name() == u"languages" ) {
|
||||
|
@ -1216,7 +1195,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
else if ( stream.name() == u"abbreviations" ) {
|
||||
QString s;
|
||||
string value;
|
||||
list< wstring > keys;
|
||||
list< std::u32string > keys;
|
||||
while ( !( stream.isEndElement() && stream.name() == u"abbreviations" ) && !stream.atEnd() ) {
|
||||
if ( !stream.readNextStartElement() ) {
|
||||
break;
|
||||
|
@ -1232,7 +1211,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
s = readElementText( stream );
|
||||
value = Folding::trimWhitespace( s ).toStdString();
|
||||
for ( const auto & key : keys ) {
|
||||
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
|
||||
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
|
||||
}
|
||||
keys.clear();
|
||||
}
|
||||
|
@ -1252,7 +1231,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
s = readElementText( stream );
|
||||
value = Folding::trimWhitespace( s ).toStdString();
|
||||
for ( const auto & key : keys ) {
|
||||
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
|
||||
abrv[ Text::toUtf8( Folding::trimWhitespace( key ) ) ] = value;
|
||||
}
|
||||
keys.clear();
|
||||
}
|
||||
|
@ -1312,7 +1291,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
// If there was a zip file, index it too
|
||||
|
||||
if ( zipFileName.size() ) {
|
||||
GD_DPRINTF( "Indexing zip file\n" );
|
||||
qDebug( "Indexing zip file" );
|
||||
|
||||
idxHeader.hasZipFile = 1;
|
||||
|
||||
|
@ -1363,17 +1342,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
if ( stream.hasError() ) {
|
||||
gdWarning( "%s had a parse error %s at line %lu, and therefore was indexed only up to the point of error.",
|
||||
dictFiles[ 0 ].c_str(),
|
||||
stream.errorString().toUtf8().data(),
|
||||
(unsigned long)stream.lineNumber() );
|
||||
qWarning( "%s had a parse error %s at line %lu, and therefore was indexed only up to the point of error.",
|
||||
dictFiles[ 0 ].c_str(),
|
||||
stream.errorString().toUtf8().data(),
|
||||
(unsigned long)stream.lineNumber() );
|
||||
}
|
||||
}
|
||||
|
||||
dictionaries.push_back( std::make_shared< XdxfDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Xdxf dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Xdxf dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,9 +3,7 @@
|
|||
|
||||
#include "xdxf2html.hh"
|
||||
#include <QtXml>
|
||||
#include "gddebug.hh"
|
||||
#include "utf8.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "text.hh"
|
||||
#include "folding.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
|
@ -131,21 +129,21 @@ string convert( string const & in,
|
|||
|
||||
#if ( QT_VERSION < QT_VERSION_CHECK( 6, 5, 0 ) )
|
||||
if ( !dd.setContent( QByteArray( in_data.c_str() ), false, &errorStr, &errorLine, &errorColumn ) ) {
|
||||
qWarning( "Xdxf2html error, xml parse failed: %s at %d,%d\n",
|
||||
qWarning( "Xdxf2html error, xml parse failed: %s at %d,%d",
|
||||
errorStr.toLocal8Bit().constData(),
|
||||
errorLine,
|
||||
errorColumn );
|
||||
gdWarning( "The input was: %s\n", in_data.c_str() );
|
||||
qWarning( "The input was: %s", in_data.c_str() );
|
||||
return in;
|
||||
}
|
||||
#else
|
||||
auto setContentResult = dd.setContent( QByteArray::fromStdString( in_data ) );
|
||||
if ( !setContentResult ) {
|
||||
qWarning( "Xdxf2html error, xml parse failed: %s at %lld,%lld\n",
|
||||
qWarning( "Xdxf2html error, xml parse failed: %s at %lld,%lld",
|
||||
setContentResult.errorMessage.toStdString().c_str(),
|
||||
setContentResult.errorLine,
|
||||
setContentResult.errorColumn );
|
||||
gdWarning( "The input was: %s\n", in_data.c_str() );
|
||||
qWarning( "The input was: %s", in_data.c_str() );
|
||||
return in;
|
||||
}
|
||||
#endif
|
||||
|
@ -443,7 +441,7 @@ string convert( string const & in,
|
|||
if ( i != pAbrv->end() ) {
|
||||
string title;
|
||||
|
||||
if ( Utf8::decode( i->second ).size() < 70 ) {
|
||||
if ( Text::toUtf32( i->second ).size() < 70 ) {
|
||||
// Replace all spaces with non-breakable ones, since that's how Lingvo shows tooltips
|
||||
title.reserve( i->second.size() );
|
||||
|
||||
|
@ -467,7 +465,7 @@ string convert( string const & in,
|
|||
else {
|
||||
title = i->second;
|
||||
}
|
||||
el.setAttribute( "title", QString::fromStdU32String( Utf8::decode( title ) ) );
|
||||
el.setAttribute( "title", QString::fromStdU32String( Text::toUtf32( title ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -629,7 +627,7 @@ string convert( string const & in,
|
|||
|
||||
// if( type == XDXF && dictPtr != NULL && !el.hasAttribute( "start" ) )
|
||||
if ( dictPtr != NULL && !el.hasAttribute( "start" ) ) {
|
||||
string filename = Utf8::encode( el.text().toStdU32String() );
|
||||
string filename = Text::toUtf8( el.text().toStdU32String() );
|
||||
|
||||
if ( Filetype::isNameOfPicture( filename ) ) {
|
||||
QUrl url;
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
|
||||
#include "zim.hh"
|
||||
#include "btreeidx.hh"
|
||||
|
||||
#include "folding.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "langcoder.hh"
|
||||
#include "filetype.hh"
|
||||
#include "dictfile.hh"
|
||||
|
@ -17,24 +15,18 @@
|
|||
#include "ftshelpers.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include <QByteArray>
|
||||
#include <QFile>
|
||||
#include <QString>
|
||||
#include <QAtomicInt>
|
||||
#include <QImage>
|
||||
#include <QDir>
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <QtConcurrent>
|
||||
#include <QtConcurrentRun>
|
||||
#include <utility>
|
||||
#include "globalregex.hh"
|
||||
#include <zim/zim.h>
|
||||
|
@ -46,12 +38,12 @@
|
|||
namespace Zim {
|
||||
|
||||
using std::string;
|
||||
using std::u32string;
|
||||
using std::map;
|
||||
using std::vector;
|
||||
using std::multimap;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using gd::wstring;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -93,7 +85,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
// Some supporting functions
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -168,15 +160,6 @@ public:
|
|||
|
||||
~ZimDictionary() = default;
|
||||
|
||||
string getName() noexcept override
|
||||
{
|
||||
return dictionaryName;
|
||||
}
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -199,7 +182,7 @@ public:
|
|||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
getArticle( u32string const &, vector< u32string > const & alts, u32string const &, bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -241,7 +224,7 @@ private:
|
|||
|
||||
ZimDictionary::ZimDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() ),
|
||||
df( dictionaryFiles[ 0 ] )
|
||||
{
|
||||
|
@ -295,7 +278,7 @@ void ZimDictionary::loadIcon() noexcept
|
|||
return;
|
||||
}
|
||||
catch ( zim::EntryNotFound & e ) {
|
||||
gdDebug( "ZIM icon not loaded for: %s", dictionaryName.c_str() );
|
||||
qDebug( "ZIM icon not loaded for: %s", dictionaryName.c_str() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -497,13 +480,13 @@ void ZimDictionary::makeFTSIndex( QAtomicInt & isCancelled )
|
|||
return;
|
||||
}
|
||||
|
||||
gdDebug( "Zim: Building the full-text index for dictionary: %s\n", getName().c_str() );
|
||||
qDebug( "Zim: Building the full-text index for dictionary: %s", getName().c_str() );
|
||||
try {
|
||||
FtsHelpers::makeFTSIndex( this, isCancelled );
|
||||
FTS_index_completed.ref();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Zim: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Zim: Failed building full-text search index for \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
QFile::remove( ftsIdxName.c_str() );
|
||||
}
|
||||
}
|
||||
|
@ -518,7 +501,7 @@ void ZimDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
text = Html::unescape( QString::fromUtf8( articleText.data(), articleText.size() ) );
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "Zim: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
|
||||
qWarning( "Zim: Failed retrieving article from \"%s\", reason: %s", getName().c_str(), ex.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -536,8 +519,8 @@ ZimDictionary::getSearchResults( QString const & searchString, int searchMode, b
|
|||
|
||||
class ZimArticleRequest: public Dictionary::DataRequest
|
||||
{
|
||||
wstring word;
|
||||
vector< wstring > alts;
|
||||
u32string word;
|
||||
vector< u32string > alts;
|
||||
ZimDictionary & dict;
|
||||
bool ignoreDiacritics;
|
||||
|
||||
|
@ -546,7 +529,10 @@ class ZimArticleRequest: public Dictionary::DataRequest
|
|||
|
||||
public:
|
||||
|
||||
ZimArticleRequest( wstring word_, vector< wstring > const & alts_, ZimDictionary & dict_, bool ignoreDiacritics_ ):
|
||||
ZimArticleRequest( u32string word_,
|
||||
vector< u32string > const & alts_,
|
||||
ZimDictionary & dict_,
|
||||
bool ignoreDiacritics_ ):
|
||||
word( std::move( word_ ) ),
|
||||
alts( alts_ ),
|
||||
dict( dict_ ),
|
||||
|
@ -588,13 +574,13 @@ void ZimArticleRequest::run()
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
|
||||
multimap< u32string, pair< string, string > > mainArticles, alternateArticles;
|
||||
|
||||
set< quint32 > articlesIncluded; // Some synonyms make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -631,12 +617,12 @@ void ZimArticleRequest::run()
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
u32string headwordStripped = Folding::applySimpleCaseOnly( headword );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, pair< string, string > > & mapToUse =
|
||||
multimap< u32string, pair< string, string > > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( pair( Folding::applySimpleCaseOnly( headword ), pair( headword, articleText ) ) );
|
||||
|
@ -655,7 +641,7 @@ void ZimArticleRequest::run()
|
|||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||
string cleaner = Utils::Html::getHtmlCleaner();
|
||||
|
||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||
multimap< u32string, pair< string, string > >::const_iterator i;
|
||||
|
||||
|
||||
for ( i = mainArticles.begin(); i != mainArticles.end(); ++i ) {
|
||||
|
@ -683,9 +669,9 @@ void ZimArticleRequest::run()
|
|||
finish();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > ZimDictionary::getArticle( u32string const & word,
|
||||
vector< u32string > const & alts,
|
||||
u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -767,10 +753,10 @@ void ZimResourceRequest::run()
|
|||
hasAnyData = true;
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "ZIM: Failed loading resource \"%s\" from \"%s\", reason: %s\n",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
qWarning( "ZIM: Failed loading resource \"%s\" from \"%s\", reason: %s",
|
||||
resourceName.c_str(),
|
||||
dict.getName().c_str(),
|
||||
ex.what() );
|
||||
// Resource not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
@ -783,7 +769,7 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
|
|||
return std::make_shared< ZimResourceRequest >( *this, noLeadingDot.toStdString() );
|
||||
}
|
||||
|
||||
wstring normalizeWord( const std::string & url );
|
||||
u32string normalizeWord( const std::string & url );
|
||||
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & initializing,
|
||||
|
@ -822,7 +808,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
try {
|
||||
//only check zim file.
|
||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
gdDebug( "Zim: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Zim: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
unsigned articleCount = df.getArticleCount();
|
||||
unsigned wordCount = 0;
|
||||
|
@ -832,7 +818,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );
|
||||
}
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
idxHeader.namePtr = 0xFFFFFFFF;
|
||||
|
@ -866,7 +852,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
if ( maxHeadwordsToExpand > 0 && ( articleCount >= maxHeadwordsToExpand ) ) {
|
||||
if ( !title.empty() ) {
|
||||
wstring word = Utf8::decode( title );
|
||||
u32string word = Text::toUtf32( title );
|
||||
indexedWords.addSingleWord( word, index );
|
||||
}
|
||||
else if ( !url.empty() ) {
|
||||
|
@ -875,7 +861,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
else {
|
||||
if ( !title.empty() ) {
|
||||
auto word = Utf8::decode( title );
|
||||
auto word = Text::toUtf32( title );
|
||||
indexedWords.addWord( word, index );
|
||||
wordCount++;
|
||||
}
|
||||
|
@ -910,17 +896,17 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< ZimDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Zim dictionary initializing failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Zim dictionary initializing failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
continue;
|
||||
}
|
||||
catch ( ... ) {
|
||||
qWarning( "Zim dictionary initializing failed\n" );
|
||||
qWarning( "Zim dictionary initializing failed" );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return dictionaries;
|
||||
}
|
||||
wstring normalizeWord( const std::string & url )
|
||||
u32string normalizeWord( const std::string & url )
|
||||
{
|
||||
auto formattedUrl = QString::fromStdString( url ).remove( RX::Zim::leadingDotSlash );
|
||||
return formattedUrl.toStdU32String();
|
||||
|
|
|
@ -4,13 +4,12 @@
|
|||
#include "zipsounds.hh"
|
||||
#include "dictfile.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "text.hh"
|
||||
#include "btreeidx.hh"
|
||||
|
||||
#include "audiolink.hh"
|
||||
#include "indexedzip.hh"
|
||||
#include "filetype.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "chunkedstorage.hh"
|
||||
#include "htmlescape.hh"
|
||||
|
||||
|
@ -19,16 +18,12 @@
|
|||
#include <QFile>
|
||||
#include <QDir>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <stub_msvc.h>
|
||||
#endif
|
||||
|
||||
#include "utils.hh"
|
||||
|
||||
namespace ZipSounds {
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using std::map;
|
||||
using std::multimap;
|
||||
using std::set;
|
||||
|
@ -60,7 +55,7 @@ static_assert( alignof( IdxHeader ) == 1 );
|
|||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
{
|
||||
File::Index idx( indexFile, "rb" );
|
||||
File::Index idx( indexFile, QIODevice::ReadOnly );
|
||||
|
||||
IdxHeader header;
|
||||
|
||||
|
@ -68,19 +63,19 @@ bool indexIsOldOrBad( string const & indexFile )
|
|||
|| header.formatVersion != CurrentFormatVersion;
|
||||
}
|
||||
|
||||
wstring stripExtension( string const & str )
|
||||
std::u32string stripExtension( string const & str )
|
||||
{
|
||||
wstring name;
|
||||
std::u32string name;
|
||||
try {
|
||||
name = Utf8::decode( str );
|
||||
name = Text::toUtf32( str );
|
||||
}
|
||||
catch ( Utf8::exCantDecode & ) {
|
||||
catch ( Text::exCantDecode & ) {
|
||||
return name;
|
||||
}
|
||||
|
||||
if ( Filetype::isNameOfSound( str ) ) {
|
||||
wstring::size_type pos = name.rfind( L'.' );
|
||||
if ( pos != wstring::npos ) {
|
||||
std::u32string::size_type pos = name.rfind( L'.' );
|
||||
if ( pos != std::u32string::npos ) {
|
||||
name.erase( pos );
|
||||
}
|
||||
|
||||
|
@ -111,10 +106,6 @@ public:
|
|||
|
||||
string getName() noexcept override;
|
||||
|
||||
map< Dictionary::Property, string > getProperties() noexcept override
|
||||
{
|
||||
return map< Dictionary::Property, string >();
|
||||
}
|
||||
|
||||
unsigned long getArticleCount() noexcept override
|
||||
{
|
||||
|
@ -126,8 +117,10 @@ public:
|
|||
return getArticleCount();
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest >
|
||||
getArticle( wstring const &, vector< wstring > const & alts, wstring const &, bool ignoreDiacritics ) override;
|
||||
sptr< Dictionary::DataRequest > getArticle( std::u32string const &,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics ) override;
|
||||
|
||||
sptr< Dictionary::DataRequest > getResource( string const & name ) override;
|
||||
|
||||
|
@ -140,7 +133,7 @@ ZipSoundsDictionary::ZipSoundsDictionary( string const & id,
|
|||
string const & indexFile,
|
||||
vector< string > const & dictionaryFiles ):
|
||||
BtreeDictionary( id, dictionaryFiles ),
|
||||
idx( indexFile, "rb" ),
|
||||
idx( indexFile, QIODevice::ReadOnly ),
|
||||
idxHeader( idx.read< IdxHeader >() )
|
||||
{
|
||||
chunks = std::shared_ptr< ChunkedStorage::Reader >( new ChunkedStorage::Reader( idx, idxHeader.chunksOffset ) );
|
||||
|
@ -165,9 +158,9 @@ string ZipSoundsDictionary::getName() noexcept
|
|||
return result;
|
||||
}
|
||||
|
||||
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word,
|
||||
vector< wstring > const & alts,
|
||||
wstring const &,
|
||||
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( std::u32string const & word,
|
||||
vector< std::u32string > const & alts,
|
||||
std::u32string const &,
|
||||
bool ignoreDiacritics )
|
||||
|
||||
{
|
||||
|
@ -181,13 +174,13 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
|
|||
chain.insert( chain.end(), altChain.begin(), altChain.end() );
|
||||
}
|
||||
|
||||
multimap< wstring, uint32_t > mainArticles, alternateArticles;
|
||||
multimap< std::u32string, uint32_t > mainArticles, alternateArticles;
|
||||
|
||||
set< uint32_t > articlesIncluded; // Some synonims make it that the articles
|
||||
// appear several times. We combat this
|
||||
// by only allowing them to appear once.
|
||||
|
||||
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
std::u32string wordCaseFolded = Folding::applySimpleCaseOnly( word );
|
||||
if ( ignoreDiacritics ) {
|
||||
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
|
||||
}
|
||||
|
@ -202,12 +195,12 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
|
|||
|
||||
// We do the case-folded comparison here.
|
||||
|
||||
wstring headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
std::u32string headwordStripped = Folding::applySimpleCaseOnly( x.word );
|
||||
if ( ignoreDiacritics ) {
|
||||
headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );
|
||||
}
|
||||
|
||||
multimap< wstring, uint32_t > & mapToUse =
|
||||
multimap< std::u32string, uint32_t > & mapToUse =
|
||||
( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;
|
||||
|
||||
mapToUse.insert( std::pair( Folding::applySimpleCaseOnly( x.word ), x.articleOffset ) );
|
||||
|
@ -221,7 +214,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
|
|||
|
||||
string result;
|
||||
|
||||
multimap< wstring, uint32_t >::const_iterator i;
|
||||
multimap< std::u32string, uint32_t >::const_iterator i;
|
||||
|
||||
result += "<table class=\"lsa_play\">";
|
||||
|
||||
|
@ -252,7 +245,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
|
|||
nameBlock += sz;
|
||||
|
||||
string displayedName =
|
||||
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
|
||||
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
|
||||
|
||||
result += "<tr>";
|
||||
|
||||
|
@ -294,7 +287,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const &
|
|||
nameBlock += sz;
|
||||
|
||||
string displayedName =
|
||||
mainArticles.size() + alternateArticles.size() > 1 ? name : Utf8::encode( stripExtension( name ) );
|
||||
mainArticles.size() + alternateArticles.size() > 1 ? name : Text::toUtf8( stripExtension( name ) );
|
||||
|
||||
result += "<tr>";
|
||||
|
||||
|
@ -324,7 +317,7 @@ sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const &
|
|||
{
|
||||
// Remove extension for sound files (like in sound dirs)
|
||||
|
||||
wstring strippedName = stripExtension( name );
|
||||
std::u32string strippedName = stripExtension( name );
|
||||
|
||||
vector< WordArticleLink > chain = findArticles( strippedName );
|
||||
|
||||
|
@ -403,9 +396,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
string indexFile = indicesDir + dictId;
|
||||
|
||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( indexFile ) ) {
|
||||
gdDebug( "Zips: Building the index for dictionary: %s\n", fileName.c_str() );
|
||||
qDebug( "Zips: Building the index for dictionary: %s", fileName.c_str() );
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
File::Index idx( indexFile, QIODevice::WriteOnly );
|
||||
IdxHeader idxHeader;
|
||||
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
|
@ -438,7 +431,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
// Remove extension for sound files (like in sound dirs)
|
||||
|
||||
wstring word = stripExtension( link.word );
|
||||
std::u32string word = stripExtension( link.word );
|
||||
if ( !word.empty() ) {
|
||||
names.addWord( word, offset );
|
||||
}
|
||||
|
@ -477,7 +470,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
dictionaries.push_back( std::make_shared< ZipSoundsDictionary >( dictId, indexFile, dictFiles ) );
|
||||
}
|
||||
catch ( std::exception & e ) {
|
||||
gdWarning( "Zipped sounds pack reading failed: %s, error: %s\n", fileName.c_str(), e.what() );
|
||||
qWarning( "Zipped sounds pack reading failed: %s, error: %s", fileName.c_str(), e.what() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include <QDir>
|
||||
#include <QTimer>
|
||||
#include "externalviewer.hh"
|
||||
#include "gddebug.hh"
|
||||
|
||||
ExternalViewer::ExternalViewer(
|
||||
const char * data, int size, QString const & extension, QString const & viewerCmdLine_, QObject * parent ):
|
||||
|
@ -26,7 +25,7 @@ ExternalViewer::ExternalViewer(
|
|||
|
||||
tempFile.close();
|
||||
|
||||
GD_DPRINTF( "%s\n", tempFile.fileName().toLocal8Bit().data() );
|
||||
qDebug( "%s", tempFile.fileName().toLocal8Bit().data() );
|
||||
}
|
||||
|
||||
void ExternalViewer::start()
|
||||
|
|
|
@ -5,9 +5,7 @@
|
|||
#include <cstdlib>
|
||||
#include "fulltextsearch.hh"
|
||||
#include "ftshelpers.hh"
|
||||
#include "wstring_qt.hh"
|
||||
#include "dictfile.hh"
|
||||
#include "gddebug.hh"
|
||||
#include "folding.hh"
|
||||
#include "utils.hh"
|
||||
|
||||
|
@ -253,7 +251,7 @@ void FTSResultsRequest::run()
|
|||
qWarning() << e.get_description().c_str();
|
||||
}
|
||||
catch ( std::exception & ex ) {
|
||||
gdWarning( "FTS: Failed full-text search for \"%s\", reason: %s\n", dict.getName().c_str(), ex.what() );
|
||||
qWarning( "FTS: Failed full-text search for \"%s\", reason: %s", dict.getName().c_str(), ex.what() );
|
||||
// Results not loaded -- we don't set the hasAnyData flag then
|
||||
}
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue