goldendict-ng/winlibs/include/opencc/UTF8StringSlice.hpp

/*
 * Open Chinese Convert
 *
 * Copyright 2015 BYVoid <byvoid@byvoid.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Common.hpp"
#include "UTF8Util.hpp"

namespace opencc {

namespace internal {

/**
 * FNV-1a hash over a byte range: for every byte, XOR it into the running
 * hash and then multiply by the prime.
 *
 * @param text             first byte of the range to hash
 * @param byteLength       number of bytes to hash (embedded NULs are hashed
 *                         like any other byte)
 * @param FNV_prime        multiplier applied after each byte
 * @param FNV_offset_basis initial hash value; returned unchanged when
 *                         byteLength is 0
 * @return the hash, computed modulo 2^(bits in size_t)
 */
inline size_t FNVHash(const char* text, const size_t byteLength,
                      const size_t FNV_prime, const size_t FNV_offset_basis) {
  size_t hash = FNV_offset_basis;
  for (const char* pstr = text; pstr < text + byteLength; pstr++) {
    // Go through unsigned char so that a byte >= 0x80 (i.e. every byte of a
    // multi-byte UTF-8 character) is XORed in as a single octet. With a plain
    // char that is signed, the value would be sign-extended to size_t and
    // flip all the high bits of the hash, and the result would differ between
    // platforms with signed and unsigned char.
    hash ^= static_cast<unsigned char>(*pstr);
    hash *= FNV_prime;
  }
  return hash;
}

/**
 * Width-selected FNV hash. The template argument is the size of size_t in
 * bytes; only the specializations below are defined.
 */
template <int> size_t FNVHash(const char* text, const size_t byteLength);

/** 32-bit variant: FNV prime 16777619, offset basis 2166136261. */
template <>
inline size_t FNVHash<4>(const char* text, const size_t byteLength) {
  return FNVHash(text, byteLength, 16777619UL, 2166136261UL);
}

// The 64-bit constants do not fit into a 32-bit size_t, so this
// specialization is only provided when size_t is 64 bits wide.
#if SIZE_MAX == 0xffffffffffffffff
/** 64-bit variant: FNV prime 1099511628211, offset basis 14695981039346656037. */
template <>
inline size_t FNVHash<8>(const char* text, const size_t byteLength) {
  return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);
}
#endif

} // namespace internal

/**
 * A view over a run of UTF-8 text, described by a start pointer, a length in
 * characters and a length in bytes.
 *
 * The slice only stores the pointer; it neither copies nor frees the
 * underlying buffer, so the buffer has to outlive every slice that refers to
 * it. Character boundaries are determined by the UTF8Util helpers declared in
 * UTF8Util.hpp.
 *
 * @tparam LENGTH_TYPE integer type used to store both lengths. All lengths
 *                     are cast to this type without range checks.
 */
template <typename LENGTH_TYPE> class UTF8StringSliceBase {
public:
  typedef LENGTH_TYPE LengthType;

  /**
   * Creates a slice over the whole NUL-terminated string `_str`.
   * The character count comes from UTF8Util::Length and the byte count from
   * strlen.
   */
  UTF8StringSliceBase(const char* _str)
      : str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),
        byteLength(static_cast<LengthType>(strlen(_str))) {}

  /**
   * Creates a slice over the first `_utf8Length` characters starting at
   * `_str`. The byte length is derived by walking those characters.
   */
  UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)
      : str(_str), utf8Length(_utf8Length) {
    CalculateByteLength();
  }

  /**
   * Creates a slice over the first `_utf8Length` characters starting at
   * `_str`.
   *
   * NOTE: `_byteLength` is stored first but then overwritten by
   * CalculateByteLength(), so the effective byte length is always the one
   * derived from `_utf8Length`.
   */
  UTF8StringSliceBase(const char* _str, const LengthType _utf8Length,
                      const LengthType _byteLength)
      : str(_str), utf8Length(_utf8Length), byteLength(_byteLength) {
    CalculateByteLength();
  }

  /** Length of the slice in characters. */
  LengthType UTF8Length() const { return utf8Length; }

  /** Length of the slice in bytes. */
  LengthType ByteLength() const { return byteLength; }

  /**
   * Returns the slice made of the first `numberOfCharacters` characters.
   * `numberOfCharacters` is not checked against UTF8Length().
   */
  UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {
    if (numberOfCharacters == UTF8Length()) {
      return *this;
    } else {
      return UTF8StringSliceBase(str, numberOfCharacters);
    }
  }

  /**
   * Returns the slice made of the last `numberOfCharacters` characters,
   * found by stepping backwards from the end of this slice.
   * `numberOfCharacters` is not checked against UTF8Length().
   */
  UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {
    if (numberOfCharacters == UTF8Length()) {
      return *this;
    } else {
      const char* pstr = str + byteLength;
      for (size_t i = 0; i < numberOfCharacters; i++) {
        pstr = UTF8Util::PrevChar(pstr);
      }
      return UTF8StringSliceBase(pstr, numberOfCharacters);
    }
  }

  /**
   * Returns the slice of `numberOfCharacters` characters that starts
   * `offset` characters into this slice. Neither argument is range-checked.
   */
  UTF8StringSliceBase SubString(const LengthType offset,
                                const LengthType numberOfCharacters) const {
    if (offset == 0) {
      return Left(numberOfCharacters);
    } else {
      const char* pstr = str;
      for (size_t i = 0; i < offset; i++) {
        pstr = UTF8Util::NextChar(pstr);
      }
      return UTF8StringSliceBase(pstr, numberOfCharacters);
    }
  }

  /** Copies the bytes covered by the slice into a new string. */
  string ToString() const { return string(str, str + byteLength); }

  /**
   * Pointer to the first byte of the slice. The data is NOT guaranteed to be
   * NUL-terminated at ByteLength().
   */
  const char* CString() const { return str; }

  /**
   * Number of leading characters this slice has in common with `that`.
   *
   * @return the index of the first differing character, or the length of the
   *         shorter slice when one slice is a prefix of the other.
   */
  LengthType CommonPrefixLength(const UTF8StringSliceBase& that) const {
    if (str == that.str) {
      // Same start address: everything up to the shorter length is shared.
      return (std::min)(utf8Length, that.utf8Length);
    } else {
      const char* pstr1 = str;
      const char* pstr2 = that.str;
      for (size_t length = 0; length < utf8Length && length < that.utf8Length;
           length++) {
        size_t charLen1 = UTF8Util::NextCharLength(pstr1);
        size_t charLen2 = UTF8Util::NextCharLength(pstr2);
        if (charLen1 != charLen2 || strncmp(pstr1, pstr2, charLen1) != 0) {
          return static_cast<LengthType>(length);
        }
        pstr1 += charLen1;
        pstr2 += charLen2;
      }
      // No mismatch within the shorter slice: the shorter slice is entirely
      // a prefix of the longer one. (This used to return 0, which disagreed
      // with the same-pointer branch above.)
      return (std::min)(utf8Length, that.utf8Length);
    }
  }

  /** Drops the first character of the slice; no-op on an empty slice. */
  void MoveRight() {
    if (utf8Length > 0) {
      const size_t charLen = UTF8Util::NextCharLength(str);
      str += charLen;
      utf8Length--;
      byteLength -= charLen;
    }
  }

  /**
   * Drops the last character of the slice; no-op on an empty slice.
   * The start pointer is left unchanged.
   */
  void MoveLeft() {
    if (utf8Length > 0) {
      const size_t charLen = UTF8Util::PrevCharLength(str + byteLength);
      utf8Length--;
      byteLength -= charLen;
    }
  }

  /**
   * Three-way comparison that walks both slices from their last character
   * towards their first.
   *
   * For each pair of characters the shared leading bytes are compared with
   * strncmp; if those are equal the character with fewer bytes orders first.
   * When all compared characters are equal the slice with fewer characters
   * orders first.
   *
   * @return -1, 0 or 1
   */
  int ReverseCompare(const UTF8StringSliceBase& that) const {
    const char* pstr1 = str + byteLength;
    const char* pstr2 = that.str + that.byteLength;
    const size_t length = (std::min)(utf8Length, that.utf8Length);
    for (size_t i = 0; i < length; i++) {
      const size_t charLen1 = UTF8Util::PrevCharLength(pstr1);
      const size_t charLen2 = UTF8Util::PrevCharLength(pstr2);
      pstr1 -= charLen1;
      pstr2 -= charLen2;
      const int cmp = strncmp(pstr1, pstr2, (std::min)(charLen1, charLen2));
      if (cmp < 0) {
        return -1;
      } else if (cmp > 0) {
        return 1;
      } else if (charLen1 < charLen2) {
        return -1;
      } else if (charLen1 > charLen2) {
        return 1;
      }
    }
    if (utf8Length < that.utf8Length) {
      return -1;
    } else if (utf8Length > that.utf8Length) {
      return 1;
    } else {
      return 0;
    }
  }

  /**
   * Byte offset of the first occurrence of `pattern` inside this slice.
   *
   * The search runs on a temporary copy made by ToString(). When the pattern
   * is not found the result is std::string::npos cast to LengthType.
   */
  LengthType FindBytePosition(const UTF8StringSliceBase& pattern) const {
    return static_cast<LengthType>(
        ToString().find(pattern.str, 0, pattern.byteLength));
  }

  bool operator<(const UTF8StringSliceBase& that) const {
    return Compare(that) < 0;
  }

  bool operator>(const UTF8StringSliceBase& that) const {
    return Compare(that) > 0;
  }

  bool operator==(const UTF8StringSliceBase& that) const {
    // Same start and same character count means the same slice; skip the
    // byte comparison in that case.
    return (str == that.str && utf8Length == that.utf8Length) ||
           Compare(that) == 0;
  }

  bool operator!=(const UTF8StringSliceBase& that) const {
    return !this->operator==(that);
  }

  /**
   * Hash functor over the bytes of a slice, using the FNVHash variant that
   * matches the width of size_t.
   */
  class Hasher {
  public:
    size_t operator()(const UTF8StringSliceBase& text) const {
      return internal::FNVHash<sizeof(size_t)>(text.CString(),
                                               text.ByteLength());
    }
  };

private:
  /**
   * Forward three-way comparison: strncmp over the shorter byte length, with
   * ties broken by character count (fewer characters orders first).
   * Note that strncmp stops at the first NUL byte.
   *
   * @return negative, 0 or positive
   */
  inline int Compare(const UTF8StringSliceBase& that) const {
    int cmp = strncmp(str, that.str, (std::min)(byteLength, that.byteLength));
    if (cmp == 0) {
      if (utf8Length < that.utf8Length) {
        cmp = -1;
      } else if (utf8Length > that.utf8Length) {
        cmp = 1;
      } else {
        cmp = 0;
      }
    }
    return cmp;
  }

  /**
   * Sets byteLength to the number of bytes spanned by the first utf8Length
   * characters starting at str.
   */
  void CalculateByteLength() {
    const char* pstr = str;
    for (size_t i = 0; i < utf8Length; i++) {
      pstr = UTF8Util::NextChar(pstr);
    }
    byteLength = static_cast<LengthType>(pstr - str);
  }

  const char* str;       // first byte of the slice (not owned)
  LengthType utf8Length; // length in characters
  LengthType byteLength; // length in bytes
};

// Slice type whose character and byte lengths are stored as size_t.
typedef UTF8StringSliceBase<size_t> UTF8StringSlice;

/**
 * Stream insertion for slices: writes the bytes covered by the slice (via a
 * temporary copy from ToString()) and returns the stream for chaining.
 */
template <typename LENGTH_TYPE>
std::ostream& operator<<(::std::ostream& os,
                         const UTF8StringSliceBase<LENGTH_TYPE>& str) {
  os << str.ToString();
  return os;
}

} // namespace opencc
winlib: update opencc to 2020-04-26 2022-02-11 12:55:16 +00:00			`/*`
			`* Open Chinese Convert`
			`*`
			`* Copyright 2015 BYVoid <byvoid@byvoid.com>`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#include "Common.hpp"`
			`#include "UTF8Util.hpp"`

			`namespace opencc {`

			`namespace internal {`

			`inline size_t FNVHash(const char* text, const size_t byteLength,`
			`const size_t FNV_prime, const size_t FNV_offset_basis) {`
			`size_t hash = FNV_offset_basis;`
			`for (const char* pstr = text; pstr < text + byteLength; pstr++) {`
			`hash ^= *pstr;`
			`hash *= FNV_prime;`
			`}`
			`return hash;`
			`}`

			`template <int> size_t FNVHash(const char* text, const size_t byteLength);`

			`template <>`
			`inline size_t FNVHash<4>(const char* text, const size_t byteLength) {`
			`return FNVHash(text, byteLength, 16777619UL, 2166136261UL);`
			`}`

			`#if SIZE_MAX == 0xffffffffffffffff`
			`template <>`
			`inline size_t FNVHash<8>(const char* text, const size_t byteLength) {`
			`return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);`
			`}`
			`#endif`

			`} // namespace internal`

			`template <typename LENGTH_TYPE> class UTF8StringSliceBase {`
			`public:`
			`typedef LENGTH_TYPE LengthType;`

			`UTF8StringSliceBase(const char* _str)`
			`: str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),`
			`byteLength(static_cast<LengthType>(strlen(_str))) {}`

			`UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)`
			`: str(_str), utf8Length(_utf8Length) {`
			`CalculateByteLength();`
			`}`

			`UTF8StringSliceBase(const char* _str, const LengthType _utf8Length,`
			`const LengthType _byteLength)`
			`: str(_str), utf8Length(_utf8Length), byteLength(_byteLength) {`
			`CalculateByteLength();`
			`}`

			`LengthType UTF8Length() const { return utf8Length; }`

			`LengthType ByteLength() const { return byteLength; }`

			`UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {`
			`if (numberOfCharacters == UTF8Length()) {`
			`return *this;`
			`} else {`
			`return UTF8StringSliceBase(str, numberOfCharacters);`
			`}`
			`}`

			`UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {`
			`if (numberOfCharacters == UTF8Length()) {`
			`return *this;`
			`} else {`
			`const char* pstr = str + byteLength;`
			`for (size_t i = 0; i < numberOfCharacters; i++) {`
			`pstr = UTF8Util::PrevChar(pstr);`
			`}`
			`return UTF8StringSliceBase(pstr, numberOfCharacters);`
			`}`
			`}`

			`UTF8StringSliceBase SubString(const LengthType offset,`
			`const LengthType numberOfCharacters) const {`
			`if (offset == 0) {`
			`return Left(numberOfCharacters);`
			`} else {`
			`const char* pstr = str;`
			`for (size_t i = 0; i < offset; i++) {`
			`pstr = UTF8Util::NextChar(pstr);`
			`}`
			`return UTF8StringSliceBase(pstr, numberOfCharacters);`
			`}`
			`}`

			`string ToString() const { return string(str, str + byteLength); }`

			`const char* CString() const { return str; }`

			`LengthType CommonPrefixLength(const UTF8StringSliceBase& that) const {`
			`if (str == that.str) {`
			`return std::min(utf8Length, that.utf8Length);`
			`} else {`
			`const char* pstr1 = str;`
			`const char* pstr2 = that.str;`
			`for (size_t length = 0; length < utf8Length && length < that.utf8Length;`
			`length++) {`
			`size_t charLen1 = UTF8Util::NextCharLength(pstr1);`
			`size_t charLen2 = UTF8Util::NextCharLength(pstr2);`
			`if (charLen1 != charLen2 \|\| strncmp(pstr1, pstr2, charLen1) != 0) {`
			`return length;`
			`}`
			`pstr1 += charLen1;`
			`pstr2 += charLen2;`
			`}`
			`return 0;`
			`}`
			`}`

			`void MoveRight() {`
			`if (utf8Length > 0) {`
			`const size_t charLen = UTF8Util::NextCharLength(str);`
			`str += charLen;`
			`utf8Length--;`
			`byteLength -= charLen;`
			`}`
			`}`

			`void MoveLeft() {`
			`if (utf8Length > 0) {`
			`const size_t charLen = UTF8Util::PrevCharLength(str + byteLength);`
			`utf8Length--;`
			`byteLength -= charLen;`
			`}`
			`}`

			`int ReverseCompare(const UTF8StringSliceBase& that) const {`
			`const char* pstr1 = str + byteLength;`
			`const char* pstr2 = that.str + that.byteLength;`
			`const size_t length = std::min(utf8Length, that.utf8Length);`
			`for (size_t i = 0; i < length; i++) {`
			`const size_t charLen1 = UTF8Util::PrevCharLength(pstr1);`
			`const size_t charLen2 = UTF8Util::PrevCharLength(pstr2);`
			`pstr1 -= charLen1;`
			`pstr2 -= charLen2;`
			`const int cmp = strncmp(pstr1, pstr2, std::min(charLen1, charLen2));`
			`if (cmp < 0) {`
			`return -1;`
			`} else if (cmp > 0) {`
			`return 1;`
			`} else if (charLen1 < charLen2) {`
			`return -1;`
			`} else if (charLen1 > charLen2) {`
			`return 1;`
			`}`
			`}`
			`if (utf8Length < that.utf8Length) {`
			`return -1;`
			`} else if (utf8Length > that.utf8Length) {`
			`return 1;`
			`} else {`
			`return 0;`
			`}`
			`}`

			`LengthType FindBytePosition(const UTF8StringSliceBase& pattern) const {`
			`return static_cast<LengthType>(`
			`ToString().find(pattern.str, 0, pattern.byteLength));`
			`}`

			`bool operator<(const UTF8StringSliceBase& that) const {`
			`return Compare(that) < 0;`
			`}`

			`bool operator>(const UTF8StringSliceBase& that) const {`
			`return Compare(that) > 0;`
			`}`

			`bool operator==(const UTF8StringSliceBase& that) const {`
			`return (str == that.str && utf8Length == that.utf8Length) \|\|`
			`Compare(that) == 0;`
			`}`

			`bool operator!=(const UTF8StringSliceBase& that) const {`
			`return !this->operator==(that);`
			`}`

			`class Hasher {`
			`public:`
			`size_t operator()(const UTF8StringSliceBase& text) const {`
			`return internal::FNVHash<sizeof(size_t)>(text.CString(),`
			`text.ByteLength());`
			`}`
			`};`

			`private:`
			`inline int Compare(const UTF8StringSliceBase& that) const {`
			`int cmp = strncmp(str, that.str, std::min(byteLength, that.byteLength));`
			`if (cmp == 0) {`
			`if (utf8Length < that.utf8Length) {`
			`cmp = -1;`
			`} else if (utf8Length > that.utf8Length) {`
			`cmp = 1;`
			`} else {`
			`cmp = 0;`
			`}`
			`}`
			`return cmp;`
			`}`

			`void CalculateByteLength() {`
			`const char* pstr = str;`
			`for (size_t i = 0; i < utf8Length; i++) {`
			`pstr = UTF8Util::NextChar(pstr);`
			`}`
			`byteLength = static_cast<LengthType>(pstr - str);`
			`}`

			`const char* str;`
			`LengthType utf8Length;`
			`LengthType byteLength;`
			`};`

			`typedef UTF8StringSliceBase<size_t> UTF8StringSlice;`

			`template <typename LENGTH_TYPE>`
			`std::ostream& operator<<(::std::ostream& os,`
			`const UTF8StringSliceBase<LENGTH_TYPE>& str) {`
			`return os << str.ToString();`
			`}`

			`} // namespace opencc`