diff --git a/src/strategies/avx2/depquant-avx2.c b/src/strategies/avx2/depquant-avx2.c
index a6ac5a90..357932f9 100644
--- a/src/strategies/avx2/depquant-avx2.c
+++ b/src/strategies/avx2/depquant-avx2.c
@@ -104,7 +104,7 @@ static void check_rd_costs_avx2(const all_depquant_states* const state, const en
       value = _mm_min_epi32(value, max_rice);
       // In the original implementation the goRiceTab is selected beforehand, but since we need to load from
       // potentially four different locations, we need to calculate the offsets and use gather
-      __m128i go_rice_tab = _mm_cvtepi8_epi32(_mm_loadu_si32(&state->m_goRicePar[start]));
+      __m128i go_rice_tab = _mm_cvtepi8_epi32(_mm_loadu_si128((__m128i *)&state->m_goRicePar[start]));
       go_rice_tab = _mm_slli_epi32(go_rice_tab, 5);
       value = _mm_add_epi32(value, go_rice_tab);
 
@@ -144,7 +144,7 @@ static void check_rd_costs_avx2(const all_depquant_states* const state, const en
 
       __m128i max_rice = _mm_set1_epi32(31);
       value = _mm_min_epi32(value, max_rice);
-      __m128i go_rice_tab = _mm_cvtepi8_epi32(_mm_loadu_si32(&state->m_goRicePar[start]));
+      __m128i go_rice_tab = _mm_cvtepi8_epi32(_mm_loadu_si128((__m128i*)&state->m_goRicePar[start]));
       go_rice_tab = _mm_slli_epi32(go_rice_tab, 5);
       value = _mm_add_epi32(value, go_rice_tab);
 
@@ -727,7 +727,7 @@ static void update_state_eos_avx2(context_store* ctxs, const uint32_t scan_pos,
       sig_sbb = _mm_min_epi32(sig_sbb, _mm_set1_epi32(1));
       // Gather is not necessary here put it would require at least five operation to do the same thing
       // so the performance gain in my opinion is not worth the readability loss
-      __m256i sbb_frac_bits = _mm256_i32gather_epi64((int64_t *)cc->m_sbbFlagBits[0], sig_sbb, 8);
+      __m256i sbb_frac_bits = _mm256_i32gather_epi64((const long long int *)cc->m_sbbFlagBits[0], sig_sbb, 8);
       _mm256_store_si256((__m256i*)state->m_sbbFracBits[state_offset], sbb_frac_bits);
 
       memset(&state->m_numSigSbb[state_offset], 0, 4);
@@ -868,7 +868,7 @@ static void update_state_eos_avx2(context_store* ctxs, const uint32_t scan_pos,
     __m128i offsets = _mm_set_epi32(12 * 3, 12 * 2, 12 * 1, 12 * 0);
     offsets = _mm_add_epi32(offsets, _mm_set1_epi32(sigCtxOffsetNext));
     offsets         = _mm_add_epi32(offsets, sum_abs_min);
-    __m256i sig_frac_bits = _mm256_i32gather_epi64((const int64_t *)&state->m_sigFracBitsArray[state_offset][0][0], offsets, 8);
+    __m256i sig_frac_bits = _mm256_i32gather_epi64((long long const*)&state->m_sigFracBitsArray[state_offset][0][0], offsets, 8);
     _mm256_store_si256((__m256i*)&state->m_sigFracBits[state_offset][0], sig_frac_bits);
 
 
@@ -959,7 +959,7 @@ static INLINE void update_states_avx2(
 
       // Again gather is not necessary but it is easier to read and shouldn't have too large of a performance hit
       // Should be true for all gathers here
-      __m256i sbb_frac_bits = _mm256_i32gather_epi64((const int64_t *)state->m_sbbFracBits[0], prv_states, 8);
+      __m256i sbb_frac_bits = _mm256_i32gather_epi64((const long long *)state->m_sbbFracBits[0], prv_states, 8);
       _mm256_store_si256((__m256i*)&state->m_sbbFracBits[state_offset][0], sbb_frac_bits);
 
       // Next three lines: state->m_remRegBins = prvState->m_remRegBins - 1;
@@ -1218,7 +1218,7 @@ static INLINE void update_states_avx2(
         _mm_srli_epi32(_mm_add_epi32(sum_abs1, ones), 1),
         _mm_set1_epi32(3));
       offsets = _mm_add_epi32(offsets, temp);
-      __m256i sig_frac_bits = _mm256_i32gather_epi64((const int64_t *)state->m_sigFracBitsArray[state_offset][0], offsets, 8);
+      __m256i sig_frac_bits = _mm256_i32gather_epi64((const long long *)state->m_sigFracBitsArray[state_offset][0], offsets, 8);
       _mm256_store_si256((__m256i*)&state->m_sigFracBits[state_offset][0], sig_frac_bits);
 
       sum_gt1 = _mm_min_epi32(sum_gt1, _mm_set1_epi32(4));