mirror of
https://gitlab.com/sheaf/metabrush.git
synced 2024-12-23 22:04:07 +00:00
1266 lines
36 KiB
C++
1266 lines
36 KiB
C++
|
#pragma once
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// Copyright (c) 2024 Walter Mascarenhas
|
||
|
//
|
||
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
||
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||
|
//
|
||
|
// The MPL has a "Disclaimer of Warranty" and a "Limitation of Liability",
|
||
|
// and we provide no guarantee regarding the correctness of this source code.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
#include <bit>
|
||
|
#include <cassert>
|
||
|
#include <immintrin.h>
|
||
|
|
||
|
typedef unsigned int uint;
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
namespace wm::nlp::lp2d {
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// avx2 wrappers, to simplify the notation a bit and clarify the use of immediates
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
inline __m256d and_d( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_and_pd( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256i and_i( __m256i x, __m256d y )
|
||
|
{
|
||
|
__m256i yi = _mm256_castpd_si256( y );
|
||
|
return _mm256_and_si256( x, yi );
|
||
|
}
|
||
|
|
||
|
inline __m256i and_i( __m256d x, __m256i y )
|
||
|
{
|
||
|
__m256i xi = _mm256_castpd_si256( x );
|
||
|
__m256i r = _mm256_and_si256( xi, y );
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
inline __m256i and_i( __m256i x, __m256i y )
|
||
|
{
|
||
|
return _mm256_and_si256( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256d and_not_d( __m256i x, __m256i y )
|
||
|
{
|
||
|
__m256d xd = _mm256_castsi256_pd( x );
|
||
|
__m256d yd = _mm256_castsi256_pd( y );
|
||
|
return _mm256_andnot_pd( xd, yd );
|
||
|
}
|
||
|
|
||
|
inline __m256d and_not_d( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_andnot_pd( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256i and_not_i( __m256i x, __m256i y )
|
||
|
{
|
||
|
__m256d xd = _mm256_castsi256_pd( x );
|
||
|
__m256d yd = _mm256_castsi256_pd( y );
|
||
|
__m256d rd = _mm256_andnot_pd( xd, yd );
|
||
|
__m256i r = _mm256_castpd_si256( rd );
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
template < uint I0, uint I1, uint I2, uint I3 >
|
||
|
requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) )
|
||
|
__m256d blend( __m256d x, __m256d y )
|
||
|
{
|
||
|
__m256d p = _mm256_blend_pd( x, y, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
template < uint I0, uint I1, uint I2, uint I3 >
|
||
|
requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) )
|
||
|
__m256i blend( __m256i x, __m256i y )
|
||
|
{
|
||
|
__m256d xd = _mm256_castsi256_pd( x );
|
||
|
__m256d yd = _mm256_castsi256_pd( y );
|
||
|
__m256d pd = _mm256_blend_pd( xd, yd, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) );
|
||
|
__m256i p = _mm256_castpd_si256( pd );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
inline __m256d blend( __m256d a, __m256d b, __m256d mask )
|
||
|
{
|
||
|
__m256d blended = _mm256_blendv_pd( a, b, mask );
|
||
|
return blended;
|
||
|
}
|
||
|
|
||
|
inline __m256d blend( __m256d a, __m256d b, __m256i mask )
|
||
|
{
|
||
|
__m256d mask_d = _mm256_castsi256_pd( mask );
|
||
|
__m256d blended = _mm256_blendv_pd( a, b, mask_d );
|
||
|
return blended;
|
||
|
}
|
||
|
|
||
|
inline __m256i blend( __m256i a, __m256i b, __m256i mask )
|
||
|
{
|
||
|
__m256d a_d = _mm256_castsi256_pd( a );
|
||
|
__m256d b_d = _mm256_castsi256_pd( b );
|
||
|
__m256d mask_d = _mm256_castsi256_pd( mask );
|
||
|
__m256d blended_d = _mm256_blendv_pd( a_d, b_d, mask_d );
|
||
|
__m256i blended = _mm256_castpd_si256( blended_d );
|
||
|
return blended;
|
||
|
}
|
||
|
|
||
|
inline __m256d blend_max( __m256d x, __m256d y, __m256d greater_mask )
|
||
|
{
|
||
|
__m256d max = blend( y, x, greater_mask );
|
||
|
return max;
|
||
|
}
|
||
|
|
||
|
inline __m256d blend_min( __m256d x, __m256d y, __m256d greater_mask )
|
||
|
{
|
||
|
__m256d min = blend( x, y, greater_mask );
|
||
|
return min;
|
||
|
}
|
||
|
|
||
|
inline __m256d cast_d( __m256i x )
|
||
|
{
|
||
|
return _mm256_castsi256_pd( x );
|
||
|
}
|
||
|
|
||
|
inline __m256i cast_i( __m256d x )
|
||
|
{
|
||
|
return _mm256_castpd_si256( x );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// gather_lanes< a, b >( x ), with a,b in { 0, 1, 2 } returns { x.lane( a ), x.lane( b ) }
|
||
|
// with x.lane( 2 ) = 0
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
constexpr int clear_lane = 8;
|
||
|
|
||
|
template < uint I0, uint I1 >
|
||
|
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
|
||
|
__m256i gather_lanes( __m256i x )
|
||
|
{
|
||
|
constexpr int lane_0 = I0;
|
||
|
constexpr int lane_1 = I1 << 4;
|
||
|
__m256d xd = _mm256_castsi256_pd( x );
|
||
|
__m256d gd = _mm256_permute2f128_pd( xd, xd, lane_0 | lane_1 );
|
||
|
__m256i g = _mm256_castpd_si256( gd );
|
||
|
return g;
|
||
|
}
|
||
|
|
||
|
template < uint I0, uint I1 >
|
||
|
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
|
||
|
__m256d gather_lanes( __m256d x )
|
||
|
{
|
||
|
constexpr int lane_0 = I0;
|
||
|
constexpr int lane_1 = I1 << 4;
|
||
|
|
||
|
__m256d p;
|
||
|
if constexpr ( ( I0 == 0 ) && ( I1 == 1 ) )
|
||
|
p = x;
|
||
|
else
|
||
|
p = _mm256_permute2f128_pd( x, x, lane_0 | lane_1 );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// gather_lanes< a, b >( x, y ), with a,b in { 0, 1, clear_lane } returns { x.lane( a ), y.lane( b ) }
|
||
|
// with x.lane( clear_lane ) = 0
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
template < uint I0, uint I1 >
|
||
|
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
|
||
|
__m256d gather_lanes( __m256d x, __m256d y )
|
||
|
{
|
||
|
constexpr int lane_0 = I0;
|
||
|
constexpr int lane_1 = ( (I1 == clear_lane) ? clear_lane : ( I1 + 2 ) ) << 4;
|
||
|
|
||
|
__m256d p;
|
||
|
if( ( I0 == 0 ) && ( I1 == 1 ) )
|
||
|
p = blend< 0, 0, 1, 1 >( x, y );
|
||
|
else
|
||
|
p = _mm256_permute2f128_pd( x, y, lane_0 | lane_1 );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// gather_high returns { x[ 1 ], y[ 1 ], x[ 3 ], y[ 3 ] }
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
inline __m256d gather_high( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_unpackhi_pd( x, y );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] }
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
inline __m256d gather_low( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_unpacklo_pd( x, y );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// high_bits_64 returns the bits 63, 64 + 63, 128 + 63 and 196 + 63 of x
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
inline int high_bits_64( __m256d x )
|
||
|
{
|
||
|
return _mm256_movemask_pd( x );
|
||
|
}
|
||
|
|
||
|
inline int high_bits_64( __m256i x )
|
||
|
{
|
||
|
__m256d xd = cast_d( x );
|
||
|
return _mm256_movemask_pd( xd );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] }
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
inline __m256d is_greater( __m256d x, __m256d y )
|
||
|
{
|
||
|
__m256d gt = _mm256_cmp_pd( x, y, _CMP_GT_OQ );
|
||
|
return gt;
|
||
|
}
|
||
|
|
||
|
inline __m256i ones_i()
|
||
|
{
|
||
|
__m256i r;
|
||
|
return _mm256_cmpeq_epi64( r, r );
|
||
|
}
|
||
|
|
||
|
inline __m256d or_d( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_or_pd( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256i or_i( __m256i x, __m256i y )
|
||
|
{
|
||
|
return _mm256_or_si256( x, y );
|
||
|
}
|
||
|
|
||
|
template < uint I0, uint I1, uint I2, uint I3 >
|
||
|
requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) )
|
||
|
__m256d permute( __m256d x )
|
||
|
{
|
||
|
__m256d p;
|
||
|
if constexpr ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 > 1 ) && ( I3 > 1 ) )
|
||
|
p = _mm256_permute_pd( x, ( I0 | ( I1 << 1 ) | ( ( I2 - 2 ) << 2 ) | ( ( I3 - 2 ) << 3 ) ) );
|
||
|
else
|
||
|
p = _mm256_permute4x64_pd( x, ( I0 | ( I1 << 2 ) | ( I2 << 4 ) | ( I3 << 6 ) ) );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
template < uint I0, uint I1, uint I2, uint I3 >
|
||
|
requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) )
|
||
|
__m256i permute( __m256i x )
|
||
|
{
|
||
|
__m256d xd = _mm256_castsi256_pd( x );
|
||
|
__m256d pd = permute< I0, I1, I2, I3 >( xd );
|
||
|
__m256i p = _mm256_castpd_si256( pd );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
inline __m256d xor_d( __m256d x, __m256d y )
|
||
|
{
|
||
|
return _mm256_xor_pd( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256d xor_d( __m256d x, __m256i y )
|
||
|
{
|
||
|
__m256d yd = cast_d( y );
|
||
|
return _mm256_xor_pd( x, yd );
|
||
|
}
|
||
|
|
||
|
inline __m256d xor_d( __m256i x, __m256d y )
|
||
|
{
|
||
|
__m256d xd = cast_d( x );
|
||
|
return _mm256_xor_pd( xd, y );
|
||
|
}
|
||
|
|
||
|
inline __m256i xor_i( __m256i x, __m256i y )
|
||
|
{
|
||
|
return _mm256_xor_si256( x, y );
|
||
|
}
|
||
|
|
||
|
inline __m256d zero_d()
|
||
|
{
|
||
|
__m256d r;
|
||
|
return _mm256_xor_pd( r, r );
|
||
|
}
|
||
|
|
||
|
inline __m256i zero_i()
|
||
|
{
|
||
|
__m256i r;
|
||
|
return _mm256_xor_si256( r, r );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// avx2 wrappers, to simplify the notation a bit and clarify the use of immediates
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
enum class location
|
||
|
{
|
||
|
null = -1024,
|
||
|
out = -1,
|
||
|
border = 0,
|
||
|
in = 1
|
||
|
};
|
||
|
|
||
|
enum class bounded_convex_type
|
||
|
{
|
||
|
empty = 0,
|
||
|
point = 1,
|
||
|
segment = 2,
|
||
|
polygon = 3
|
||
|
};
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// vertex_0 and vertex_1 scatter the arrays
|
||
|
//
|
||
|
// { inf_y, inf_z, inf_x, ? } and
|
||
|
// { m_sup_y, m_sup_z, m_sup_x, ? }
|
||
|
//
|
||
|
// to vertex_pair, in the x, y, z order
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
struct vertex_0
|
||
|
{
|
||
|
static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup )
|
||
|
{
|
||
|
__m256d y = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) }
|
||
|
__m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ? ? }
|
||
|
__m256d x = gather_lanes< 1, 1 >( y ); // { inf( x ), m_sup( x ), ? ? }
|
||
|
|
||
|
vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( x, vertex_pair[ 0 ] );
|
||
|
vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( y, vertex_pair[ 1 ] );
|
||
|
vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( z, vertex_pair[ 2 ] );
|
||
|
}
|
||
|
};
|
||
|
|
||
|
struct vertex_1
|
||
|
{
|
||
|
static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup )
|
||
|
{
|
||
|
__m256d x = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) }
|
||
|
__m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ?, ? }
|
||
|
__m256d y = gather_lanes< 0, 0 >( x ); // { ?, ?, inf( y ), m_sup( y ) }
|
||
|
z = gather_lanes< 0, 0 >( z ); // { ?. . inf( z ), m_sup( z ) }
|
||
|
|
||
|
vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 0 ], x );
|
||
|
vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 1 ], y );
|
||
|
vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 2 ], z );
|
||
|
}
|
||
|
};
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// cross computes the solution ( x / z, y / z ) of the equation
|
||
|
//
|
||
|
// a[ 0 ] x + a[ 1 ] y = a[ 2 ]
|
||
|
// b[ 0 ] x + b[ 1 ] y = b[ 2 ]
|
||
|
//
|
||
|
// which is
|
||
|
//
|
||
|
// x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ]
|
||
|
// y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ]
|
||
|
// z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ]
|
||
|
//
|
||
|
// which is convenient to order as
|
||
|
//
|
||
|
// y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ]
|
||
|
// z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ]
|
||
|
// x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ]
|
||
|
//
|
||
|
// It assumes that x, y >= 0 and z > 0 and makes sure that the rounded values of
|
||
|
// x, y and z also satisfy these lower bounds.
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
template < class Vertex >
|
||
|
inline void cross( __m256d* vertex_pair, __m256d a, __m256d b )
|
||
|
{
|
||
|
__m256d inf_a = _mm256_movedup_pd( a ); // { a[ 0 ], a[ 0 ], a[ 2 ], ? }
|
||
|
__m256d m_sup_b = _mm256_movedup_pd( b ); // { b[ 0 ], b[ 0 ], b[ 2 ], ? }
|
||
|
|
||
|
__m256d inf_b = permute< 2, 1, 1, 3 >( b ); // { b[ 2 ], b[ 1 ], b[ 1 ], ? }
|
||
|
__m256d m_sup_a = permute< 2, 1, 1, 3 >( a ); // { a[ 2 ], a[ 1 ], a[ 1 ], ? }
|
||
|
|
||
|
__m256d inf_h = _mm256_mul_pd( inf_a, inf_b );
|
||
|
__m256d m_sup_h = _mm256_mul_pd( m_sup_a, m_sup_b );
|
||
|
|
||
|
__m256d inf = _mm256_fnmadd_pd( m_sup_a, m_sup_b, inf_h ); // inf(x) = rounded_down( x )
|
||
|
__m256d m_sup = _mm256_fnmadd_pd( inf_a, inf_b, m_sup_h ); // -sup(x) = rounded_down( -x )
|
||
|
|
||
|
// x,y must be >= 0 and z must be positive and the computed values violate this due to
|
||
|
// rounding. In this rare case in which inf[ i ] <= 0 I can prove that the exact value
|
||
|
// of inf[ i ] is m_sup[ i ] - inf[ i ]
|
||
|
|
||
|
__m256d zero = zero_d();
|
||
|
__m256d cmp = _mm256_cmp_pd( inf, zero, _CMP_LE_OQ ); // rounded_down( x ) <= 0 ?
|
||
|
__m256d ds = _mm256_sub_pd( m_sup, inf );
|
||
|
inf = blend( inf, ds, cmp );
|
||
|
|
||
|
Vertex::scatter( vertex_pair, inf, m_sup );
|
||
|
}
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// exact_location returns the exact location of the intersection ( x, y ) of the lines
|
||
|
//
|
||
|
// b0[ 0 ] x + b0[ 1 ] y >= b0[ 2 ]
|
||
|
// b1[ 1 ] x + b1[ 1 ] y >= b1[ 2 ]
|
||
|
//
|
||
|
// with respect to the set
|
||
|
//
|
||
|
// e0 x + e1 y >= e2
|
||
|
//
|
||
|
// under the assumption that
|
||
|
//
|
||
|
// d = b0[ 0 ] b1[ 1 ] - b0[ 1 ] b1[ 1 ] > 0
|
||
|
//
|
||
|
// where ei = edge[ i ], b0 = border[ 0 ] and b1 = b[ 1 ]
|
||
|
//
|
||
|
// This amounts to evaluating the sign of
|
||
|
//
|
||
|
// p = e0 r + e1 s - e2 d
|
||
|
//
|
||
|
// r = b0[ 2 ] b1[ 1 ] - b0[ 1 ] b1[ 2 ]
|
||
|
// s = b0[ 0 ] b1[ 2 ] - b0[ 2 ] b1[ 0 ] ,
|
||
|
//
|
||
|
// which motivates the evaluation of the six products
|
||
|
//
|
||
|
// group 1 group 2
|
||
|
// e0 b0[ 2 ] b1[ 1 ], -e0 b0[ 1 ] b1[ 2 ]
|
||
|
// e1 b0[ 0 ] b1[ 2 ], -e1 b0[ 2 ] b1[ 0 ]
|
||
|
// e2 b0[ 1 ] b1[ 0 ]. -e2 b0[ 0 ] b1[ 1 ]
|
||
|
//
|
||
|
// and we want to find the sign of their sum.
|
||
|
//
|
||
|
// The function assumes that the edges entries are either normal or zero, ie.,
|
||
|
// there are no subnormal entries.
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
location exact_location( __m256d e, __m256d b0, __m256d b1 );
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// The struct bounded convex represents a bounded convex set on the plane, which can
|
||
|
// have one of the following types:
|
||
|
//
|
||
|
// a) empty
|
||
|
// b) ṕoint
|
||
|
// c) segment
|
||
|
// d) polygon
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
struct bounded_convex final
|
||
|
{
|
||
|
constexpr bounded_convex( int alloc, __m256d* edge, __m256d* vertex_p )
|
||
|
: alloc( alloc ), edge( edge ), vertex_p( vertex_p ){}
|
||
|
|
||
|
void setup( __m256d box );
|
||
|
|
||
|
int insert_vertex( int at );
|
||
|
int remove_vertices( int begin_vertex, int end_vertex );
|
||
|
|
||
|
void set_empty()
|
||
|
{
|
||
|
end = 0;
|
||
|
begin = 0;
|
||
|
type = bounded_convex_type::empty;
|
||
|
}
|
||
|
|
||
|
void set_point( int first_vertex )
|
||
|
{
|
||
|
end = 1;
|
||
|
begin = 0;
|
||
|
type = bounded_convex_type::point;
|
||
|
edge[ 0 ] = edge[ first_vertex ];
|
||
|
edge[ 1 ] = edge[ first_vertex + 1 ];
|
||
|
|
||
|
__m256d* src = vertex_pair( first_vertex );
|
||
|
|
||
|
if( first_vertex & 0x1 )
|
||
|
{
|
||
|
vertex_p[ 0 ] = gather_lanes< 1, 1 >( src[ 0 ] );
|
||
|
vertex_p[ 1 ] = gather_lanes< 1, 1 >( src[ 1 ] );
|
||
|
vertex_p[ 2 ] = gather_lanes< 1, 1 >( src[ 2 ] );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
vertex_p[ 0 ] = src[ 0 ];
|
||
|
vertex_p[ 1 ] = src[ 1 ];
|
||
|
vertex_p[ 2 ] = src[ 2 ];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void set_to_last_segment();
|
||
|
|
||
|
void set_segment( int first_vertex )
|
||
|
{
|
||
|
end = 2;
|
||
|
begin = 0;
|
||
|
type = bounded_convex_type::segment;
|
||
|
edge[ 0 ] = edge[ first_vertex ];
|
||
|
edge[ 1 ] = edge[ first_vertex + 1 ];
|
||
|
edge[ 2 ] = edge[ first_vertex + 2 ];
|
||
|
|
||
|
__m256d* src = vertex_pair( first_vertex );
|
||
|
|
||
|
if( first_vertex & 0x1 )
|
||
|
{
|
||
|
vertex_p[ 0 ] = gather_lanes< 1, 0 >( src[ 0 ], src[ 3 ] );
|
||
|
vertex_p[ 1 ] = gather_lanes< 1, 0 >( src[ 1 ], src[ 4 ] );
|
||
|
vertex_p[ 2 ] = gather_lanes< 1, 0 >( src[ 2 ], src[ 5 ] );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
vertex_p[ 0 ] = *src;
|
||
|
vertex_p[ 1 ] = *( ++src );
|
||
|
vertex_p[ 2 ] = *( ++src );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
__m256d box() const;
|
||
|
void push( __m256d* begin, __m256d* end );
|
||
|
|
||
|
__m256d plain_edge( int i ) const
|
||
|
{
|
||
|
return edge[ begin + i ];
|
||
|
}
|
||
|
|
||
|
void plain_vertex( __m256d* inf_msup, int i ) const
|
||
|
{
|
||
|
int ri = ( begin + i );
|
||
|
__m256d const* vp = vertex_pair( ri );
|
||
|
int di = 2 * ( ( begin + i ) & 0x1 );
|
||
|
inf_msup[ 0 ] = _mm256_setr_pd( vp[ 0 ][ di ], vp[ 1 ][ di ], vp[ 2 ][ di ], 0.0 );
|
||
|
inf_msup[ 1 ] = _mm256_setr_pd( vp[ 0 ][ di + 1 ], vp[ 1 ][ di + 1 ], vp[ 2 ][ di + 1 ], 0.0 );
|
||
|
}
|
||
|
|
||
|
__m256d* vertex_pair( int index )
|
||
|
{
|
||
|
__m256d* p = vertex_p + 3 * ( index / 2 );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
__m256d const* vertex_pair( int index ) const
|
||
|
{
|
||
|
__m256d* p = vertex_p + 3 * ( index / 2 );
|
||
|
return p;
|
||
|
}
|
||
|
|
||
|
int end;
|
||
|
int begin;
|
||
|
int const alloc;
|
||
|
__m256d* const edge;
|
||
|
__m256d* const vertex_p;
|
||
|
bounded_convex_type type;
|
||
|
};
|
||
|
|
||
|
// huge = { 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x7FFF'7FFF'7FFF' }
|
||
|
// tiny = { 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x0000'0000'0000'00000 }
|
||
|
|
||
|
__m256i bounds( __m256i* tiny, __m256i* huge );
|
||
|
|
||
|
int normalize_equations( __m256d* quadrant[ 4 ], int n, __m256i tiny, __m256i huge );
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// The class locator is used to find the location of a point with respect to the half
|
||
|
// plane.
|
||
|
//
|
||
|
// a x + b y >= c z
|
||
|
//
|
||
|
// The template parameters indicate the signs of the coefficients a, b and c.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
template < int flip_x, int flip_y, int flip_z >
|
||
|
struct locator final
|
||
|
{
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
// The raw method assumes that
|
||
|
//
|
||
|
// edges[ 0 ] = { a, a, a, a }
|
||
|
// edges[ 1 ] = { b, b, b, b }
|
||
|
// edges[ 2 ] = { c, c, c, c }
|
||
|
//
|
||
|
// vertex_pair[ 0 ] = { x0_inf, x0_msup, x1_inf, x1_msup }
|
||
|
// vertex_pair[ 1 ] = { y0_inf, y0_msup, y1_inf, y1_msup }
|
||
|
// vertex_pair[ 2 ] = { z0_inf, z0_msup, z1_inf, z1_msup }
|
||
|
//
|
||
|
// and checks the location of the vertices ( x0, y0 ) and ( x1, y1 ) with respect to
|
||
|
// the half plane
|
||
|
//
|
||
|
// (-1)^flip_x a x + (-1)^flip_y b y >= (-1)^flip_z c z
|
||
|
//
|
||
|
// This is done by computing a x + b y - c z rounded up and down.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
static int raw( __m256d const* edge, __m256d const* vertex_pair )
|
||
|
{
|
||
|
__m256d chk;
|
||
|
__m256d zero_d = _mm256_xor_pd( zero_d, zero_d );
|
||
|
|
||
|
if constexpr ( flip_x )
|
||
|
chk = _mm256_mul_pd( edge[ 0 ], permute< 1, 0, 3, 2 >( vertex_pair[ 0 ] ) );
|
||
|
else
|
||
|
chk = _mm256_mul_pd( edge[ 0 ], vertex_pair[ 0 ] );
|
||
|
|
||
|
if constexpr ( flip_y)
|
||
|
chk = _mm256_fmadd_pd( edge[ 1 ], permute< 1, 0, 3, 2 >( vertex_pair[ 1 ] ), chk );
|
||
|
else
|
||
|
chk = _mm256_fmadd_pd( edge[ 1 ], vertex_pair[ 1 ], chk );
|
||
|
|
||
|
if constexpr ( flip_z )
|
||
|
chk = _mm256_fmadd_pd( edge[ 2 ], vertex_pair[ 2 ], chk );
|
||
|
else
|
||
|
chk = _mm256_fmadd_pd( edge[ 2 ], permute< 1, 0, 3, 2 >( vertex_pair[ 2 ] ), chk );
|
||
|
|
||
|
__m256d cmp = _mm256_cmp_pd( chk, zero_d, _CMP_GT_OQ );
|
||
|
|
||
|
int cm = _mm256_movemask_pd( cmp );
|
||
|
|
||
|
return cm;
|
||
|
}
|
||
|
|
||
|
static location locate_vertex_0( __m256d const* edge, __m256d const* vertex_pair )
|
||
|
{
|
||
|
location loc;
|
||
|
int cm = raw( edge, vertex_pair );
|
||
|
|
||
|
if( cm & 0x1 ) // inf( x[ 0 ] ) > 0
|
||
|
loc = location::in;
|
||
|
else
|
||
|
{
|
||
|
if( cm & 0x2 )
|
||
|
loc = location::out;
|
||
|
else
|
||
|
loc = location::null;
|
||
|
}
|
||
|
|
||
|
return loc;
|
||
|
}
|
||
|
|
||
|
static location locate_vertex_1( __m256d const* edge, __m256d const* vertex_pair )
|
||
|
{
|
||
|
location loc;
|
||
|
int cm = raw( edge, vertex_pair );
|
||
|
|
||
|
if( cm & 0x4 ) // the second vertex is in
|
||
|
loc = location::in;
|
||
|
else
|
||
|
{
|
||
|
if( cm & 0x8 ) // the second vertex is out
|
||
|
loc = location::out;
|
||
|
else
|
||
|
loc = location::null;
|
||
|
}
|
||
|
|
||
|
return loc;
|
||
|
}
|
||
|
|
||
|
static void locate_pair( location loc[ 2 ], __m256d const* edge, __m256d const* vertex_pair )
|
||
|
{
|
||
|
// computing a vertex[ 0 ] + b vertex[ 1 ] + c vertex[ 2 ]
|
||
|
|
||
|
int cm = raw( edge, vertex_pair );
|
||
|
if( cm & 0x1 ) // the first vertex is in
|
||
|
loc[ 0 ] = location::in;
|
||
|
else
|
||
|
{
|
||
|
if( cm & 0x2 ) // the first vertex is out
|
||
|
loc[ 0 ] = location::out;
|
||
|
else
|
||
|
loc[ 0 ] = location::null;
|
||
|
}
|
||
|
|
||
|
if( cm & 0x4 ) // the second vertex is in
|
||
|
loc[ 1 ] = location::in;
|
||
|
else
|
||
|
{
|
||
|
if( cm & 0x8 ) // the second vertex is out
|
||
|
loc[ 1 ] = location::out;
|
||
|
else
|
||
|
loc[ 1 ] = location::null;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void insert( bounded_convex& r, __m256d new_edge )
|
||
|
{
|
||
|
__m256i ones = _mm256_cmpeq_epi64( ones, ones );
|
||
|
ones = _mm256_slli_epi64( ones, 63 );
|
||
|
__m256d sign = _mm256_castsi256_pd( ones );
|
||
|
|
||
|
__m256d coefficients[ 3 ] = { permute< 0, 0, 0, 0 >( new_edge ),
|
||
|
permute< 1, 1, 1, 1 >( new_edge ),
|
||
|
permute< 2, 2, 2, 2 >( new_edge ) };
|
||
|
|
||
|
coefficients[ 0 ] = _mm256_andnot_pd( sign, coefficients[ 0 ] );
|
||
|
coefficients[ 1 ] = _mm256_andnot_pd( sign, coefficients[ 1 ] );
|
||
|
coefficients[ 2 ] = _mm256_andnot_pd( sign, coefficients[ 2 ] );
|
||
|
|
||
|
if( r.type != bounded_convex_type::polygon )
|
||
|
{
|
||
|
if( r.type == bounded_convex_type::point )
|
||
|
{
|
||
|
location loc = locate_vertex_0( coefficients, r.vertex_p );
|
||
|
|
||
|
if( loc == location::null )
|
||
|
loc = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] );
|
||
|
|
||
|
if( loc == location::out )
|
||
|
r.set_empty();
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
location loc[ 2 ];
|
||
|
locate_pair( loc, coefficients, r.vertex_p );
|
||
|
|
||
|
if( loc[ 0 ] == location::null )
|
||
|
loc[ 0 ] = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] );
|
||
|
|
||
|
if( loc[ 1 ] == location::null )
|
||
|
loc[ 1 ] = exact_location( new_edge, r.edge[ 1 ], r.edge[ 2 ] );
|
||
|
|
||
|
switch( loc[ 0 ] )
|
||
|
{
|
||
|
case location::in:
|
||
|
{
|
||
|
if( loc[ 1 ] == location::out )
|
||
|
{
|
||
|
r.edge[ 2 ] = new_edge;
|
||
|
cross< vertex_1 >( r.vertex_p, r.edge[ 1 ], new_edge );
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
case location::border:
|
||
|
{
|
||
|
if( loc[ 1 ] == location::out )
|
||
|
{
|
||
|
r.end = 1;
|
||
|
r.type = bounded_convex_type::point;
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
default: // location::out
|
||
|
{
|
||
|
switch( loc[ 1 ] )
|
||
|
{
|
||
|
case location::in:
|
||
|
{
|
||
|
r.edge[ 0 ] = new_edge;
|
||
|
cross< vertex_0 >( r.vertex_p, new_edge, r.edge[ 1 ] );
|
||
|
return;
|
||
|
}
|
||
|
case location::border:
|
||
|
{
|
||
|
r.end = 1;
|
||
|
r.edge[ 0 ] = r.edge[ 1 ];
|
||
|
r.edge[ 1 ] = r.edge[ 2 ];
|
||
|
r.type = bounded_convex_type::point;
|
||
|
r.vertex_p[ 0 ] = gather_lanes< 1, 1 >( r.vertex_p[ 0 ] );
|
||
|
r.vertex_p[ 1 ] = gather_lanes< 1, 1 >( r.vertex_p[ 1 ] );
|
||
|
r.vertex_p[ 2 ] = gather_lanes< 1, 1 >( r.vertex_p[ 2 ] );
|
||
|
return;
|
||
|
}
|
||
|
default:
|
||
|
{
|
||
|
r.set_empty();
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
location loc[ 2 ];
|
||
|
|
||
|
int index = r.begin;
|
||
|
int intersection = -1;
|
||
|
bool intersection_at_vertex;
|
||
|
|
||
|
__m256d* vertex_pair_at_index = r.vertex_pair( index );
|
||
|
|
||
|
auto scan = [ & ]()
|
||
|
{
|
||
|
if( ++index >= r.end )
|
||
|
return location::null;
|
||
|
|
||
|
if( index & 0x1 )
|
||
|
return loc[ 1 ];
|
||
|
|
||
|
vertex_pair_at_index += 3;
|
||
|
|
||
|
if( index + 1 >= r.end )
|
||
|
{
|
||
|
loc[ 0 ] = locate_vertex_0( coefficients, vertex_pair_at_index );
|
||
|
|
||
|
if( loc[ 0 ] == location::null )
|
||
|
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
locate_pair( loc, coefficients, vertex_pair_at_index );
|
||
|
|
||
|
if( loc[ 0 ] == location::null )
|
||
|
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
|
||
|
if( loc[ 1 ] == location::null )
|
||
|
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
|
||
|
}
|
||
|
|
||
|
return loc[ 0 ];
|
||
|
};
|
||
|
|
||
|
if( index & 0x1 )
|
||
|
{
|
||
|
location begin = locate_vertex_1( coefficients, vertex_pair_at_index );
|
||
|
|
||
|
if( begin == location::null )
|
||
|
begin = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
|
||
|
switch( begin )
|
||
|
{
|
||
|
case location::in:
|
||
|
goto cursor_in;
|
||
|
case location::border:
|
||
|
{
|
||
|
intersection = index;
|
||
|
intersection_at_vertex = true;
|
||
|
|
||
|
++index;
|
||
|
vertex_pair_at_index += 3;
|
||
|
|
||
|
locate_pair( loc, coefficients, vertex_pair_at_index );
|
||
|
|
||
|
if( loc[ 0 ] == location::null )
|
||
|
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
|
||
|
if( loc[ 1 ] == location::null )
|
||
|
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
|
||
|
|
||
|
switch( loc[ 0 ] )
|
||
|
{
|
||
|
case location::in:
|
||
|
goto cursor_in;
|
||
|
case location::border:
|
||
|
{
|
||
|
if( loc[ 1 ] == location::out )
|
||
|
r.set_segment( r.begin );
|
||
|
return;
|
||
|
}
|
||
|
case location::out:
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
}
|
||
|
default:
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
locate_pair( loc, coefficients, vertex_pair_at_index );
|
||
|
|
||
|
if( loc[ 0 ] == location::null )
|
||
|
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
|
||
|
if( loc[ 1 ] == location::null )
|
||
|
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
|
||
|
|
||
|
switch( loc[ 0 ] )
|
||
|
{
|
||
|
case location::in:
|
||
|
goto cursor_in;
|
||
|
case location::border:
|
||
|
{
|
||
|
intersection = index;
|
||
|
intersection_at_vertex = true;
|
||
|
|
||
|
++index;
|
||
|
|
||
|
switch( loc[ 1 ] )
|
||
|
{
|
||
|
case location::in:
|
||
|
goto cursor_in;
|
||
|
case location::border:
|
||
|
{
|
||
|
location next = locate_vertex_0( coefficients, vertex_pair_at_index + 3 );
|
||
|
|
||
|
if( next == location::null )
|
||
|
next = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
|
||
|
|
||
|
if( next == location::out )
|
||
|
r.set_segment( r.begin );
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
default:
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
}
|
||
|
default:
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
|
||
|
cursor_in:
|
||
|
{
|
||
|
location next_loc = scan();
|
||
|
|
||
|
switch( next_loc )
|
||
|
{
|
||
|
case location::in:
|
||
|
goto cursor_in;
|
||
|
case location::border:
|
||
|
{
|
||
|
if( intersection >= 0 )
|
||
|
{
|
||
|
if( !intersection_at_vertex )
|
||
|
{
|
||
|
__m256d* vp = r.vertex_pair( intersection - 1 );
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] );
|
||
|
else
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] );
|
||
|
|
||
|
--intersection;
|
||
|
}
|
||
|
|
||
|
r.end = index + 1;
|
||
|
r.begin = intersection;
|
||
|
r.edge[ r.end ] = new_edge;
|
||
|
r.edge[ r.begin ] = new_edge;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
next_loc = scan();
|
||
|
|
||
|
if( next_loc == location::out )
|
||
|
{
|
||
|
intersection = index - 1;
|
||
|
intersection_at_vertex = true;
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
case location::out:
|
||
|
{
|
||
|
if( intersection < 0 )
|
||
|
{
|
||
|
intersection = index;
|
||
|
intersection_at_vertex = false;
|
||
|
goto cursor_out;
|
||
|
}
|
||
|
|
||
|
if( intersection_at_vertex )
|
||
|
r.begin = intersection;
|
||
|
else
|
||
|
{
|
||
|
r.begin = intersection - 1;
|
||
|
__m256d* vp = r.vertex_pair( r.begin );
|
||
|
|
||
|
if( r.begin & 0x1 )
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] );
|
||
|
}
|
||
|
|
||
|
__m256d* vp = r.vertex_pair( index );
|
||
|
if( index & 0x1 )
|
||
|
cross< vertex_1 >( vp, r.edge[ index ], new_edge );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, r.edge[ index ], new_edge );
|
||
|
|
||
|
r.end = index + 1;
|
||
|
r.edge[ r.end ] = new_edge;
|
||
|
r.edge[ r.begin ] = new_edge;
|
||
|
return;
|
||
|
}
|
||
|
case location::null:
|
||
|
{
|
||
|
assert( index == r.end );
|
||
|
|
||
|
if( intersection >= 0 )
|
||
|
{
|
||
|
if( intersection_at_vertex )
|
||
|
{
|
||
|
if( intersection == r.begin )
|
||
|
return;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
__m256d* first_pair = r.vertex_pair( intersection - 1 );
|
||
|
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_0 >( first_pair, new_edge, r.edge[ intersection ] );
|
||
|
else
|
||
|
cross< vertex_1 >( first_pair, new_edge, r.edge[ intersection ] );
|
||
|
|
||
|
--intersection;
|
||
|
}
|
||
|
|
||
|
__m256d* last_pair = r.vertex_pair( index );
|
||
|
|
||
|
if( index & 0x1 )
|
||
|
cross< vertex_1 >( last_pair, r.edge[ index ], new_edge );
|
||
|
else
|
||
|
cross< vertex_0 >( last_pair, r.edge[ index ], new_edge );
|
||
|
|
||
|
++r.end;
|
||
|
r.begin = intersection;
|
||
|
r.edge[ r.end ] = new_edge;
|
||
|
r.edge[ r.begin ] = new_edge;
|
||
|
}
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
cursor_out:
|
||
|
{
|
||
|
location new_loc = scan();
|
||
|
|
||
|
switch( new_loc )
|
||
|
{
|
||
|
case location::in:
|
||
|
{
|
||
|
if( intersection < 0 )
|
||
|
{
|
||
|
intersection = index;
|
||
|
intersection_at_vertex = false;
|
||
|
goto cursor_in;
|
||
|
}
|
||
|
|
||
|
if( intersection + 1 == index )
|
||
|
{
|
||
|
assert( !intersection_at_vertex );
|
||
|
int new_vertex = r.insert_vertex( intersection );
|
||
|
|
||
|
if( new_vertex == intersection )
|
||
|
{
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
if( intersection & 0x1 )
|
||
|
{
|
||
|
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
|
||
|
cross< vertex_0 >( vp + 3, new_edge, r.edge[ intersection + 2 ] );
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ intersection + 2 ] );
|
||
|
}
|
||
|
|
||
|
r.edge[ intersection + 1 ] = new_edge;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
assert( new_vertex == intersection - 1 );
|
||
|
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
|
||
|
if( intersection & 0x1 )
|
||
|
{
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
|
||
|
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge) ;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
|
||
|
cross< vertex_1 >( vp - 3, r.edge[ intersection ], new_edge) ;
|
||
|
}
|
||
|
|
||
|
r.edge[ new_vertex ] = r.edge[ intersection ];
|
||
|
r.edge[ intersection ] = new_edge;
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if( !intersection_at_vertex )
|
||
|
{
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
|
||
|
}
|
||
|
|
||
|
if( intersection + 2 < index )
|
||
|
index = r.remove_vertices( intersection + 1, index - 1 ) + 2;
|
||
|
|
||
|
__m256d* vp = r.vertex_pair( index - 1 );
|
||
|
if( index & 0x1 )
|
||
|
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
|
||
|
else
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
|
||
|
|
||
|
r.edge[ index - 1 ] = new_edge;
|
||
|
return;
|
||
|
}
|
||
|
case location::border:
|
||
|
{
|
||
|
if( intersection >= 0 )
|
||
|
{
|
||
|
if( intersection_at_vertex )
|
||
|
{
|
||
|
if( ( intersection == r.begin ) && ( index + 1 == r.end ) )
|
||
|
{
|
||
|
r.set_to_last_segment();
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
|
||
|
}
|
||
|
|
||
|
if( intersection + 1 < index )
|
||
|
intersection = r.remove_vertices( intersection + 1, index );
|
||
|
|
||
|
r.edge[ intersection + 1 ] = new_edge;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
new_loc = scan();
|
||
|
switch( new_loc )
|
||
|
{
|
||
|
case location::in:
|
||
|
{
|
||
|
intersection = index - 1;
|
||
|
intersection_at_vertex = true;
|
||
|
goto cursor_in;
|
||
|
}
|
||
|
case location::border:
|
||
|
{
|
||
|
r.set_segment( index - 1 );
|
||
|
return;
|
||
|
}
|
||
|
default: // location::out or location::null:
|
||
|
{
|
||
|
r.set_point( index - 1 );
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
case location::out:
|
||
|
goto cursor_out;
|
||
|
case location::null:
|
||
|
{
|
||
|
assert( index == r.end );
|
||
|
|
||
|
if( intersection < 0 )
|
||
|
{
|
||
|
r.set_empty();
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if( intersection_at_vertex )
|
||
|
{
|
||
|
if( intersection == r.begin )
|
||
|
{
|
||
|
r.set_point( r.begin );
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
|
||
|
}
|
||
|
|
||
|
++intersection;
|
||
|
__m256d* vp = r.vertex_pair( intersection );
|
||
|
|
||
|
if( intersection & 0x1 )
|
||
|
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
|
||
|
else
|
||
|
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
|
||
|
|
||
|
r.end = intersection + 1;
|
||
|
r.edge[ intersection ] = new_edge;
|
||
|
r.edge[ r.end ] = r.edge[ r.begin ];
|
||
|
}
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
|
||
|
} // namespace wm::nlp::lp2d
|
||
|
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// The function interval_system_2d solves the system of interval equations
|
||
|
//
|
||
|
// ai x + bi y = ci
|
||
|
//
|
||
|
// for 0 <= i < n_equations and
|
||
|
//
|
||
|
// ai = [ eq[ 6 * i ], eq[ 6 * i + 1 ] ] with eq[ 6 * i ] <= eq[ 6 * i + 1 ]
|
||
|
// bi = [ eq[ 6 * i + 2 ], eq[ 6 * i + 3 ] ] with eq[ 6 * i + 2 ] <= eq[ 6 * i + 3 ]
|
||
|
// ci = [ eq[ 6 * i + 4 ], eq[ 6 * i + 5 ] ] with eq[ 6 * i + 4 ] <= eq[ 6 * i + 5 ]
|
||
|
//
|
||
|
// The function finds the solutions inside the box
|
||
|
//
|
||
|
// box[ 0 ] <= x <= box[ 1 ]
|
||
|
// box[ 2 ] <= y <= box[ 3 ]
|
||
|
//
|
||
|
// The solution consists of up to 4 boxes bx[ 0 ], bx[ 1 ], bx[ 2 ] and bx[ 3 ], with
|
||
|
//
|
||
|
// bx[ i ] = inf_x[ i ] <= x <= sup_x[ i ]
|
||
|
// inf_y[ i ] <= y <= sup_y[ i ]
|
||
|
// for
|
||
|
//
|
||
|
// inf_x[ i ] = solution[ 4 * i ]
|
||
|
// sup_x[ i ] = solution[ 4 * i + 1 ]
|
||
|
// inf_y[ i ] = solution[ 4 * i + 2 ]
|
||
|
// sup_y[ i ] = solution[ 4 * i + 3 ]
|
||
|
//
|
||
|
// The function returns the number of boxes in the solutions, which can be 0,1,2,3 or 4,
|
||
|
// or a negative number if the input is invalid or if it cannot change the rounding mode.
|
||
|
//
|
||
|
// It assumes that:
|
||
|
//
|
||
|
// a) n_equations <= 16
|
||
|
// b) all doubles are finite, ie., not nan or +/-inf
|
||
|
//
|
||
|
// c) box and eq are such that
|
||
|
//
|
||
|
// box[ 0 ] <= box[ 1 ]
|
||
|
// box[ 2 ] <= box[ 3]
|
||
|
// eq[ 2 * i ] <= eq[ 2 i + 1 ] for i = 0,..., 3 * n_equations.
|
||
|
//
|
||
|
// d) box and eq are aligned at a 32 byte boundary
|
||
|
//
|
||
|
// It does not verify the input and its results should not be trusted if the inputs are
|
||
|
// invalid.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------------------
|
||
|
|
||
|
extern "C"
|
||
|
{
|
||
|
int interval_system_2d( double* solution, double const* box, double const* eq, unsigned int n_equations );
|
||
|
}
|