metabrush/brush-strokes/cbits/lp_2d.hpp

1266 lines
36 KiB
C++

#pragma once
//----------------------------------------------------------------------------------------
// Copyright (c) 2024 Walter Mascarenhas
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// The MPL has a "Disclaimer of Warranty" and a "Limitation of Liability",
// and we provide no guarantee regarding the correctness of this source code.
//
//----------------------------------------------------------------------------------------
#include <bit>
#include <cassert>
#include <immintrin.h>
typedef unsigned int uint;
//----------------------------------------------------------------------------------------
namespace wm::nlp::lp2d {
//----------------------------------------------------------------------------------------
// avx2 wrappers, to simplify the notation a bit and clarify the use of immediates
//----------------------------------------------------------------------------------------
inline __m256d and_d( __m256d x, __m256d y )
{
return _mm256_and_pd( x, y );
}
inline __m256i and_i( __m256i x, __m256d y )
{
__m256i yi = _mm256_castpd_si256( y );
return _mm256_and_si256( x, yi );
}
inline __m256i and_i( __m256d x, __m256i y )
{
__m256i xi = _mm256_castpd_si256( x );
__m256i r = _mm256_and_si256( xi, y );
return r;
}
inline __m256i and_i( __m256i x, __m256i y )
{
return _mm256_and_si256( x, y );
}
inline __m256d and_not_d( __m256i x, __m256i y )
{
__m256d xd = _mm256_castsi256_pd( x );
__m256d yd = _mm256_castsi256_pd( y );
return _mm256_andnot_pd( xd, yd );
}
inline __m256d and_not_d( __m256d x, __m256d y )
{
return _mm256_andnot_pd( x, y );
}
inline __m256i and_not_i( __m256i x, __m256i y )
{
__m256d xd = _mm256_castsi256_pd( x );
__m256d yd = _mm256_castsi256_pd( y );
__m256d rd = _mm256_andnot_pd( xd, yd );
__m256i r = _mm256_castpd_si256( rd );
return r;
}
template < uint I0, uint I1, uint I2, uint I3 >
requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) )
__m256d blend( __m256d x, __m256d y )
{
__m256d p = _mm256_blend_pd( x, y, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) );
return p;
}
template < uint I0, uint I1, uint I2, uint I3 >
requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) )
__m256i blend( __m256i x, __m256i y )
{
__m256d xd = _mm256_castsi256_pd( x );
__m256d yd = _mm256_castsi256_pd( y );
__m256d pd = _mm256_blend_pd( xd, yd, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) );
__m256i p = _mm256_castpd_si256( pd );
return p;
}
inline __m256d blend( __m256d a, __m256d b, __m256d mask )
{
__m256d blended = _mm256_blendv_pd( a, b, mask );
return blended;
}
inline __m256d blend( __m256d a, __m256d b, __m256i mask )
{
__m256d mask_d = _mm256_castsi256_pd( mask );
__m256d blended = _mm256_blendv_pd( a, b, mask_d );
return blended;
}
inline __m256i blend( __m256i a, __m256i b, __m256i mask )
{
__m256d a_d = _mm256_castsi256_pd( a );
__m256d b_d = _mm256_castsi256_pd( b );
__m256d mask_d = _mm256_castsi256_pd( mask );
__m256d blended_d = _mm256_blendv_pd( a_d, b_d, mask_d );
__m256i blended = _mm256_castpd_si256( blended_d );
return blended;
}
inline __m256d blend_max( __m256d x, __m256d y, __m256d greater_mask )
{
__m256d max = blend( y, x, greater_mask );
return max;
}
inline __m256d blend_min( __m256d x, __m256d y, __m256d greater_mask )
{
__m256d min = blend( x, y, greater_mask );
return min;
}
inline __m256d cast_d( __m256i x )
{
return _mm256_castsi256_pd( x );
}
inline __m256i cast_i( __m256d x )
{
return _mm256_castpd_si256( x );
}
//----------------------------------------------------------------------------------------
//
// gather_lanes< a, b >( x ), with a,b in { 0, 1, 2 } returns { x.lane( a ), x.lane( b ) }
// with x.lane( 2 ) = 0
//
//----------------------------------------------------------------------------------------
constexpr int clear_lane = 8;
template < uint I0, uint I1 >
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
__m256i gather_lanes( __m256i x )
{
constexpr int lane_0 = I0;
constexpr int lane_1 = I1 << 4;
__m256d xd = _mm256_castsi256_pd( x );
__m256d gd = _mm256_permute2f128_pd( xd, xd, lane_0 | lane_1 );
__m256i g = _mm256_castpd_si256( gd );
return g;
}
template < uint I0, uint I1 >
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
__m256d gather_lanes( __m256d x )
{
constexpr int lane_0 = I0;
constexpr int lane_1 = I1 << 4;
__m256d p;
if constexpr ( ( I0 == 0 ) && ( I1 == 1 ) )
p = x;
else
p = _mm256_permute2f128_pd( x, x, lane_0 | lane_1 );
return p;
}
//----------------------------------------------------------------------------------------
//
// gather_lanes< a, b >( x, y ), with a,b in { 0, 1, clear_lane } returns { x.lane( a ), y.lane( b ) }
// with x.lane( clear_lane ) = 0
//
//----------------------------------------------------------------------------------------
template < uint I0, uint I1 >
requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) )
__m256d gather_lanes( __m256d x, __m256d y )
{
constexpr int lane_0 = I0;
constexpr int lane_1 = ( (I1 == clear_lane) ? clear_lane : ( I1 + 2 ) ) << 4;
__m256d p;
if( ( I0 == 0 ) && ( I1 == 1 ) )
p = blend< 0, 0, 1, 1 >( x, y );
else
p = _mm256_permute2f128_pd( x, y, lane_0 | lane_1 );
return p;
}
//----------------------------------------------------------------------------------------
// gather_high returns { x[ 1 ], y[ 1 ], x[ 3 ], y[ 3 ] }
//----------------------------------------------------------------------------------------
inline __m256d gather_high( __m256d x, __m256d y )
{
return _mm256_unpackhi_pd( x, y );
}
//----------------------------------------------------------------------------------------
// gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] }
//----------------------------------------------------------------------------------------
inline __m256d gather_low( __m256d x, __m256d y )
{
return _mm256_unpacklo_pd( x, y );
}
//----------------------------------------------------------------------------------------
// high_bits_64 returns the bits 63, 64 + 63, 128 + 63 and 196 + 63 of x
//----------------------------------------------------------------------------------------
inline int high_bits_64( __m256d x )
{
return _mm256_movemask_pd( x );
}
inline int high_bits_64( __m256i x )
{
__m256d xd = cast_d( x );
return _mm256_movemask_pd( xd );
}
//----------------------------------------------------------------------------------------
// gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] }
//----------------------------------------------------------------------------------------
inline __m256d is_greater( __m256d x, __m256d y )
{
__m256d gt = _mm256_cmp_pd( x, y, _CMP_GT_OQ );
return gt;
}
inline __m256i ones_i()
{
__m256i r;
return _mm256_cmpeq_epi64( r, r );
}
inline __m256d or_d( __m256d x, __m256d y )
{
return _mm256_or_pd( x, y );
}
inline __m256i or_i( __m256i x, __m256i y )
{
return _mm256_or_si256( x, y );
}
template < uint I0, uint I1, uint I2, uint I3 >
requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) )
__m256d permute( __m256d x )
{
__m256d p;
if constexpr ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 > 1 ) && ( I3 > 1 ) )
p = _mm256_permute_pd( x, ( I0 | ( I1 << 1 ) | ( ( I2 - 2 ) << 2 ) | ( ( I3 - 2 ) << 3 ) ) );
else
p = _mm256_permute4x64_pd( x, ( I0 | ( I1 << 2 ) | ( I2 << 4 ) | ( I3 << 6 ) ) );
return p;
}
template < uint I0, uint I1, uint I2, uint I3 >
requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) )
__m256i permute( __m256i x )
{
__m256d xd = _mm256_castsi256_pd( x );
__m256d pd = permute< I0, I1, I2, I3 >( xd );
__m256i p = _mm256_castpd_si256( pd );
return p;
}
inline __m256d xor_d( __m256d x, __m256d y )
{
return _mm256_xor_pd( x, y );
}
inline __m256d xor_d( __m256d x, __m256i y )
{
__m256d yd = cast_d( y );
return _mm256_xor_pd( x, yd );
}
inline __m256d xor_d( __m256i x, __m256d y )
{
__m256d xd = cast_d( x );
return _mm256_xor_pd( xd, y );
}
inline __m256i xor_i( __m256i x, __m256i y )
{
return _mm256_xor_si256( x, y );
}
inline __m256d zero_d()
{
__m256d r;
return _mm256_xor_pd( r, r );
}
inline __m256i zero_i()
{
__m256i r;
return _mm256_xor_si256( r, r );
}
//----------------------------------------------------------------------------------------
// avx2 wrappers, to simplify the notation a bit and clarify the use of immediates
//----------------------------------------------------------------------------------------
enum class location
{
null = -1024,
out = -1,
border = 0,
in = 1
};
enum class bounded_convex_type
{
empty = 0,
point = 1,
segment = 2,
polygon = 3
};
//----------------------------------------------------------------------------------------
// vertex_0 and vertex_1 scatter the arrays
//
// { inf_y, inf_z, inf_x, ? } and
// { m_sup_y, m_sup_z, m_sup_x, ? }
//
// to vertex_pair, in the x, y, z order
//----------------------------------------------------------------------------------------
struct vertex_0
{
static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup )
{
__m256d y = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) }
__m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ? ? }
__m256d x = gather_lanes< 1, 1 >( y ); // { inf( x ), m_sup( x ), ? ? }
vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( x, vertex_pair[ 0 ] );
vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( y, vertex_pair[ 1 ] );
vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( z, vertex_pair[ 2 ] );
}
};
struct vertex_1
{
static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup )
{
__m256d x = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) }
__m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ?, ? }
__m256d y = gather_lanes< 0, 0 >( x ); // { ?, ?, inf( y ), m_sup( y ) }
z = gather_lanes< 0, 0 >( z ); // { ?. . inf( z ), m_sup( z ) }
vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 0 ], x );
vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 1 ], y );
vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 2 ], z );
}
};
//----------------------------------------------------------------------------------------
// cross computes the solution ( x / z, y / z ) of the equation
//
// a[ 0 ] x + a[ 1 ] y = a[ 2 ]
// b[ 0 ] x + b[ 1 ] y = b[ 2 ]
//
// which is
//
// x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ]
// y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ]
// z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ]
//
// which is convenient to order as
//
// y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ]
// z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ]
// x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ]
//
// It assumes that x, y >= 0 and z > 0 and makes sure that the rounded values of
// x, y and z also satisfy these lower bounds.
//----------------------------------------------------------------------------------------
template < class Vertex >
inline void cross( __m256d* vertex_pair, __m256d a, __m256d b )
{
__m256d inf_a = _mm256_movedup_pd( a ); // { a[ 0 ], a[ 0 ], a[ 2 ], ? }
__m256d m_sup_b = _mm256_movedup_pd( b ); // { b[ 0 ], b[ 0 ], b[ 2 ], ? }
__m256d inf_b = permute< 2, 1, 1, 3 >( b ); // { b[ 2 ], b[ 1 ], b[ 1 ], ? }
__m256d m_sup_a = permute< 2, 1, 1, 3 >( a ); // { a[ 2 ], a[ 1 ], a[ 1 ], ? }
__m256d inf_h = _mm256_mul_pd( inf_a, inf_b );
__m256d m_sup_h = _mm256_mul_pd( m_sup_a, m_sup_b );
__m256d inf = _mm256_fnmadd_pd( m_sup_a, m_sup_b, inf_h ); // inf(x) = rounded_down( x )
__m256d m_sup = _mm256_fnmadd_pd( inf_a, inf_b, m_sup_h ); // -sup(x) = rounded_down( -x )
// x,y must be >= 0 and z must be positive and the computed values violate this due to
// rounding. In this rare case in which inf[ i ] <= 0 I can prove that the exact value
// of inf[ i ] is m_sup[ i ] - inf[ i ]
__m256d zero = zero_d();
__m256d cmp = _mm256_cmp_pd( inf, zero, _CMP_LE_OQ ); // rounded_down( x ) <= 0 ?
__m256d ds = _mm256_sub_pd( m_sup, inf );
inf = blend( inf, ds, cmp );
Vertex::scatter( vertex_pair, inf, m_sup );
}
//----------------------------------------------------------------------------------------
// exact_location returns the exact location of the intersection ( x, y ) of the lines
//
// b0[ 0 ] x + b0[ 1 ] y >= b0[ 2 ]
// b1[ 1 ] x + b1[ 1 ] y >= b1[ 2 ]
//
// with respect to the set
//
// e0 x + e1 y >= e2
//
// under the assumption that
//
// d = b0[ 0 ] b1[ 1 ] - b0[ 1 ] b1[ 1 ] > 0
//
// where ei = edge[ i ], b0 = border[ 0 ] and b1 = b[ 1 ]
//
// This amounts to evaluating the sign of
//
// p = e0 r + e1 s - e2 d
//
// r = b0[ 2 ] b1[ 1 ] - b0[ 1 ] b1[ 2 ]
// s = b0[ 0 ] b1[ 2 ] - b0[ 2 ] b1[ 0 ] ,
//
// which motivates the evaluation of the six products
//
// group 1 group 2
// e0 b0[ 2 ] b1[ 1 ], -e0 b0[ 1 ] b1[ 2 ]
// e1 b0[ 0 ] b1[ 2 ], -e1 b0[ 2 ] b1[ 0 ]
// e2 b0[ 1 ] b1[ 0 ]. -e2 b0[ 0 ] b1[ 1 ]
//
// and we want to find the sign of their sum.
//
// The function assumes that the edges entries are either normal or zero, ie.,
// there are no subnormal entries.
//----------------------------------------------------------------------------------------
location exact_location( __m256d e, __m256d b0, __m256d b1 );
//----------------------------------------------------------------------------------------
// The struct bounded convex represents a bounded convex set on the plane, which can
// have one of the following types:
//
// a) empty
// b) ṕoint
// c) segment
// d) polygon
//
//----------------------------------------------------------------------------------------
struct bounded_convex final
{
constexpr bounded_convex( int alloc, __m256d* edge, __m256d* vertex_p )
: alloc( alloc ), edge( edge ), vertex_p( vertex_p ){}
void setup( __m256d box );
int insert_vertex( int at );
int remove_vertices( int begin_vertex, int end_vertex );
void set_empty()
{
end = 0;
begin = 0;
type = bounded_convex_type::empty;
}
void set_point( int first_vertex )
{
end = 1;
begin = 0;
type = bounded_convex_type::point;
edge[ 0 ] = edge[ first_vertex ];
edge[ 1 ] = edge[ first_vertex + 1 ];
__m256d* src = vertex_pair( first_vertex );
if( first_vertex & 0x1 )
{
vertex_p[ 0 ] = gather_lanes< 1, 1 >( src[ 0 ] );
vertex_p[ 1 ] = gather_lanes< 1, 1 >( src[ 1 ] );
vertex_p[ 2 ] = gather_lanes< 1, 1 >( src[ 2 ] );
}
else
{
vertex_p[ 0 ] = src[ 0 ];
vertex_p[ 1 ] = src[ 1 ];
vertex_p[ 2 ] = src[ 2 ];
}
}
void set_to_last_segment();
void set_segment( int first_vertex )
{
end = 2;
begin = 0;
type = bounded_convex_type::segment;
edge[ 0 ] = edge[ first_vertex ];
edge[ 1 ] = edge[ first_vertex + 1 ];
edge[ 2 ] = edge[ first_vertex + 2 ];
__m256d* src = vertex_pair( first_vertex );
if( first_vertex & 0x1 )
{
vertex_p[ 0 ] = gather_lanes< 1, 0 >( src[ 0 ], src[ 3 ] );
vertex_p[ 1 ] = gather_lanes< 1, 0 >( src[ 1 ], src[ 4 ] );
vertex_p[ 2 ] = gather_lanes< 1, 0 >( src[ 2 ], src[ 5 ] );
}
else
{
vertex_p[ 0 ] = *src;
vertex_p[ 1 ] = *( ++src );
vertex_p[ 2 ] = *( ++src );
}
}
__m256d box() const;
void push( __m256d* begin, __m256d* end );
__m256d plain_edge( int i ) const
{
return edge[ begin + i ];
}
void plain_vertex( __m256d* inf_msup, int i ) const
{
int ri = ( begin + i );
__m256d const* vp = vertex_pair( ri );
int di = 2 * ( ( begin + i ) & 0x1 );
inf_msup[ 0 ] = _mm256_setr_pd( vp[ 0 ][ di ], vp[ 1 ][ di ], vp[ 2 ][ di ], 0.0 );
inf_msup[ 1 ] = _mm256_setr_pd( vp[ 0 ][ di + 1 ], vp[ 1 ][ di + 1 ], vp[ 2 ][ di + 1 ], 0.0 );
}
__m256d* vertex_pair( int index )
{
__m256d* p = vertex_p + 3 * ( index / 2 );
return p;
}
__m256d const* vertex_pair( int index ) const
{
__m256d* p = vertex_p + 3 * ( index / 2 );
return p;
}
int end;
int begin;
int const alloc;
__m256d* const edge;
__m256d* const vertex_p;
bounded_convex_type type;
};
// huge = { 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x7FFF'7FFF'7FFF' }
// tiny = { 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x0000'0000'0000'00000 }
__m256i bounds( __m256i* tiny, __m256i* huge );
int normalize_equations( __m256d* quadrant[ 4 ], int n, __m256i tiny, __m256i huge );
//----------------------------------------------------------------------------------------
//
// The class locator is used to find the location of a point with respect to the half
// plane.
//
// a x + b y >= c z
//
// The template parameters indicate the signs of the coefficients a, b and c.
//
//----------------------------------------------------------------------------------------
template < int flip_x, int flip_y, int flip_z >
struct locator final
{
//----------------------------------------------------------------------------------------
// The raw method assumes that
//
// edges[ 0 ] = { a, a, a, a }
// edges[ 1 ] = { b, b, b, b }
// edges[ 2 ] = { c, c, c, c }
//
// vertex_pair[ 0 ] = { x0_inf, x0_msup, x1_inf, x1_msup }
// vertex_pair[ 1 ] = { y0_inf, y0_msup, y1_inf, y1_msup }
// vertex_pair[ 2 ] = { z0_inf, z0_msup, z1_inf, z1_msup }
//
// and checks the location of the vertices ( x0, y0 ) and ( x1, y1 ) with respect to
// the half plane
//
// (-1)^flip_x a x + (-1)^flip_y b y >= (-1)^flip_z c z
//
// This is done by computing a x + b y - c z rounded up and down.
//
//----------------------------------------------------------------------------------------
static int raw( __m256d const* edge, __m256d const* vertex_pair )
{
__m256d chk;
__m256d zero_d = _mm256_xor_pd( zero_d, zero_d );
if constexpr ( flip_x )
chk = _mm256_mul_pd( edge[ 0 ], permute< 1, 0, 3, 2 >( vertex_pair[ 0 ] ) );
else
chk = _mm256_mul_pd( edge[ 0 ], vertex_pair[ 0 ] );
if constexpr ( flip_y)
chk = _mm256_fmadd_pd( edge[ 1 ], permute< 1, 0, 3, 2 >( vertex_pair[ 1 ] ), chk );
else
chk = _mm256_fmadd_pd( edge[ 1 ], vertex_pair[ 1 ], chk );
if constexpr ( flip_z )
chk = _mm256_fmadd_pd( edge[ 2 ], vertex_pair[ 2 ], chk );
else
chk = _mm256_fmadd_pd( edge[ 2 ], permute< 1, 0, 3, 2 >( vertex_pair[ 2 ] ), chk );
__m256d cmp = _mm256_cmp_pd( chk, zero_d, _CMP_GT_OQ );
int cm = _mm256_movemask_pd( cmp );
return cm;
}
static location locate_vertex_0( __m256d const* edge, __m256d const* vertex_pair )
{
location loc;
int cm = raw( edge, vertex_pair );
if( cm & 0x1 ) // inf( x[ 0 ] ) > 0
loc = location::in;
else
{
if( cm & 0x2 )
loc = location::out;
else
loc = location::null;
}
return loc;
}
static location locate_vertex_1( __m256d const* edge, __m256d const* vertex_pair )
{
location loc;
int cm = raw( edge, vertex_pair );
if( cm & 0x4 ) // the second vertex is in
loc = location::in;
else
{
if( cm & 0x8 ) // the second vertex is out
loc = location::out;
else
loc = location::null;
}
return loc;
}
static void locate_pair( location loc[ 2 ], __m256d const* edge, __m256d const* vertex_pair )
{
// computing a vertex[ 0 ] + b vertex[ 1 ] + c vertex[ 2 ]
int cm = raw( edge, vertex_pair );
if( cm & 0x1 ) // the first vertex is in
loc[ 0 ] = location::in;
else
{
if( cm & 0x2 ) // the first vertex is out
loc[ 0 ] = location::out;
else
loc[ 0 ] = location::null;
}
if( cm & 0x4 ) // the second vertex is in
loc[ 1 ] = location::in;
else
{
if( cm & 0x8 ) // the second vertex is out
loc[ 1 ] = location::out;
else
loc[ 1 ] = location::null;
}
}
static void insert( bounded_convex& r, __m256d new_edge )
{
__m256i ones = _mm256_cmpeq_epi64( ones, ones );
ones = _mm256_slli_epi64( ones, 63 );
__m256d sign = _mm256_castsi256_pd( ones );
__m256d coefficients[ 3 ] = { permute< 0, 0, 0, 0 >( new_edge ),
permute< 1, 1, 1, 1 >( new_edge ),
permute< 2, 2, 2, 2 >( new_edge ) };
coefficients[ 0 ] = _mm256_andnot_pd( sign, coefficients[ 0 ] );
coefficients[ 1 ] = _mm256_andnot_pd( sign, coefficients[ 1 ] );
coefficients[ 2 ] = _mm256_andnot_pd( sign, coefficients[ 2 ] );
if( r.type != bounded_convex_type::polygon )
{
if( r.type == bounded_convex_type::point )
{
location loc = locate_vertex_0( coefficients, r.vertex_p );
if( loc == location::null )
loc = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] );
if( loc == location::out )
r.set_empty();
return;
}
location loc[ 2 ];
locate_pair( loc, coefficients, r.vertex_p );
if( loc[ 0 ] == location::null )
loc[ 0 ] = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] );
if( loc[ 1 ] == location::null )
loc[ 1 ] = exact_location( new_edge, r.edge[ 1 ], r.edge[ 2 ] );
switch( loc[ 0 ] )
{
case location::in:
{
if( loc[ 1 ] == location::out )
{
r.edge[ 2 ] = new_edge;
cross< vertex_1 >( r.vertex_p, r.edge[ 1 ], new_edge );
}
return;
}
case location::border:
{
if( loc[ 1 ] == location::out )
{
r.end = 1;
r.type = bounded_convex_type::point;
}
return;
}
default: // location::out
{
switch( loc[ 1 ] )
{
case location::in:
{
r.edge[ 0 ] = new_edge;
cross< vertex_0 >( r.vertex_p, new_edge, r.edge[ 1 ] );
return;
}
case location::border:
{
r.end = 1;
r.edge[ 0 ] = r.edge[ 1 ];
r.edge[ 1 ] = r.edge[ 2 ];
r.type = bounded_convex_type::point;
r.vertex_p[ 0 ] = gather_lanes< 1, 1 >( r.vertex_p[ 0 ] );
r.vertex_p[ 1 ] = gather_lanes< 1, 1 >( r.vertex_p[ 1 ] );
r.vertex_p[ 2 ] = gather_lanes< 1, 1 >( r.vertex_p[ 2 ] );
return;
}
default:
{
r.set_empty();
return;
}
}
}
}
}
location loc[ 2 ];
int index = r.begin;
int intersection = -1;
bool intersection_at_vertex;
__m256d* vertex_pair_at_index = r.vertex_pair( index );
auto scan = [ & ]()
{
if( ++index >= r.end )
return location::null;
if( index & 0x1 )
return loc[ 1 ];
vertex_pair_at_index += 3;
if( index + 1 >= r.end )
{
loc[ 0 ] = locate_vertex_0( coefficients, vertex_pair_at_index );
if( loc[ 0 ] == location::null )
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
}
else
{
locate_pair( loc, coefficients, vertex_pair_at_index );
if( loc[ 0 ] == location::null )
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
if( loc[ 1 ] == location::null )
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
}
return loc[ 0 ];
};
if( index & 0x1 )
{
location begin = locate_vertex_1( coefficients, vertex_pair_at_index );
if( begin == location::null )
begin = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
switch( begin )
{
case location::in:
goto cursor_in;
case location::border:
{
intersection = index;
intersection_at_vertex = true;
++index;
vertex_pair_at_index += 3;
locate_pair( loc, coefficients, vertex_pair_at_index );
if( loc[ 0 ] == location::null )
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
if( loc[ 1 ] == location::null )
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
switch( loc[ 0 ] )
{
case location::in:
goto cursor_in;
case location::border:
{
if( loc[ 1 ] == location::out )
r.set_segment( r.begin );
return;
}
case location::out:
goto cursor_out;
}
}
default:
goto cursor_out;
}
}
locate_pair( loc, coefficients, vertex_pair_at_index );
if( loc[ 0 ] == location::null )
loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
if( loc[ 1 ] == location::null )
loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] );
switch( loc[ 0 ] )
{
case location::in:
goto cursor_in;
case location::border:
{
intersection = index;
intersection_at_vertex = true;
++index;
switch( loc[ 1 ] )
{
case location::in:
goto cursor_in;
case location::border:
{
location next = locate_vertex_0( coefficients, vertex_pair_at_index + 3 );
if( next == location::null )
next = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] );
if( next == location::out )
r.set_segment( r.begin );
return;
}
default:
goto cursor_out;
}
}
default:
goto cursor_out;
}
cursor_in:
{
location next_loc = scan();
switch( next_loc )
{
case location::in:
goto cursor_in;
case location::border:
{
if( intersection >= 0 )
{
if( !intersection_at_vertex )
{
__m256d* vp = r.vertex_pair( intersection - 1 );
if( intersection & 0x1 )
cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] );
else
cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] );
--intersection;
}
r.end = index + 1;
r.begin = intersection;
r.edge[ r.end ] = new_edge;
r.edge[ r.begin ] = new_edge;
}
else
{
next_loc = scan();
if( next_loc == location::out )
{
intersection = index - 1;
intersection_at_vertex = true;
goto cursor_out;
}
}
return;
}
case location::out:
{
if( intersection < 0 )
{
intersection = index;
intersection_at_vertex = false;
goto cursor_out;
}
if( intersection_at_vertex )
r.begin = intersection;
else
{
r.begin = intersection - 1;
__m256d* vp = r.vertex_pair( r.begin );
if( r.begin & 0x1 )
cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] );
else
cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] );
}
__m256d* vp = r.vertex_pair( index );
if( index & 0x1 )
cross< vertex_1 >( vp, r.edge[ index ], new_edge );
else
cross< vertex_0 >( vp, r.edge[ index ], new_edge );
r.end = index + 1;
r.edge[ r.end ] = new_edge;
r.edge[ r.begin ] = new_edge;
return;
}
case location::null:
{
assert( index == r.end );
if( intersection >= 0 )
{
if( intersection_at_vertex )
{
if( intersection == r.begin )
return;
}
else
{
__m256d* first_pair = r.vertex_pair( intersection - 1 );
if( intersection & 0x1 )
cross< vertex_0 >( first_pair, new_edge, r.edge[ intersection ] );
else
cross< vertex_1 >( first_pair, new_edge, r.edge[ intersection ] );
--intersection;
}
__m256d* last_pair = r.vertex_pair( index );
if( index & 0x1 )
cross< vertex_1 >( last_pair, r.edge[ index ], new_edge );
else
cross< vertex_0 >( last_pair, r.edge[ index ], new_edge );
++r.end;
r.begin = intersection;
r.edge[ r.end ] = new_edge;
r.edge[ r.begin ] = new_edge;
}
return;
}
}
}
cursor_out:
{
location new_loc = scan();
switch( new_loc )
{
case location::in:
{
if( intersection < 0 )
{
intersection = index;
intersection_at_vertex = false;
goto cursor_in;
}
if( intersection + 1 == index )
{
assert( !intersection_at_vertex );
int new_vertex = r.insert_vertex( intersection );
if( new_vertex == intersection )
{
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
{
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
cross< vertex_0 >( vp + 3, new_edge, r.edge[ intersection + 2 ] );
}
else
{
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
cross< vertex_1 >( vp, new_edge, r.edge[ intersection + 2 ] );
}
r.edge[ intersection + 1 ] = new_edge;
}
else
{
assert( new_vertex == intersection - 1 );
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
{
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge) ;
}
else
{
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
cross< vertex_1 >( vp - 3, r.edge[ intersection ], new_edge) ;
}
r.edge[ new_vertex ] = r.edge[ intersection ];
r.edge[ intersection ] = new_edge;
}
return;
}
if( !intersection_at_vertex )
{
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
else
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
}
if( intersection + 2 < index )
index = r.remove_vertices( intersection + 1, index - 1 ) + 2;
__m256d* vp = r.vertex_pair( index - 1 );
if( index & 0x1 )
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
else
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
r.edge[ index - 1 ] = new_edge;
return;
}
case location::border:
{
if( intersection >= 0 )
{
if( intersection_at_vertex )
{
if( ( intersection == r.begin ) && ( index + 1 == r.end ) )
{
r.set_to_last_segment();
return;
}
}
else
{
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
else
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
}
if( intersection + 1 < index )
intersection = r.remove_vertices( intersection + 1, index );
r.edge[ intersection + 1 ] = new_edge;
return;
}
new_loc = scan();
switch( new_loc )
{
case location::in:
{
intersection = index - 1;
intersection_at_vertex = true;
goto cursor_in;
}
case location::border:
{
r.set_segment( index - 1 );
return;
}
default: // location::out or location::null:
{
r.set_point( index - 1 );
return;
}
}
}
case location::out:
goto cursor_out;
case location::null:
{
assert( index == r.end );
if( intersection < 0 )
{
r.set_empty();
return;
}
if( intersection_at_vertex )
{
if( intersection == r.begin )
{
r.set_point( r.begin );
return;
}
}
else
{
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
cross< vertex_1 >( vp, r.edge[ intersection ], new_edge );
else
cross< vertex_0 >( vp, r.edge[ intersection ], new_edge );
}
++intersection;
__m256d* vp = r.vertex_pair( intersection );
if( intersection & 0x1 )
cross< vertex_1 >( vp, new_edge, r.edge[ index ] );
else
cross< vertex_0 >( vp, new_edge, r.edge[ index ] );
r.end = intersection + 1;
r.edge[ intersection ] = new_edge;
r.edge[ r.end ] = r.edge[ r.begin ];
}
return;
}
}
}
};
} // namespace wm::nlp::lp2d
//----------------------------------------------------------------------------------------
//
// The function interval_system_2d solves the system of interval equations
//
// ai x + bi y = ci
//
// for 0 <= i < n_equations and
//
// ai = [ eq[ 6 * i ], eq[ 6 * i + 1 ] ] with eq[ 6 * i ] <= eq[ 6 * i + 1 ]
// bi = [ eq[ 6 * i + 2 ], eq[ 6 * i + 3 ] ] with eq[ 6 * i + 2 ] <= eq[ 6 * i + 3 ]
// ci = [ eq[ 6 * i + 4 ], eq[ 6 * i + 5 ] ] with eq[ 6 * i + 4 ] <= eq[ 6 * i + 5 ]
//
// The function finds the solutions inside the box
//
// box[ 0 ] <= x <= box[ 1 ]
// box[ 2 ] <= y <= box[ 3 ]
//
// The solution consists of up to 4 boxes bx[ 0 ], bx[ 1 ], bx[ 2 ] and bx[ 3 ], with
//
// bx[ i ] = inf_x[ i ] <= x <= sup_x[ i ]
// inf_y[ i ] <= y <= sup_y[ i ]
// for
//
// inf_x[ i ] = solution[ 4 * i ]
// sup_x[ i ] = solution[ 4 * i + 1 ]
// inf_y[ i ] = solution[ 4 * i + 2 ]
// sup_y[ i ] = solution[ 4 * i + 3 ]
//
// The function returns the number of boxes in the solutions, which can be 0,1,2,3 or 4,
// or a negative number if the input is invalid or if it cannot change the rounding mode.
//
// It assumes that:
//
// a) n_equations <= 16
// b) all doubles are finite, ie., not nan or +/-inf
//
// c) box and eq are such that
//
// box[ 0 ] <= box[ 1 ]
// box[ 2 ] <= box[ 3]
// eq[ 2 * i ] <= eq[ 2 i + 1 ] for i = 0,..., 3 * n_equations.
//
// d) box and eq are aligned at a 32 byte boundary
//
// It does not verify the input and its results should not be trusted if the inputs are
// invalid.
//
//----------------------------------------------------------------------------------------
extern "C"
{
int interval_system_2d( double* solution, double const* box, double const* eq, unsigned int n_equations );
}