#pragma once //---------------------------------------------------------------------------------------- // Copyright (c) 2024 Walter Mascarenhas // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. // // The MPL has a "Disclaimer of Warranty" and a "Limitation of Liability", // and we provide no guarantee regarding the correctness of this source code. // //---------------------------------------------------------------------------------------- #include #include #include typedef unsigned int uint; //---------------------------------------------------------------------------------------- namespace wm::nlp::lp2d { //---------------------------------------------------------------------------------------- // avx2 wrappers, to simplify the notation a bit and clarify the use of immediates //---------------------------------------------------------------------------------------- inline __m256d and_d( __m256d x, __m256d y ) { return _mm256_and_pd( x, y ); } inline __m256i and_i( __m256i x, __m256d y ) { __m256i yi = _mm256_castpd_si256( y ); return _mm256_and_si256( x, yi ); } inline __m256i and_i( __m256d x, __m256i y ) { __m256i xi = _mm256_castpd_si256( x ); __m256i r = _mm256_and_si256( xi, y ); return r; } inline __m256i and_i( __m256i x, __m256i y ) { return _mm256_and_si256( x, y ); } inline __m256d and_not_d( __m256i x, __m256i y ) { __m256d xd = _mm256_castsi256_pd( x ); __m256d yd = _mm256_castsi256_pd( y ); return _mm256_andnot_pd( xd, yd ); } inline __m256d and_not_d( __m256d x, __m256d y ) { return _mm256_andnot_pd( x, y ); } inline __m256i and_not_i( __m256i x, __m256i y ) { __m256d xd = _mm256_castsi256_pd( x ); __m256d yd = _mm256_castsi256_pd( y ); __m256d rd = _mm256_andnot_pd( xd, yd ); __m256i r = _mm256_castpd_si256( rd ); return r; } template < uint I0, uint I1, uint I2, uint I3 > requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) ) __m256d blend( __m256d x, __m256d y ) { __m256d p = _mm256_blend_pd( x, y, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) ); return p; } template < uint I0, uint I1, uint I2, uint I3 > requires ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 < 2 ) && ( I3 < 2 ) ) __m256i blend( __m256i x, __m256i y ) { __m256d xd = _mm256_castsi256_pd( x ); __m256d yd = _mm256_castsi256_pd( y ); __m256d pd = _mm256_blend_pd( xd, yd, ( I0 | ( I1 << 1 ) | ( I2 << 2 ) | ( I3 << 3 ) ) ); __m256i p = _mm256_castpd_si256( pd ); return p; } inline __m256d blend( __m256d a, __m256d b, __m256d mask ) { __m256d blended = _mm256_blendv_pd( a, b, mask ); return blended; } inline __m256d blend( __m256d a, __m256d b, __m256i mask ) { __m256d mask_d = _mm256_castsi256_pd( mask ); __m256d blended = _mm256_blendv_pd( a, b, mask_d ); return blended; } inline __m256i blend( __m256i a, __m256i b, __m256i mask ) { __m256d a_d = _mm256_castsi256_pd( a ); __m256d b_d = _mm256_castsi256_pd( b ); __m256d mask_d = _mm256_castsi256_pd( mask ); __m256d blended_d = _mm256_blendv_pd( a_d, b_d, mask_d ); __m256i blended = _mm256_castpd_si256( blended_d ); return blended; } inline __m256d blend_max( __m256d x, __m256d y, __m256d greater_mask ) { __m256d max = blend( y, x, greater_mask ); return max; } inline __m256d blend_min( __m256d x, __m256d y, __m256d greater_mask ) { __m256d min = blend( x, y, greater_mask ); return min; } inline __m256d cast_d( __m256i x ) { return _mm256_castsi256_pd( x ); } inline __m256i cast_i( __m256d x ) { return _mm256_castpd_si256( x ); } //---------------------------------------------------------------------------------------- // // gather_lanes< a, b >( x ), with a,b in { 0, 1, 2 } returns { x.lane( a ), x.lane( b ) } // with x.lane( 2 ) = 0 // //---------------------------------------------------------------------------------------- constexpr int clear_lane = 8; template < uint I0, uint I1 > requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) ) __m256i gather_lanes( __m256i x ) { constexpr int lane_0 = I0; constexpr int lane_1 = I1 << 4; __m256d xd = _mm256_castsi256_pd( x ); __m256d gd = _mm256_permute2f128_pd( xd, xd, lane_0 | lane_1 ); __m256i g = _mm256_castpd_si256( gd ); return g; } template < uint I0, uint I1 > requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) ) __m256d gather_lanes( __m256d x ) { constexpr int lane_0 = I0; constexpr int lane_1 = I1 << 4; __m256d p; if constexpr ( ( I0 == 0 ) && ( I1 == 1 ) ) p = x; else p = _mm256_permute2f128_pd( x, x, lane_0 | lane_1 ); return p; } //---------------------------------------------------------------------------------------- // // gather_lanes< a, b >( x, y ), with a,b in { 0, 1, clear_lane } returns { x.lane( a ), y.lane( b ) } // with x.lane( clear_lane ) = 0 // //---------------------------------------------------------------------------------------- template < uint I0, uint I1 > requires ( ( ( I0 < 2 ) || ( I0 == clear_lane ) ) && ( ( I1 < 2 ) || ( I1 == clear_lane ) ) ) __m256d gather_lanes( __m256d x, __m256d y ) { constexpr int lane_0 = I0; constexpr int lane_1 = ( (I1 == clear_lane) ? clear_lane : ( I1 + 2 ) ) << 4; __m256d p; if( ( I0 == 0 ) && ( I1 == 1 ) ) p = blend< 0, 0, 1, 1 >( x, y ); else p = _mm256_permute2f128_pd( x, y, lane_0 | lane_1 ); return p; } //---------------------------------------------------------------------------------------- // gather_high returns { x[ 1 ], y[ 1 ], x[ 3 ], y[ 3 ] } //---------------------------------------------------------------------------------------- inline __m256d gather_high( __m256d x, __m256d y ) { return _mm256_unpackhi_pd( x, y ); } //---------------------------------------------------------------------------------------- // gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] } //---------------------------------------------------------------------------------------- inline __m256d gather_low( __m256d x, __m256d y ) { return _mm256_unpacklo_pd( x, y ); } //---------------------------------------------------------------------------------------- // high_bits_64 returns the bits 63, 64 + 63, 128 + 63 and 196 + 63 of x //---------------------------------------------------------------------------------------- inline int high_bits_64( __m256d x ) { return _mm256_movemask_pd( x ); } inline int high_bits_64( __m256i x ) { __m256d xd = cast_d( x ); return _mm256_movemask_pd( xd ); } //---------------------------------------------------------------------------------------- // gather_low returns { x[ 0 ], y[ 0 ], x[ 2 ], y[ 2 ] } //---------------------------------------------------------------------------------------- inline __m256d is_greater( __m256d x, __m256d y ) { __m256d gt = _mm256_cmp_pd( x, y, _CMP_GT_OQ ); return gt; } inline __m256i ones_i() { __m256i r; return _mm256_cmpeq_epi64( r, r ); } inline __m256d or_d( __m256d x, __m256d y ) { return _mm256_or_pd( x, y ); } inline __m256i or_i( __m256i x, __m256i y ) { return _mm256_or_si256( x, y ); } template < uint I0, uint I1, uint I2, uint I3 > requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) ) __m256d permute( __m256d x ) { __m256d p; if constexpr ( ( I0 < 2 ) && ( I1 < 2 ) && ( I2 > 1 ) && ( I3 > 1 ) ) p = _mm256_permute_pd( x, ( I0 | ( I1 << 1 ) | ( ( I2 - 2 ) << 2 ) | ( ( I3 - 2 ) << 3 ) ) ); else p = _mm256_permute4x64_pd( x, ( I0 | ( I1 << 2 ) | ( I2 << 4 ) | ( I3 << 6 ) ) ); return p; } template < uint I0, uint I1, uint I2, uint I3 > requires ( ( I0 < 4 ) && ( I1 < 4 ) && ( I2 < 4 ) && ( I3 < 4 ) ) __m256i permute( __m256i x ) { __m256d xd = _mm256_castsi256_pd( x ); __m256d pd = permute< I0, I1, I2, I3 >( xd ); __m256i p = _mm256_castpd_si256( pd ); return p; } inline __m256d xor_d( __m256d x, __m256d y ) { return _mm256_xor_pd( x, y ); } inline __m256d xor_d( __m256d x, __m256i y ) { __m256d yd = cast_d( y ); return _mm256_xor_pd( x, yd ); } inline __m256d xor_d( __m256i x, __m256d y ) { __m256d xd = cast_d( x ); return _mm256_xor_pd( xd, y ); } inline __m256i xor_i( __m256i x, __m256i y ) { return _mm256_xor_si256( x, y ); } inline __m256d zero_d() { __m256d r; return _mm256_xor_pd( r, r ); } inline __m256i zero_i() { __m256i r; return _mm256_xor_si256( r, r ); } //---------------------------------------------------------------------------------------- // avx2 wrappers, to simplify the notation a bit and clarify the use of immediates //---------------------------------------------------------------------------------------- enum class location { null = -1024, out = -1, border = 0, in = 1 }; enum class bounded_convex_type { empty = 0, point = 1, segment = 2, polygon = 3 }; //---------------------------------------------------------------------------------------- // vertex_0 and vertex_1 scatter the arrays // // { inf_y, inf_z, inf_x, ? } and // { m_sup_y, m_sup_z, m_sup_x, ? } // // to vertex_pair, in the x, y, z order //---------------------------------------------------------------------------------------- struct vertex_0 { static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup ) { __m256d y = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) } __m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ? ? } __m256d x = gather_lanes< 1, 1 >( y ); // { inf( x ), m_sup( x ), ? ? } vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( x, vertex_pair[ 0 ] ); vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( y, vertex_pair[ 1 ] ); vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( z, vertex_pair[ 2 ] ); } }; struct vertex_1 { static void scatter( __m256d* vertex_pair, __m256d inf, __m256d m_sup ) { __m256d x = gather_low( inf, m_sup ); // { inf( y ), m_sup( y ), inf( x ), m_sup( x ) } __m256d z = gather_high( inf, m_sup ); // { inf( z ), m_sup( z ), ?, ? } __m256d y = gather_lanes< 0, 0 >( x ); // { ?, ?, inf( y ), m_sup( y ) } z = gather_lanes< 0, 0 >( z ); // { ?. . inf( z ), m_sup( z ) } vertex_pair[ 0 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 0 ], x ); vertex_pair[ 1 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 1 ], y ); vertex_pair[ 2 ] = blend< 0, 0, 1, 1 >( vertex_pair[ 2 ], z ); } }; //---------------------------------------------------------------------------------------- // cross computes the solution ( x / z, y / z ) of the equation // // a[ 0 ] x + a[ 1 ] y = a[ 2 ] // b[ 0 ] x + b[ 1 ] y = b[ 2 ] // // which is // // x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ] // y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ] // z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ] // // which is convenient to order as // // y = a[ 0 ] b[ 2 ] - a[ 2 ] b[ 0 ] // z = a[ 0 ] b[ 1 ] - a[ 1 ] b[ 0 ] // x = a[ 2 ] b[ 1 ] - a[ 1 ] b[ 2 ] // // It assumes that x, y >= 0 and z > 0 and makes sure that the rounded values of // x, y and z also satisfy these lower bounds. //---------------------------------------------------------------------------------------- template < class Vertex > inline void cross( __m256d* vertex_pair, __m256d a, __m256d b ) { __m256d inf_a = _mm256_movedup_pd( a ); // { a[ 0 ], a[ 0 ], a[ 2 ], ? } __m256d m_sup_b = _mm256_movedup_pd( b ); // { b[ 0 ], b[ 0 ], b[ 2 ], ? } __m256d inf_b = permute< 2, 1, 1, 3 >( b ); // { b[ 2 ], b[ 1 ], b[ 1 ], ? } __m256d m_sup_a = permute< 2, 1, 1, 3 >( a ); // { a[ 2 ], a[ 1 ], a[ 1 ], ? } __m256d inf_h = _mm256_mul_pd( inf_a, inf_b ); __m256d m_sup_h = _mm256_mul_pd( m_sup_a, m_sup_b ); __m256d inf = _mm256_fnmadd_pd( m_sup_a, m_sup_b, inf_h ); // inf(x) = rounded_down( x ) __m256d m_sup = _mm256_fnmadd_pd( inf_a, inf_b, m_sup_h ); // -sup(x) = rounded_down( -x ) // x,y must be >= 0 and z must be positive and the computed values violate this due to // rounding. In this rare case in which inf[ i ] <= 0 I can prove that the exact value // of inf[ i ] is m_sup[ i ] - inf[ i ] __m256d zero = zero_d(); __m256d cmp = _mm256_cmp_pd( inf, zero, _CMP_LE_OQ ); // rounded_down( x ) <= 0 ? __m256d ds = _mm256_sub_pd( m_sup, inf ); inf = blend( inf, ds, cmp ); Vertex::scatter( vertex_pair, inf, m_sup ); } //---------------------------------------------------------------------------------------- // exact_location returns the exact location of the intersection ( x, y ) of the lines // // b0[ 0 ] x + b0[ 1 ] y >= b0[ 2 ] // b1[ 1 ] x + b1[ 1 ] y >= b1[ 2 ] // // with respect to the set // // e0 x + e1 y >= e2 // // under the assumption that // // d = b0[ 0 ] b1[ 1 ] - b0[ 1 ] b1[ 1 ] > 0 // // where ei = edge[ i ], b0 = border[ 0 ] and b1 = b[ 1 ] // // This amounts to evaluating the sign of // // p = e0 r + e1 s - e2 d // // r = b0[ 2 ] b1[ 1 ] - b0[ 1 ] b1[ 2 ] // s = b0[ 0 ] b1[ 2 ] - b0[ 2 ] b1[ 0 ] , // // which motivates the evaluation of the six products // // group 1 group 2 // e0 b0[ 2 ] b1[ 1 ], -e0 b0[ 1 ] b1[ 2 ] // e1 b0[ 0 ] b1[ 2 ], -e1 b0[ 2 ] b1[ 0 ] // e2 b0[ 1 ] b1[ 0 ]. -e2 b0[ 0 ] b1[ 1 ] // // and we want to find the sign of their sum. // // The function assumes that the edges entries are either normal or zero, ie., // there are no subnormal entries. //---------------------------------------------------------------------------------------- location exact_location( __m256d e, __m256d b0, __m256d b1 ); //---------------------------------------------------------------------------------------- // The struct bounded convex represents a bounded convex set on the plane, which can // have one of the following types: // // a) empty // b) ṕoint // c) segment // d) polygon // //---------------------------------------------------------------------------------------- struct bounded_convex final { constexpr bounded_convex( int alloc, __m256d* edge, __m256d* vertex_p ) : alloc( alloc ), edge( edge ), vertex_p( vertex_p ){} void setup( __m256d box ); int insert_vertex( int at ); int remove_vertices( int begin_vertex, int end_vertex ); void set_empty() { end = 0; begin = 0; type = bounded_convex_type::empty; } void set_point( int first_vertex ) { end = 1; begin = 0; type = bounded_convex_type::point; edge[ 0 ] = edge[ first_vertex ]; edge[ 1 ] = edge[ first_vertex + 1 ]; __m256d* src = vertex_pair( first_vertex ); if( first_vertex & 0x1 ) { vertex_p[ 0 ] = gather_lanes< 1, 1 >( src[ 0 ] ); vertex_p[ 1 ] = gather_lanes< 1, 1 >( src[ 1 ] ); vertex_p[ 2 ] = gather_lanes< 1, 1 >( src[ 2 ] ); } else { vertex_p[ 0 ] = src[ 0 ]; vertex_p[ 1 ] = src[ 1 ]; vertex_p[ 2 ] = src[ 2 ]; } } void set_to_last_segment(); void set_segment( int first_vertex ) { end = 2; begin = 0; type = bounded_convex_type::segment; edge[ 0 ] = edge[ first_vertex ]; edge[ 1 ] = edge[ first_vertex + 1 ]; edge[ 2 ] = edge[ first_vertex + 2 ]; __m256d* src = vertex_pair( first_vertex ); if( first_vertex & 0x1 ) { vertex_p[ 0 ] = gather_lanes< 1, 0 >( src[ 0 ], src[ 3 ] ); vertex_p[ 1 ] = gather_lanes< 1, 0 >( src[ 1 ], src[ 4 ] ); vertex_p[ 2 ] = gather_lanes< 1, 0 >( src[ 2 ], src[ 5 ] ); } else { vertex_p[ 0 ] = *src; vertex_p[ 1 ] = *( ++src ); vertex_p[ 2 ] = *( ++src ); } } __m256d box() const; void push( __m256d* begin, __m256d* end ); __m256d plain_edge( int i ) const { return edge[ begin + i ]; } void plain_vertex( __m256d* inf_msup, int i ) const { int ri = ( begin + i ); __m256d const* vp = vertex_pair( ri ); int di = 2 * ( ( begin + i ) & 0x1 ); inf_msup[ 0 ] = _mm256_setr_pd( vp[ 0 ][ di ], vp[ 1 ][ di ], vp[ 2 ][ di ], 0.0 ); inf_msup[ 1 ] = _mm256_setr_pd( vp[ 0 ][ di + 1 ], vp[ 1 ][ di + 1 ], vp[ 2 ][ di + 1 ], 0.0 ); } __m256d* vertex_pair( int index ) { __m256d* p = vertex_p + 3 * ( index / 2 ); return p; } __m256d const* vertex_pair( int index ) const { __m256d* p = vertex_p + 3 * ( index / 2 ); return p; } int end; int begin; int const alloc; __m256d* const edge; __m256d* const vertex_p; bounded_convex_type type; }; // huge = { 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x5500'0000'0000'0000, 0x7FFF'7FFF'7FFF' } // tiny = { 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x2bd0'0000'0000'0000, 0x0000'0000'0000'00000 } __m256i bounds( __m256i* tiny, __m256i* huge ); int normalize_equations( __m256d* quadrant[ 4 ], int n, __m256i tiny, __m256i huge ); //---------------------------------------------------------------------------------------- // // The class locator is used to find the location of a point with respect to the half // plane. // // a x + b y >= c z // // The template parameters indicate the signs of the coefficients a, b and c. // //---------------------------------------------------------------------------------------- template < int flip_x, int flip_y, int flip_z > struct locator final { //---------------------------------------------------------------------------------------- // The raw method assumes that // // edges[ 0 ] = { a, a, a, a } // edges[ 1 ] = { b, b, b, b } // edges[ 2 ] = { c, c, c, c } // // vertex_pair[ 0 ] = { x0_inf, x0_msup, x1_inf, x1_msup } // vertex_pair[ 1 ] = { y0_inf, y0_msup, y1_inf, y1_msup } // vertex_pair[ 2 ] = { z0_inf, z0_msup, z1_inf, z1_msup } // // and checks the location of the vertices ( x0, y0 ) and ( x1, y1 ) with respect to // the half plane // // (-1)^flip_x a x + (-1)^flip_y b y >= (-1)^flip_z c z // // This is done by computing a x + b y - c z rounded up and down. // //---------------------------------------------------------------------------------------- static int raw( __m256d const* edge, __m256d const* vertex_pair ) { __m256d chk; __m256d zero_d = _mm256_xor_pd( zero_d, zero_d ); if constexpr ( flip_x ) chk = _mm256_mul_pd( edge[ 0 ], permute< 1, 0, 3, 2 >( vertex_pair[ 0 ] ) ); else chk = _mm256_mul_pd( edge[ 0 ], vertex_pair[ 0 ] ); if constexpr ( flip_y) chk = _mm256_fmadd_pd( edge[ 1 ], permute< 1, 0, 3, 2 >( vertex_pair[ 1 ] ), chk ); else chk = _mm256_fmadd_pd( edge[ 1 ], vertex_pair[ 1 ], chk ); if constexpr ( flip_z ) chk = _mm256_fmadd_pd( edge[ 2 ], vertex_pair[ 2 ], chk ); else chk = _mm256_fmadd_pd( edge[ 2 ], permute< 1, 0, 3, 2 >( vertex_pair[ 2 ] ), chk ); __m256d cmp = _mm256_cmp_pd( chk, zero_d, _CMP_GT_OQ ); int cm = _mm256_movemask_pd( cmp ); return cm; } static location locate_vertex_0( __m256d const* edge, __m256d const* vertex_pair ) { location loc; int cm = raw( edge, vertex_pair ); if( cm & 0x1 ) // inf( x[ 0 ] ) > 0 loc = location::in; else { if( cm & 0x2 ) loc = location::out; else loc = location::null; } return loc; } static location locate_vertex_1( __m256d const* edge, __m256d const* vertex_pair ) { location loc; int cm = raw( edge, vertex_pair ); if( cm & 0x4 ) // the second vertex is in loc = location::in; else { if( cm & 0x8 ) // the second vertex is out loc = location::out; else loc = location::null; } return loc; } static void locate_pair( location loc[ 2 ], __m256d const* edge, __m256d const* vertex_pair ) { // computing a vertex[ 0 ] + b vertex[ 1 ] + c vertex[ 2 ] int cm = raw( edge, vertex_pair ); if( cm & 0x1 ) // the first vertex is in loc[ 0 ] = location::in; else { if( cm & 0x2 ) // the first vertex is out loc[ 0 ] = location::out; else loc[ 0 ] = location::null; } if( cm & 0x4 ) // the second vertex is in loc[ 1 ] = location::in; else { if( cm & 0x8 ) // the second vertex is out loc[ 1 ] = location::out; else loc[ 1 ] = location::null; } } static void insert( bounded_convex& r, __m256d new_edge ) { __m256i ones = _mm256_cmpeq_epi64( ones, ones ); ones = _mm256_slli_epi64( ones, 63 ); __m256d sign = _mm256_castsi256_pd( ones ); __m256d coefficients[ 3 ] = { permute< 0, 0, 0, 0 >( new_edge ), permute< 1, 1, 1, 1 >( new_edge ), permute< 2, 2, 2, 2 >( new_edge ) }; coefficients[ 0 ] = _mm256_andnot_pd( sign, coefficients[ 0 ] ); coefficients[ 1 ] = _mm256_andnot_pd( sign, coefficients[ 1 ] ); coefficients[ 2 ] = _mm256_andnot_pd( sign, coefficients[ 2 ] ); if( r.type != bounded_convex_type::polygon ) { if( r.type == bounded_convex_type::point ) { location loc = locate_vertex_0( coefficients, r.vertex_p ); if( loc == location::null ) loc = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] ); if( loc == location::out ) r.set_empty(); return; } location loc[ 2 ]; locate_pair( loc, coefficients, r.vertex_p ); if( loc[ 0 ] == location::null ) loc[ 0 ] = exact_location( new_edge, r.edge[ 0 ], r.edge[ 1 ] ); if( loc[ 1 ] == location::null ) loc[ 1 ] = exact_location( new_edge, r.edge[ 1 ], r.edge[ 2 ] ); switch( loc[ 0 ] ) { case location::in: { if( loc[ 1 ] == location::out ) { r.edge[ 2 ] = new_edge; cross< vertex_1 >( r.vertex_p, r.edge[ 1 ], new_edge ); } return; } case location::border: { if( loc[ 1 ] == location::out ) { r.end = 1; r.type = bounded_convex_type::point; } return; } default: // location::out { switch( loc[ 1 ] ) { case location::in: { r.edge[ 0 ] = new_edge; cross< vertex_0 >( r.vertex_p, new_edge, r.edge[ 1 ] ); return; } case location::border: { r.end = 1; r.edge[ 0 ] = r.edge[ 1 ]; r.edge[ 1 ] = r.edge[ 2 ]; r.type = bounded_convex_type::point; r.vertex_p[ 0 ] = gather_lanes< 1, 1 >( r.vertex_p[ 0 ] ); r.vertex_p[ 1 ] = gather_lanes< 1, 1 >( r.vertex_p[ 1 ] ); r.vertex_p[ 2 ] = gather_lanes< 1, 1 >( r.vertex_p[ 2 ] ); return; } default: { r.set_empty(); return; } } } } } location loc[ 2 ]; int index = r.begin; int intersection = -1; bool intersection_at_vertex; __m256d* vertex_pair_at_index = r.vertex_pair( index ); auto scan = [ & ]() { if( ++index >= r.end ) return location::null; if( index & 0x1 ) return loc[ 1 ]; vertex_pair_at_index += 3; if( index + 1 >= r.end ) { loc[ 0 ] = locate_vertex_0( coefficients, vertex_pair_at_index ); if( loc[ 0 ] == location::null ) loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); } else { locate_pair( loc, coefficients, vertex_pair_at_index ); if( loc[ 0 ] == location::null ) loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); if( loc[ 1 ] == location::null ) loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] ); } return loc[ 0 ]; }; if( index & 0x1 ) { location begin = locate_vertex_1( coefficients, vertex_pair_at_index ); if( begin == location::null ) begin = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); switch( begin ) { case location::in: goto cursor_in; case location::border: { intersection = index; intersection_at_vertex = true; ++index; vertex_pair_at_index += 3; locate_pair( loc, coefficients, vertex_pair_at_index ); if( loc[ 0 ] == location::null ) loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); if( loc[ 1 ] == location::null ) loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] ); switch( loc[ 0 ] ) { case location::in: goto cursor_in; case location::border: { if( loc[ 1 ] == location::out ) r.set_segment( r.begin ); return; } case location::out: goto cursor_out; } } default: goto cursor_out; } } locate_pair( loc, coefficients, vertex_pair_at_index ); if( loc[ 0 ] == location::null ) loc[ 0 ] = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); if( loc[ 1 ] == location::null ) loc[ 1 ] = exact_location( new_edge, r.edge[ index + 1 ], r.edge[ index + 2 ] ); switch( loc[ 0 ] ) { case location::in: goto cursor_in; case location::border: { intersection = index; intersection_at_vertex = true; ++index; switch( loc[ 1 ] ) { case location::in: goto cursor_in; case location::border: { location next = locate_vertex_0( coefficients, vertex_pair_at_index + 3 ); if( next == location::null ) next = exact_location( new_edge, r.edge[ index ], r.edge[ index + 1 ] ); if( next == location::out ) r.set_segment( r.begin ); return; } default: goto cursor_out; } } default: goto cursor_out; } cursor_in: { location next_loc = scan(); switch( next_loc ) { case location::in: goto cursor_in; case location::border: { if( intersection >= 0 ) { if( !intersection_at_vertex ) { __m256d* vp = r.vertex_pair( intersection - 1 ); if( intersection & 0x1 ) cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] ); else cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] ); --intersection; } r.end = index + 1; r.begin = intersection; r.edge[ r.end ] = new_edge; r.edge[ r.begin ] = new_edge; } else { next_loc = scan(); if( next_loc == location::out ) { intersection = index - 1; intersection_at_vertex = true; goto cursor_out; } } return; } case location::out: { if( intersection < 0 ) { intersection = index; intersection_at_vertex = false; goto cursor_out; } if( intersection_at_vertex ) r.begin = intersection; else { r.begin = intersection - 1; __m256d* vp = r.vertex_pair( r.begin ); if( r.begin & 0x1 ) cross< vertex_1 >( vp, new_edge, r.edge[ intersection ] ); else cross< vertex_0 >( vp, new_edge, r.edge[ intersection ] ); } __m256d* vp = r.vertex_pair( index ); if( index & 0x1 ) cross< vertex_1 >( vp, r.edge[ index ], new_edge ); else cross< vertex_0 >( vp, r.edge[ index ], new_edge ); r.end = index + 1; r.edge[ r.end ] = new_edge; r.edge[ r.begin ] = new_edge; return; } case location::null: { assert( index == r.end ); if( intersection >= 0 ) { if( intersection_at_vertex ) { if( intersection == r.begin ) return; } else { __m256d* first_pair = r.vertex_pair( intersection - 1 ); if( intersection & 0x1 ) cross< vertex_0 >( first_pair, new_edge, r.edge[ intersection ] ); else cross< vertex_1 >( first_pair, new_edge, r.edge[ intersection ] ); --intersection; } __m256d* last_pair = r.vertex_pair( index ); if( index & 0x1 ) cross< vertex_1 >( last_pair, r.edge[ index ], new_edge ); else cross< vertex_0 >( last_pair, r.edge[ index ], new_edge ); ++r.end; r.begin = intersection; r.edge[ r.end ] = new_edge; r.edge[ r.begin ] = new_edge; } return; } } } cursor_out: { location new_loc = scan(); switch( new_loc ) { case location::in: { if( intersection < 0 ) { intersection = index; intersection_at_vertex = false; goto cursor_in; } if( intersection + 1 == index ) { assert( !intersection_at_vertex ); int new_vertex = r.insert_vertex( intersection ); if( new_vertex == intersection ) { __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) { cross< vertex_1 >( vp, r.edge[ intersection ], new_edge ); cross< vertex_0 >( vp + 3, new_edge, r.edge[ intersection + 2 ] ); } else { cross< vertex_0 >( vp, r.edge[ intersection ], new_edge ); cross< vertex_1 >( vp, new_edge, r.edge[ intersection + 2 ] ); } r.edge[ intersection + 1 ] = new_edge; } else { assert( new_vertex == intersection - 1 ); __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) { cross< vertex_1 >( vp, new_edge, r.edge[ index ] ); cross< vertex_0 >( vp, r.edge[ intersection ], new_edge) ; } else { cross< vertex_0 >( vp, new_edge, r.edge[ index ] ); cross< vertex_1 >( vp - 3, r.edge[ intersection ], new_edge) ; } r.edge[ new_vertex ] = r.edge[ intersection ]; r.edge[ intersection ] = new_edge; } return; } if( !intersection_at_vertex ) { __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) cross< vertex_1 >( vp, r.edge[ intersection ], new_edge ); else cross< vertex_0 >( vp, r.edge[ intersection ], new_edge ); } if( intersection + 2 < index ) index = r.remove_vertices( intersection + 1, index - 1 ) + 2; __m256d* vp = r.vertex_pair( index - 1 ); if( index & 0x1 ) cross< vertex_0 >( vp, new_edge, r.edge[ index ] ); else cross< vertex_1 >( vp, new_edge, r.edge[ index ] ); r.edge[ index - 1 ] = new_edge; return; } case location::border: { if( intersection >= 0 ) { if( intersection_at_vertex ) { if( ( intersection == r.begin ) && ( index + 1 == r.end ) ) { r.set_to_last_segment(); return; } } else { __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) cross< vertex_1 >( vp, r.edge[ intersection ], new_edge ); else cross< vertex_0 >( vp, r.edge[ intersection ], new_edge ); } if( intersection + 1 < index ) intersection = r.remove_vertices( intersection + 1, index ); r.edge[ intersection + 1 ] = new_edge; return; } new_loc = scan(); switch( new_loc ) { case location::in: { intersection = index - 1; intersection_at_vertex = true; goto cursor_in; } case location::border: { r.set_segment( index - 1 ); return; } default: // location::out or location::null: { r.set_point( index - 1 ); return; } } } case location::out: goto cursor_out; case location::null: { assert( index == r.end ); if( intersection < 0 ) { r.set_empty(); return; } if( intersection_at_vertex ) { if( intersection == r.begin ) { r.set_point( r.begin ); return; } } else { __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) cross< vertex_1 >( vp, r.edge[ intersection ], new_edge ); else cross< vertex_0 >( vp, r.edge[ intersection ], new_edge ); } ++intersection; __m256d* vp = r.vertex_pair( intersection ); if( intersection & 0x1 ) cross< vertex_1 >( vp, new_edge, r.edge[ index ] ); else cross< vertex_0 >( vp, new_edge, r.edge[ index ] ); r.end = intersection + 1; r.edge[ intersection ] = new_edge; r.edge[ r.end ] = r.edge[ r.begin ]; } return; } } } }; } // namespace wm::nlp::lp2d //---------------------------------------------------------------------------------------- // // The function interval_system_2d solves the system of interval equations // // ai x + bi y = ci // // for 0 <= i < n_equations and // // ai = [ eq[ 6 * i ], eq[ 6 * i + 1 ] ] with eq[ 6 * i ] <= eq[ 6 * i + 1 ] // bi = [ eq[ 6 * i + 2 ], eq[ 6 * i + 3 ] ] with eq[ 6 * i + 2 ] <= eq[ 6 * i + 3 ] // ci = [ eq[ 6 * i + 4 ], eq[ 6 * i + 5 ] ] with eq[ 6 * i + 4 ] <= eq[ 6 * i + 5 ] // // The function finds the solutions inside the box // // box[ 0 ] <= x <= box[ 1 ] // box[ 2 ] <= y <= box[ 3 ] // // The solution consists of up to 4 boxes bx[ 0 ], bx[ 1 ], bx[ 2 ] and bx[ 3 ], with // // bx[ i ] = inf_x[ i ] <= x <= sup_x[ i ] // inf_y[ i ] <= y <= sup_y[ i ] // for // // inf_x[ i ] = solution[ 4 * i ] // sup_x[ i ] = solution[ 4 * i + 1 ] // inf_y[ i ] = solution[ 4 * i + 2 ] // sup_y[ i ] = solution[ 4 * i + 3 ] // // The function returns the number of boxes in the solutions, which can be 0,1,2,3 or 4, // or a negative number if the input is invalid or if it cannot change the rounding mode. // // It assumes that: // // a) n_equations <= 16 // b) all doubles are finite, ie., not nan or +/-inf // // c) box and eq are such that // // box[ 0 ] <= box[ 1 ] // box[ 2 ] <= box[ 3] // eq[ 2 * i ] <= eq[ 2 i + 1 ] for i = 0,..., 3 * n_equations. // // d) box and eq are aligned at a 32 byte boundary // // It does not verify the input and its results should not be trusted if the inputs are // invalid. // //---------------------------------------------------------------------------------------- extern "C" { int interval_system_2d( double* solution, double const* box, double const* eq, unsigned int n_equations ); }