diff --git a/.gitignore b/.gitignore index be531abc6..8e9bceba1 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ src/xc_integrator/local_work_driver/host/obara_saika/generator/*.x # Build directories build/ +build*/ _build/ cmake-build-*/ diff --git a/cmake/gauxc-dep-versions.cmake b/cmake/gauxc-dep-versions.cmake index 62fbcb26f..1c8459bfc 100644 --- a/cmake/gauxc-dep-versions.cmake +++ b/cmake/gauxc-dep-versions.cmake @@ -1,5 +1,5 @@ set( GAUXC_LINALG_MODULES_REPOSITORY https://github.com/wavefunction91/linalg-cmake-modules.git ) -set( GAUXC_LINALG_MODULES_REVISION 9d2c273a671d6811e9fd432f6a4fa3d915b144b8 ) +set( GAUXC_LINALG_MODULES_REVISION 222364df5e7639f371bf2f37ceb0f476301101a1 ) set( GAUXC_CUB_REPOSITORY https://github.com/NVIDIA/cub.git ) set( GAUXC_CUB_REVISION 1.10.0 ) @@ -7,14 +7,14 @@ set( GAUXC_CUB_REVISION 1.10.0 ) set( GAUXC_CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git ) set( GAUXC_CUTLASS_REVISION v2.10.0 ) -set( GAUXC_EXCHCXX_REPOSITORY https://github.com/wavefunction91/ExchCXX.git ) -set( GAUXC_EXCHCXX_REVISION v1.0.0 ) +set( GAUXC_EXCHCXX_REPOSITORY https://github.com/lorisercole/ExchCXX.git ) +set( GAUXC_EXCHCXX_REVISION fab59dc41ef881e0a2b776a92b47c03101340071 ) set( GAUXC_GAU2GRID_REPOSITORY https://github.com/dgasmith/gau2grid.git ) set( GAUXC_GAU2GRID_REVISION v2.0.6 ) -set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/wavefunction91/IntegratorXX.git ) -set( GAUXC_INTEGRATORXX_REVISION 1369be58d7a3235dac36d75dd964fef058830622 ) +set( GAUXC_INTEGRATORXX_REPOSITORY https://github.com/lorisercole/IntegratorXX.git ) +set( GAUXC_INTEGRATORXX_REVISION 923125236ea5971ee9accdea39da552b8e322ff6 ) set( GAUXC_HIGHFIVE_REPOSITORY https://github.com/highfive-devs/HighFive.git ) set( GAUXC_HIGHFIVE_REVISION 805f0e13d09b47c4b01d40682621904aa3b31bb8 ) diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid.h b/external/gau2grid/generated_source/gau2grid/gau2grid.h index 29f888852..0e097a526 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid.h +++ b/external/gau2grid/generated_source/gau2grid/gau2grid.h @@ -79,4 +79,4 @@ void gg_collocation_deriv3(int L, const unsigned long npoints, const double* PRA #ifdef __cplusplus } #endif -#endif /* GAU2GRID_GUARD_H */ \ No newline at end of file +#endif /* GAU2GRID_GUARD_H */ diff --git a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h index f6033886a..d85679263 100644 --- a/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h +++ b/external/gau2grid/generated_source/gau2grid/gau2grid_pragma.h @@ -96,4 +96,4 @@ #define PRAGMA_RESTRICT __restrict__ -#endif \ No newline at end of file +#endif diff --git a/external/gau2grid/generated_source/gau2grid_helper.c b/external/gau2grid/generated_source/gau2grid_helper.c index e5868df7e..31956e084 100644 --- a/external/gau2grid/generated_source/gau2grid_helper.c +++ b/external/gau2grid/generated_source/gau2grid_helper.c @@ -8,6 +8,7 @@ #include #if defined(__clang__) && defined(_MSC_VER) #include +#include #elif defined __clang__ #include #elif defined _MSC_VER diff --git a/include/gauxc/basisset.hpp b/include/gauxc/basisset.hpp index c0c0f8396..3df7541fd 100644 --- a/include/gauxc/basisset.hpp +++ b/include/gauxc/basisset.hpp @@ -68,7 +68,7 @@ struct BasisSet : public std::vector> { * * @returns the number of GTO shells which comprise the BasisSet object */ - inline int32_t nshells() const { return this->size(); }; + inline int32_t nshells() const { return static_cast(this->size()); }; /** * @brief Return the number of GTO basis functions which comprise the diff --git a/include/gauxc/basisset_map.hpp b/include/gauxc/basisset_map.hpp index 53f6d9d8f..c3bcfed36 100644 --- a/include/gauxc/basisset_map.hpp +++ b/include/gauxc/basisset_map.hpp @@ -61,8 +61,8 @@ class BasisSetMap { size_t st_idx = 0; for( const auto& shell : basis ) { size_t range_end = st_idx + shell.size(); - shell_to_first_ao_.emplace_back( st_idx ); - shell_to_ao_range_.push_back({ st_idx, range_end }); + shell_to_first_ao_.emplace_back( static_cast(st_idx) ); + shell_to_ao_range_.push_back({ static_cast(st_idx), static_cast(range_end) }); st_idx = range_end; } @@ -72,7 +72,7 @@ class BasisSetMap { auto at_pos = std::find_if( mol.begin(), mol.end(), [&](const Atom& at) { return at.x == shell.O()[0] and at.y == shell.O()[1] and at.z == shell.O()[2]; }); - if( at_pos != mol.end() ) shell_to_center_[sh_idx] = std::distance( mol.begin(), at_pos ); + if( at_pos != mol.end() ) shell_to_center_[sh_idx] = static_cast(std::distance( mol.begin(), at_pos )); else shell_to_center_[sh_idx] = -1; ++sh_idx; } @@ -170,13 +170,13 @@ class BasisSetMap { /// Count the number of shells with angular momentum `l` inline size_t nshells_with_l(uint32_t l) const { - return std::count( shell_ls_.begin(), shell_ls_.end(), l ); + return std::count( shell_ls_.begin(), shell_ls_.end(), static_cast(l) ); } /// Check whether shells of angular momentum `l` are spherical (pure) inline bool l_purity(uint32_t l) const { // Find first shell with L - auto first_shell_w_l = std::find( shell_ls_.begin(), shell_ls_.end(), l ); + auto first_shell_w_l = std::find( shell_ls_.begin(), shell_ls_.end(), static_cast(l) ); return shell_pure( std::distance( shell_ls_.begin(), first_shell_w_l ) ); } diff --git a/include/gauxc/exceptions.hpp b/include/gauxc/exceptions.hpp index 84b9b4893..efd60fbeb 100644 --- a/include/gauxc/exceptions.hpp +++ b/include/gauxc/exceptions.hpp @@ -14,8 +14,6 @@ #include #include #include -#include - namespace GauXC { // FWD decl all exception types for optional handling @@ -45,16 +43,10 @@ class generic_gauxc_exception : public std::exception { std::string function_; int line_; std::string msg_prefix_; + std::string what_msg_; const char* what() const noexcept override { - std::stringstream ss; - ss << "Generic GauXC Exception (" << msg_prefix_ << ")" << std::endl - << " File " << file_ << std::endl - << " Function " << function_ << std::endl - << " Line " << line_ << std::endl; - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); }; public: @@ -67,17 +59,29 @@ class generic_gauxc_exception : public std::exception { * @param[in] line Line number of file that threw exception * @param[in] msg General descriptor of task which threw exception */ - generic_gauxc_exception( std::string file, std::string function, int line, + generic_gauxc_exception( std::string file, std::string function, int line, std::string msg ) : - file_(file), function_(function), line_(line), msg_prefix_(msg) {} + file_(file), function_(function), line_(line), msg_prefix_(msg) { + std::stringstream ss; + ss << "Generic GauXC Exception (" << msg_prefix_ << ")" << std::endl + << " File " << file_ << std::endl + << " Function " << function_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; } +#if defined(_MSC_VER) && !defined(__clang__) +#define GAUXC_GENERIC_EXCEPTION( MSG ) \ + throw generic_gauxc_exception( __FILE__, __FUNCSIG__, __LINE__, MSG ) +#else #define GAUXC_GENERIC_EXCEPTION( MSG ) \ throw generic_gauxc_exception( __FILE__, __PRETTY_FUNCTION__, __LINE__, MSG ) +#endif #define GAUXC_PIMPL_NOT_INITIALIZED() \ GAUXC_GENERIC_EXCEPTION("PIMPL NOT INITIALIZED") diff --git a/include/gauxc/molecule.hpp b/include/gauxc/molecule.hpp index 9f4fe6a74..2b6743620 100644 --- a/include/gauxc/molecule.hpp +++ b/include/gauxc/molecule.hpp @@ -47,7 +47,7 @@ class Molecule : public std::vector { })->Z; } - bool operator==(const Molecule& other) { + bool operator==(const Molecule& other) const { if(other.size() != this->size()) return false; for( auto i = 0ul; i < this->size(); ++i ) if( other[i] != operator[](i) ) return false; diff --git a/include/gauxc/reduction_driver.hpp b/include/gauxc/reduction_driver.hpp index f3bef1886..9cb1dffc7 100644 --- a/include/gauxc/reduction_driver.hpp +++ b/include/gauxc/reduction_driver.hpp @@ -11,6 +11,7 @@ */ #pragma once #include +#include #include #include #include diff --git a/include/gauxc/util/misc.hpp b/include/gauxc/util/misc.hpp index cf2ef8f04..998482efa 100644 --- a/include/gauxc/util/misc.hpp +++ b/include/gauxc/util/misc.hpp @@ -114,7 +114,7 @@ inline auto integral_list_intersect( const std::vector& A, const std::vector& B, const uint32_t overlap_threshold_spec ) { - const uint32_t max_intersect_sz = std::min(A.size(), B.size()); + const uint32_t max_intersect_sz = static_cast(std::min(A.size(), B.size())); const uint32_t overlap_threshold = std::min( max_intersect_sz, overlap_threshold_spec ); diff --git a/include/gauxc/util/real_solid_harmonics.hpp b/include/gauxc/util/real_solid_harmonics.hpp index 3394da020..6c5aa2a0b 100644 --- a/include/gauxc/util/real_solid_harmonics.hpp +++ b/include/gauxc/util/real_solid_harmonics.hpp @@ -66,10 +66,10 @@ inline constexpr double real_solid_harmonic_coeff( int l, int m, int lx, int ly, auto i = abs_m - lx; if( comp != parity( std::abs(i) ) ) return 0.; - double pfac = integral_falling_factorial( 2*lx, lx+1 ) * - integral_falling_factorial( 2*ly, ly+1 ) * - integral_falling_factorial( 2*lz, lz+1 ); - const double factorial_l = integral_factorial(l); + double pfac = static_cast(integral_falling_factorial( 2*lx, lx+1 )) * + static_cast(integral_falling_factorial( 2*ly, ly+1 )) * + static_cast(integral_falling_factorial( 2*lz, lz+1 )); + const double factorial_l = static_cast(integral_factorial(l)); pfac = pfac / ( factorial_l * factorial_l * integral_falling_factorial(2*l,l+1) * integral_falling_factorial(l+abs_m,l-abs_m+1) ); pfac = std::sqrt(pfac); @@ -84,7 +84,7 @@ inline constexpr double real_solid_harmonic_coeff( int l, int m, int lx, int ly, auto i_max = (l-abs_m)/2; double sum = 0; for(i=i_min;i<=i_max;i++) { - double pfac1 = parity(i) * binomial_coefficient(l,i) * binomial_coefficient(i,j); + double pfac1 = static_cast(parity(i) * binomial_coefficient(l,i) * binomial_coefficient(i,j)); pfac1 *= integral_factorial(2*(l-i)); pfac1 /= integral_factorial(l-abs_m-2*i); double sum1 = 0.0; @@ -99,7 +99,7 @@ inline constexpr double real_solid_harmonic_coeff( int l, int m, int lx, int ly, sum += pfac1*sum1; } - double pfac2 = integral_double_factorial( 2*l - 1 ); + double pfac2 = static_cast(integral_double_factorial( 2*l - 1 )); pfac2 = pfac2 / integral_double_factorial( 2*lx - 1 ); pfac2 = pfac2 / integral_double_factorial( 2*ly - 1 ); pfac2 = pfac2 / integral_double_factorial( 2*lz - 1 ); diff --git a/include/gauxc/xc_integrator/integrator_factory.hpp b/include/gauxc/xc_integrator/integrator_factory.hpp index 54a1c4a3a..1fc75f888 100644 --- a/include/gauxc/xc_integrator/integrator_factory.hpp +++ b/include/gauxc/xc_integrator/integrator_factory.hpp @@ -68,8 +68,8 @@ class XCIntegratorFactory { lb->runtime(), rd_kernel_ ); // Create Integrator instance - std::transform( input_type_.begin(), input_type_.end(), input_type_.begin(), - ::toupper ); + std::transform( input_type_.begin(), input_type_.end(), input_type_.begin(), + [](unsigned char c){ return static_cast(std::toupper(c)); } ); if( input_type_ == "REPLICATED" ) return std::make_shared( diff --git a/include/gauxc/xc_task.hpp b/include/gauxc/xc_task.hpp index 630d6dd6b..8a27a195b 100644 --- a/include/gauxc/xc_task.hpp +++ b/include/gauxc/xc_task.hpp @@ -66,14 +66,14 @@ struct XCTask { GAUXC_GENERIC_EXCEPTION("Cannot Perform Requested Merge: Incompatible Tasks"); points.insert( points.end(), other.points.begin(), other.points.end() ); weights.insert( weights.end(), other.weights.begin(), other.weights.end() ); - npts = points.size(); + npts = static_cast(points.size()); } template void merge_with( TaskIt begin, TaskIt end ) { size_t old_sz = points.size(); - size_t pts_add = std::accumulate( begin, end, 0ul, + size_t pts_add = std::accumulate( begin, end, size_t{0}, []( const auto &a, const auto &t ) { return a + t.points.size(); }); @@ -91,10 +91,9 @@ struct XCTask { weights_it = std::copy( it->weights.begin(), it->weights.end(), weights_it ); } - npts = points.size(); + npts = static_cast(points.size()); } - inline bool equiv_with( const XCTask& other ) const { return iParent == other.iParent and bfn_screening.equiv_with(other.bfn_screening); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1aed4b428..8b3c919c7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,7 +52,9 @@ target_include_directories( gauxc ) include( CheckCXXCompilerFlag ) -check_cxx_compiler_flag( -Wall GAUXC_CXX_HAS_WALL ) +if(NOT MSVC) # under clang-cl, -Wall maps to /Wall (= -Weverything) + check_cxx_compiler_flag( -Wall GAUXC_CXX_HAS_WALL ) +endif() check_cxx_compiler_flag( -Wextra GAUXC_CXX_HAS_WEXTRA ) check_cxx_compiler_flag( -Wpedantic GAUXC_CXX_HAS_WPEDANTIC ) check_cxx_compiler_flag( -Wnon-virtual-dtor GAUXC_CXX_HAS_WNON_VIRTUAL_DTOR ) @@ -78,6 +80,16 @@ if( GAUXC_CXX_HAS_WSHADOW ) target_compile_options( gauxc PRIVATE $<$: -Wshadow> ) endif() +if(MSVC) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options( gauxc PRIVATE + -Wno-unused-variable + ) + else() + target_compile_options( gauxc PUBLIC /EHsc ) + endif() +endif() + target_link_libraries( gauxc PUBLIC ExchCXX::ExchCXX IntegratorXX::IntegratorXX diff --git a/src/atomic_radii.cxx b/src/atomic_radii.cxx index 522753092..d5a6eb921 100644 --- a/src/atomic_radii.cxx +++ b/src/atomic_radii.cxx @@ -28,7 +28,7 @@ double default_atomic_radius(AtomicNumber Z) { } -long double pm_to_bohr( long double x ) { +double pm_to_bohr( double x ) { return x * 0.0188973000000929 / 1.00000205057; } @@ -344,7 +344,7 @@ double uff_radius_103(AtomicNumber _Z) { const double RADIUS_UFF_SCALING = 1.1; const double DDX_BOHR_TO_ANGSTROM = 0.52917721092; auto Z = _Z.get(); - if (Z < 0 || Z >= radius_uff_list.size()) { + if (Z < 0 || static_cast(Z) >= radius_uff_list.size()) { return -1.; } return radius_uff_list[Z-1] * RADIUS_UFF_SCALING / DDX_BOHR_TO_ANGSTROM; diff --git a/src/exceptions/cublas_exception.hpp b/src/exceptions/cublas_exception.hpp index 503fc9007..3d294a0cd 100644 --- a/src/exceptions/cublas_exception.hpp +++ b/src/exceptions/cublas_exception.hpp @@ -78,6 +78,7 @@ class cublas_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception cublasStatus_t err_code_; ///< cuBLAS error code pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown cuBLAS error @@ -86,17 +87,7 @@ class cublas_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "CUBLAS Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(err_code_) << ": \"" - << detail::cublasGetErrorString( err_code_ ) - << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -109,9 +100,18 @@ class cublas_exception : public std::exception { * @param[in] msg General descriptor of task which threw exception * @param[in] err cuBLAS error code pertaining to the thrown exception */ - cublas_exception( std::string file, int line, std::string msg, + cublas_exception( std::string file, int line, std::string msg, cublasStatus_t err ) : - file_(file), line_(line), msg_prefix_(msg), err_code_(err) { } + file_(file), line_(line), msg_prefix_(msg), err_code_(err) { + std::stringstream ss; + ss << "CUBLAS Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(err_code_) << ": \"" + << detail::cublasGetErrorString( err_code_ ) + << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class cublas_exception diff --git a/src/exceptions/cuda_exception.hpp b/src/exceptions/cuda_exception.hpp index 6d4767d1a..9a9b9103a 100644 --- a/src/exceptions/cuda_exception.hpp +++ b/src/exceptions/cuda_exception.hpp @@ -31,6 +31,7 @@ class cuda_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception cudaError_t err_code_; ///< CUDA error code pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown CUDA error @@ -39,16 +40,7 @@ class cuda_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "CUDA Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(err_code_) << ": \"" - << cudaGetErrorString( err_code_ ) << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -62,7 +54,15 @@ class cuda_exception : public std::exception { * @param[in] err CUDA error code pertaining to the thrown exception */ cuda_exception( std::string file, int line, std::string msg, cudaError_t err ) : - file_(file), line_(line), msg_prefix_(msg), err_code_(err) { } + file_(file), line_(line), msg_prefix_(msg), err_code_(err) { + std::stringstream ss; + ss << "CUDA Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(err_code_) << ": \"" + << cudaGetErrorString( err_code_ ) << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class cuda_exception diff --git a/src/exceptions/cutlass_exception.hpp b/src/exceptions/cutlass_exception.hpp index 4de854bef..2a76983e7 100644 --- a/src/exceptions/cutlass_exception.hpp +++ b/src/exceptions/cutlass_exception.hpp @@ -31,6 +31,7 @@ class cutlass_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception cutlass::Status status_; ///< CUTLASS status pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown CUTLASS error @@ -39,16 +40,7 @@ class cutlass_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "CUTLASS Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(status_) << ": \"" - << cutlassGetStatusString( status_ ) << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -62,7 +54,15 @@ class cutlass_exception : public std::exception { * @param[in] err CUTLASS status pertaining to the thrown exception */ cutlass_exception( std::string file, int line, std::string msg, cutlass::Status status ) : - file_(file), line_(line), msg_prefix_(msg), status_(status) { } + file_(file), line_(line), msg_prefix_(msg), status_(status) { + std::stringstream ss; + ss << "CUTLASS Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(status_) << ": \"" + << cutlassGetStatusString( status_ ) << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class cutlass_exception diff --git a/src/exceptions/hip_exception.hpp b/src/exceptions/hip_exception.hpp index 08a403022..770bb772f 100644 --- a/src/exceptions/hip_exception.hpp +++ b/src/exceptions/hip_exception.hpp @@ -31,6 +31,7 @@ class hip_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception hipError_t err_code_; ///< HIP error code pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown HIP error @@ -39,16 +40,7 @@ class hip_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "HIP Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(err_code_) << ": \"" - << hipGetErrorString( err_code_ ) << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -62,7 +54,15 @@ class hip_exception : public std::exception { * @param[in] err HIP error code pertaining to the thrown exception */ hip_exception( std::string file, int line, std::string msg, hipError_t err ) : - file_(file), line_(line), msg_prefix_(msg), err_code_(err) { } + file_(file), line_(line), msg_prefix_(msg), err_code_(err) { + std::stringstream ss; + ss << "HIP Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(err_code_) << ": \"" + << hipGetErrorString( err_code_ ) << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class hip_exception diff --git a/src/exceptions/hipblas_exception.hpp b/src/exceptions/hipblas_exception.hpp index bb89a3316..388954762 100644 --- a/src/exceptions/hipblas_exception.hpp +++ b/src/exceptions/hipblas_exception.hpp @@ -85,6 +85,7 @@ class hipblas_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception hipblasStatus_t err_code_; ///< hipBLAS error code pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown hipBLAS error @@ -93,17 +94,7 @@ class hipblas_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "HIPBLAS Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(err_code_) << ": \"" - << detail::hipblasGetErrorString( err_code_ ) - << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -116,9 +107,18 @@ class hipblas_exception : public std::exception { * @param[in] msg General descriptor of task which threw exception * @param[in] err hipBLAS error code pertaining to the thrown exception */ - hipblas_exception( std::string file, int line, std::string msg, + hipblas_exception( std::string file, int line, std::string msg, hipblasStatus_t err ) : - file_(file), line_(line), msg_prefix_(msg), err_code_(err) { } + file_(file), line_(line), msg_prefix_(msg), err_code_(err) { + std::stringstream ss; + ss << "HIPBLAS Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(err_code_) << ": \"" + << detail::hipblasGetErrorString( err_code_ ) + << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class hipblas_exception diff --git a/src/exceptions/magma_exception.hpp b/src/exceptions/magma_exception.hpp index 300565735..f1e811a03 100644 --- a/src/exceptions/magma_exception.hpp +++ b/src/exceptions/magma_exception.hpp @@ -29,6 +29,7 @@ class magma_exception : public std::exception { int line_; ///< Line number of file_ that threw exception std::string msg_prefix_; ///< General descriptor of task which threw exception magma_int_t err_code_; ///< MAGMA error code pertaining to the thrown exception + std::string what_msg_; /** * @brief Get a descriptive message pertaining to the thrown MAGMA error @@ -37,16 +38,7 @@ class magma_exception : public std::exception { * the internal state of the exception object. */ const char* what() const noexcept override { - std::stringstream ss; - ss << "MAGMA Exception (" << msg_prefix_ << ")" << std::endl - << " Error Code " << int(err_code_) << ": \"" - << magma_strerror( err_code_ ) << "\"" << std::endl - << " File " << file_ << std::endl - << " Line " << line_ << std::endl; - - auto msg = ss.str(); - - return strdup( msg.c_str() ); + return what_msg_.c_str(); } public: @@ -60,7 +52,15 @@ class magma_exception : public std::exception { * @param[in] err MAGMA error code pertaining to the thrown exception */ magma_exception( std::string file, int line, std::string msg, magma_int_t err ) : - file_(file), line_(line), msg_prefix_(msg), err_code_(err) { } + file_(file), line_(line), msg_prefix_(msg), err_code_(err) { + std::stringstream ss; + ss << "MAGMA Exception (" << msg_prefix_ << ")" << std::endl + << " Error Code " << int(err_code_) << ": \"" + << magma_strerror( err_code_ ) << "\"" << std::endl + << " File " << file_ << std::endl + << " Line " << line_ << std::endl; + what_msg_ = ss.str(); + } }; // class magma_exception diff --git a/src/external/CMakeLists.txt b/src/external/CMakeLists.txt index 46612c81b..c9c9d7077 100644 --- a/src/external/CMakeLists.txt +++ b/src/external/CMakeLists.txt @@ -21,19 +21,42 @@ if( GAUXC_ENABLE_HDF5 ) message(STATUS "HighFive REV = ${GAUXC_HIGHFIVE_REVISION} ") FetchContent_Declare( HighFive GIT_REPOSITORY ${GAUXC_HIGHFIVE_REPOSITORY} - GIT_TAG ${GAUXC_HIGHFIVE_REVISION} + GIT_TAG ${GAUXC_HIGHFIVE_REVISION} ) - + set(HIGHFIVE_USE_BOOST OFF CACHE BOOL "" ) set(HIGHFIVE_UNIT_TESTS OFF CACHE BOOL "" ) set(HIGHFIVE_EXAMPLES OFF CACHE BOOL "" ) #set(HIGHFIVE_PARALLEL_HDF5 ON CACHE BOOL "" ) set(HIGHFIVE_BUILD_DOCS OFF CACHE BOOL "" ) FetchContent_MakeAvailable( HighFive ) - + + # HighFive propagates HDF5_DEFINITIONS via its libdeps target. + # CMake's FindHDF5 module sets H5_BUILT_AS_DYNAMIC_LIB on Windows + # when HDF5_USE_STATIC_LIBRARIES is not set, even for static libs. + # Correct this when linking statically. + if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS AND TARGET libdeps) + get_target_property(_libdeps_defs libdeps INTERFACE_COMPILE_DEFINITIONS) + if(_libdeps_defs) + list(REMOVE_ITEM _libdeps_defs "H5_BUILT_AS_DYNAMIC_LIB") + list(APPEND _libdeps_defs "H5_BUILT_AS_STATIC_LIB") + set_target_properties(libdeps PROPERTIES INTERFACE_COMPILE_DEFINITIONS "${_libdeps_defs}") + endif() + endif() + endif() target_sources( gauxc PRIVATE hdf5_write.cxx hdf5_read.cxx ) target_link_libraries( gauxc PUBLIC HighFive ) + + # When linking HDF5 statically on Windows, HDF5's transitive + # dependencies (zlib, szip/aec, shlwapi) must be linked explicitly. + if(WIN32 AND HDF5_PROVIDES_STATIC_LIBS) + find_library(ZLIB_LIBRARY NAMES zlib z) + find_library(SZIP_LIBRARY NAMES szip-static szip sz) + find_library(AEC_LIBRARY NAMES aec-static aec) + target_link_libraries( gauxc PUBLIC + ${ZLIB_LIBRARY} ${SZIP_LIBRARY} ${AEC_LIBRARY} shlwapi ) + endif() else() message(WARNING "GAUXC_ENABLE_HDF5 was enabled, but HDF5 was not found, Disabling HDF5 Bindings") endif() diff --git a/src/load_balancer/host/load_balancer_host_factory.cxx b/src/load_balancer/host/load_balancer_host_factory.cxx index f69d7fd9b..e5243c41a 100644 --- a/src/load_balancer/host/load_balancer_host_factory.cxx +++ b/src/load_balancer/host/load_balancer_host_factory.cxx @@ -21,8 +21,8 @@ std::shared_ptr LoadBalancerHostFactory::get_shared_instance( const Molecule& mol, const MolGrid& mg, const BasisSet& basis ) { - std::transform(kernel_name.begin(), kernel_name.end(), - kernel_name.begin(), ::toupper ); + std::transform(kernel_name.begin(), kernel_name.end(), + kernel_name.begin(), [](unsigned char c){ return static_cast(std::toupper(c)); } ); if( kernel_name == "DEFAULT" or kernel_name == "REPLICATED" ) diff --git a/src/load_balancer/host/replicated_host_load_balancer.cxx b/src/load_balancer/host/replicated_host_load_balancer.cxx index 8f05f186b..16cc7ea8e 100644 --- a/src/load_balancer/host/replicated_host_load_balancer.cxx +++ b/src/load_balancer/host/replicated_host_load_balancer.cxx @@ -69,11 +69,11 @@ std::vector< XCTask > HostReplicatedLoadBalancer::create_local_tasks_() const { XCTask task; task.iParent = iCurrent; // This enables lazy assignment of points vector (see CUDA impl) - task.npts = points.size(); + task.npts = static_cast(points.size()); task.points = std::move( points ); task.weights = std::move( weights ); task.bfn_screening.shell_list = std::move(shell_list); - task.bfn_screening.nbe = nbe; + task.bfn_screening.nbe = static_cast(nbe); task.dist_nearest = molmeta_->dist_nearest()[iCurrent]; #pragma omp critical diff --git a/src/load_balancer/load_balancer_impl.cxx b/src/load_balancer/load_balancer_impl.cxx index f6b853daa..a4465d7ed 100644 --- a/src/load_balancer/load_balancer_impl.cxx +++ b/src/load_balancer/load_balancer_impl.cxx @@ -65,7 +65,7 @@ const util::Timer& LoadBalancerImpl::get_timings() const { size_t LoadBalancerImpl::total_npts() const { - return std::accumulate( local_tasks_.cbegin(), local_tasks_.cend(), 0ul, + return std::accumulate( local_tasks_.cbegin(), local_tasks_.cend(), size_t{0}, []( const auto& a, const auto& b ) { return a + b.points.size(); }); diff --git a/src/molmeta.cxx b/src/molmeta.cxx index 3bad9987d..62fae37ea 100644 --- a/src/molmeta.cxx +++ b/src/molmeta.cxx @@ -16,7 +16,7 @@ namespace GauXC { MolMeta::MolMeta( const Molecule& mol ) : natoms_(mol.natoms()){ compute_rab(mol); compute_dist_nearest(); - sum_atomic_charges_ = std::accumulate( mol.begin(), mol.end(), 0ul, + sum_atomic_charges_ = std::accumulate( mol.begin(), mol.end(), size_t{0}, [](auto a, const auto& b){ return a + b.Z.get(); }); } diff --git a/src/reduction_driver/host/basic_mpi_reduction_driver.cxx b/src/reduction_driver/host/basic_mpi_reduction_driver.cxx index 904f7caf0..dfa10c263 100644 --- a/src/reduction_driver/host/basic_mpi_reduction_driver.cxx +++ b/src/reduction_driver/host/basic_mpi_reduction_driver.cxx @@ -60,8 +60,8 @@ BasicMPIReductionDriver::~BasicMPIReductionDriver() noexcept = default; BasicMPIReductionDriver::BasicMPIReductionDriver(const BasicMPIReductionDriver&) = default; -void BasicMPIReductionDriver::allreduce_typeerased( const void* src, void* dest, - size_t size, ReductionOp op, std::type_index idx, std::any optional_args ) { +void BasicMPIReductionDriver::allreduce_typeerased( const void* src, void* dest, + size_t size, [[maybe_unused]] ReductionOp op, std::type_index idx, std::any optional_args ) { if( optional_args.has_value() ) std::cout << "** Warning: Optional Args Are Not Used in BasiMPIReductionDriver::allreduce" << std::endl; @@ -78,8 +78,8 @@ void BasicMPIReductionDriver::allreduce_typeerased( const void* src, void* dest, } -void BasicMPIReductionDriver::allreduce_inplace_typeerased( void* data, size_t size, - ReductionOp op, std::type_index idx, std::any optional_args ) { +void BasicMPIReductionDriver::allreduce_inplace_typeerased( [[maybe_unused]] void* data, [[maybe_unused]] size_t size, + [[maybe_unused]] ReductionOp op, [[maybe_unused]] std::type_index idx, std::any optional_args ) { if( optional_args.has_value() ) std::cout << "** Warning: Optional Args Are Not Used in BasiMPIReductionDriver::allreduce" << std::endl; diff --git a/src/reduction_driver/reduction_driver_factory.cxx b/src/reduction_driver/reduction_driver_factory.cxx index 8b3d5f348..39fe2f10b 100644 --- a/src/reduction_driver/reduction_driver_factory.cxx +++ b/src/reduction_driver/reduction_driver_factory.cxx @@ -26,8 +26,8 @@ namespace GauXC { std::shared_ptr ReductionDriverFactory::get_shared_instance( const RuntimeEnvironment& rt, std::string kernel_name ) { - std::transform(kernel_name.begin(), kernel_name.end(), - kernel_name.begin(), ::toupper ); + std::transform(kernel_name.begin(), kernel_name.end(), + kernel_name.begin(), [](unsigned char c){ return static_cast(std::toupper(c)); } ); std::unique_ptr ptr = nullptr; diff --git a/src/xc_integrator/integrator_util/exx_screening.cxx b/src/xc_integrator/integrator_util/exx_screening.cxx index 5c7efcd13..80dcaaab0 100644 --- a/src/xc_integrator/integrator_util/exx_screening.cxx +++ b/src/xc_integrator/integrator_util/exx_screening.cxx @@ -13,6 +13,7 @@ #include "host/blas.hpp" #include #include +#include //#include //#include #ifdef GAUXC_HAS_CUDA @@ -127,8 +128,8 @@ void exx_ek_screening( // Compute approx F_i^(k) = |P_ij| * B_j^(k) //auto gemm_st = hrt_t::now(); std::vector task_approx_f( nbf * ntasks ); - blas::gemm( 'N', 'N', nbf, ntasks, nbf, 1., P_abs, ldp, - task_max_bfn.data(), nbf, 0., task_approx_f.data(), nbf ); + blas::gemm( 'N', 'N', static_cast(nbf), static_cast(ntasks), static_cast(nbf), 1., P_abs, static_cast(ldp), + task_max_bfn.data(), static_cast(nbf), 0., task_approx_f.data(), static_cast(nbf) ); //auto gemm_en = hrt_t::now(); //std::cout << "... done " << dur_t(gemm_en-gemm_st).count() << std::endl; @@ -188,14 +189,14 @@ void exx_ek_screening( task_ek_shells[i_block] |= (1u << i_local); task_ek_shells[j_block] |= (1u << j_local); - task_it->cou_screening.shell_pair_list.emplace_back(i,j); - task_it->cou_screening.shell_pair_idx_list.emplace_back(_j); + task_it->cou_screening.shell_pair_list.emplace_back(static_cast(i), static_cast(j)); + task_it->cou_screening.shell_pair_idx_list.emplace_back(static_cast(_j)); } } } uint32_t total_shells = 0; - for( auto x : task_ek_shells ) total_shells += __builtin_popcount(x); + for( auto x : task_ek_shells ) total_shells += std::popcount(x); std::vector ek_shells; ek_shells.reserve(total_shells); for( auto i_block = 0u; i_block < util::div_ceil(nshells,32); ++i_block ) { diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.cxx b/src/xc_integrator/integrator_util/spherical_harmonics.cxx index bbc838e10..d84adc7fa 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.cxx +++ b/src/xc_integrator/integrator_util/spherical_harmonics.cxx @@ -168,4 +168,4 @@ void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts const std::array x = {points[3 * i], points[3 * i + 1], points[3 * i + 2]}; scaled_ylm_new(lmax, x, center, radius, nlm, ylm_matrix + i * nharmonics); } -} \ No newline at end of file +} diff --git a/src/xc_integrator/integrator_util/spherical_harmonics.hpp b/src/xc_integrator/integrator_util/spherical_harmonics.hpp index 7ce495d8d..73c6a3bcb 100644 --- a/src/xc_integrator/integrator_util/spherical_harmonics.hpp +++ b/src/xc_integrator/integrator_util/spherical_harmonics.hpp @@ -4,4 +4,4 @@ #include -void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); \ No newline at end of file +void scaled_ylm_matrix(const int lmax, const double* points, const int32_t npts, const std::array center, const double radius, double* ylm_matrix); diff --git a/src/xc_integrator/local_work_driver/factory.cxx b/src/xc_integrator/local_work_driver/factory.cxx index fd6b86ad4..3fbf49ab1 100644 --- a/src/xc_integrator/local_work_driver/factory.cxx +++ b/src/xc_integrator/local_work_driver/factory.cxx @@ -22,7 +22,8 @@ LocalWorkDriverFactory::ptr_return_t LocalWorkDriverFactory::make_local_work_driver( ExecutionSpace ex, std::string name, LocalWorkSettings settings ) { - std::transform( name.begin(), name.end(), name.begin(), ::toupper ); + std::transform( name.begin(), name.end(), name.begin(), + [](unsigned char c){ return static_cast(std::toupper(c)); } ); (void)(settings); switch(ex) { diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/config_obara_saika.hpp b/src/xc_integrator/local_work_driver/host/obara_saika/src/config_obara_saika.hpp index 8b7cee2a2..b32ac2e57 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/config_obara_saika.hpp +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/config_obara_saika.hpp @@ -87,7 +87,7 @@ namespace XCPU { constexpr double deltaT = double(DEFAULT_MAX_T) / DEFAULT_NSEGMENT; constexpr double one_over_deltaT = 1 / deltaT; - int iseg = std::floor(T[i] * one_over_deltaT); + int iseg = static_cast(std::floor(T[i] * one_over_deltaT)); const double* boys_seg = boys_m + iseg * DEFAULT_LD_TABLE; const double ratio = (2 * iseg + 1); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx index c64d2d54b..cd6d444d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0.cxx @@ -30,7 +30,7 @@ void integral_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx index 6971c1a71..d5024357a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_0_0.cxx @@ -32,7 +32,7 @@ void integral_0_0(size_t npts, int ldG, double *weights, double * /*boys_table*/) { - __attribute__((__aligned__(64))) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[1 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 1 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx index 3638d86af..045a5c860 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1.cxx @@ -30,7 +30,7 @@ void integral_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx index d0e655413..34f39af6a 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_0.cxx @@ -32,12 +32,12 @@ void integral_1_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[3 * NPTS_LOCAL + 3 * NPTS_LOCAL]; - double * __restrict__ temp = (buffer + 0); - double * __restrict__ Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); - double * __restrict__ Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * NPTS_LOCAL); - double * __restrict__ FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); + double * __restrict temp = (buffer + 0); + double * __restrict Tval = (buffer + 3 * NPTS_LOCAL + 0 * NPTS_LOCAL); + double * __restrict Tval_inv_e = (buffer + 3 * NPTS_LOCAL + 1 * NPTS_LOCAL); + double * __restrict FmT = (buffer + 3 * NPTS_LOCAL + 2 * NPTS_LOCAL); size_t npts_upper = NPTS_LOCAL * (npts / NPTS_LOCAL); size_t p_outer = 0; diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx index ee58d18f0..bf46e1efe 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_1_1.cxx @@ -32,7 +32,7 @@ void integral_1_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[9 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 9 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx index 035be5bef..bcaef2609 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2.cxx @@ -30,7 +30,7 @@ void integral_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx index 0343e6675..378ee6641 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_0.cxx @@ -32,7 +32,7 @@ void integral_2_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[6 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 6 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx index 6904c15d7..c8abdf6be 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_1.cxx @@ -32,7 +32,7 @@ void integral_2_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[16 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 16 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx index dbd9f500d..95989043d 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_2_2.cxx @@ -32,7 +32,7 @@ void integral_2_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[31 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 31 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx index c3faf7f43..48b26caf2 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3.cxx @@ -30,7 +30,7 @@ void integral_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx index 44c3542e0..bca56cfc9 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_0.cxx @@ -32,7 +32,7 @@ void integral_3_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[10 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 10 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx index 197e948ad..91148e596 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_1.cxx @@ -32,7 +32,7 @@ void integral_3_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[25 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[25 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 25 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx index 7c4a2ec67..eea293316 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_2.cxx @@ -32,7 +32,7 @@ void integral_3_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[46 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 46 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx index 251de89d9..3d46f0ac5 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_3_3.cxx @@ -32,7 +32,7 @@ void integral_3_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[74 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 74 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx index 67a9cace1..ad0e89b82 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4.cxx @@ -30,7 +30,7 @@ void integral_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx index 1b2f57f14..275edeed3 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_0.cxx @@ -32,7 +32,7 @@ void integral_4_0(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[15 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 15 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx index 6fefd7870..f189fe0d0 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_1.cxx @@ -32,7 +32,7 @@ void integral_4_1(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[36 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 36 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx index 0a88c5dd7..393bd36d7 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_2.cxx @@ -32,7 +32,7 @@ void integral_4_2(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[64 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 64 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx index e318e860f..4e2cdc506 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_3.cxx @@ -32,7 +32,7 @@ void integral_4_3(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[100 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 100 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx index 5aca482ab..301bde852 100644 --- a/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx +++ b/src/xc_integrator/local_work_driver/host/obara_saika/src/integral_4_4.cxx @@ -32,7 +32,7 @@ void integral_4_4(size_t npts, int ldG, double *weights, double *boys_table) { - __attribute__((__aligned__(64))) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; + alignas(64) double buffer[145 * NPTS_LOCAL + 3 * NPTS_LOCAL]; double *temp = (buffer + 0); double *Tval = (buffer + 145 * NPTS_LOCAL + 0 * NPTS_LOCAL); diff --git a/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx b/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx index 98f53d358..9dd6cf1ee 100644 --- a/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx +++ b/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx @@ -39,15 +39,15 @@ void gau2grid_collocation( size_t npts, for( size_t i = 0; i < nshells; ++i ) { const auto& sh = basis.at(shell_mask[i]); - int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; - gg_collocation( sh.l(), npts, points, 3, sh.nprim(), sh.coeff_data(), + int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; + gg_collocation( sh.l(), static_cast(npts), points, 3, sh.nprim(), sh.coeff_data(), sh.alpha_data(), sh.O_data(), order, rv + ncomp*npts ); ncomp += sh.size(); } - gg_fast_transpose( ncomp, npts, rv, basis_eval ); + gg_fast_transpose( static_cast(ncomp), static_cast(npts), rv, basis_eval ); a.deallocate( rv, npts*nbe ); #else @@ -99,19 +99,19 @@ void gau2grid_collocation_gradient( size_t npts, for( size_t i = 0; i < nshells; ++i ) { const auto& sh = basis.at(shell_mask[i]); - int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; - gg_collocation_deriv1( sh.l(), npts, points, 3, sh.nprim(), sh.coeff_data(), - sh.alpha_data(), sh.O_data(), order, rv + ncomp*npts, + int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; + gg_collocation_deriv1( sh.l(), static_cast(npts), points, 3, sh.nprim(), sh.coeff_data(), + sh.alpha_data(), sh.O_data(), order, rv + ncomp*npts, rv_x + ncomp*npts, rv_y + ncomp*npts, rv_z + ncomp*npts ); ncomp += sh.size(); } - gg_fast_transpose( ncomp, npts, rv, basis_eval ); - gg_fast_transpose( ncomp, npts, rv_x, dbasis_x_eval ); - gg_fast_transpose( ncomp, npts, rv_y, dbasis_y_eval ); - gg_fast_transpose( ncomp, npts, rv_z, dbasis_z_eval ); + gg_fast_transpose( static_cast(ncomp), static_cast(npts), rv, basis_eval ); + gg_fast_transpose( static_cast(ncomp), static_cast(npts), rv_x, dbasis_x_eval ); + gg_fast_transpose( static_cast(ncomp), static_cast(npts), rv_y, dbasis_y_eval ); + gg_fast_transpose( static_cast(ncomp), static_cast(npts), rv_z, dbasis_z_eval ); a.deallocate( rv, 4*npts*nbe ); @@ -175,15 +175,16 @@ void gau2grid_collocation_hessian( size_t npts, auto* rv_yz = rv_yy + npts * nbe; auto* rv_zz = rv_yz + npts * nbe; + const auto ul_npts = static_cast(npts); size_t ncomp = 0; for( size_t i = 0; i < nshells; ++i ) { const auto& sh = basis.at(shell_mask[i]); - int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; + int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; const auto ioff = ncomp*npts; - gg_collocation_deriv2( sh.l(), npts, points, 3, sh.nprim(), sh.coeff_data(), - sh.alpha_data(), sh.O_data(), order, rv + ioff, rv_x + ioff, rv_y + ioff, + gg_collocation_deriv2( sh.l(), ul_npts, points, 3, sh.nprim(), sh.coeff_data(), + sh.alpha_data(), sh.O_data(), order, rv + ioff, rv_x + ioff, rv_y + ioff, rv_z + ioff, rv_xx + ioff, rv_xy + ioff, rv_xz + ioff, rv_yy + ioff, rv_yz + ioff, rv_zz + ioff); @@ -191,16 +192,17 @@ void gau2grid_collocation_hessian( size_t npts, } - gg_fast_transpose( ncomp, npts, rv, basis_eval ); - gg_fast_transpose( ncomp, npts, rv_x, dbasis_x_eval ); - gg_fast_transpose( ncomp, npts, rv_y, dbasis_y_eval ); - gg_fast_transpose( ncomp, npts, rv_z, dbasis_z_eval ); - gg_fast_transpose( ncomp, npts, rv_xx, d2basis_xx_eval ); - gg_fast_transpose( ncomp, npts, rv_xy, d2basis_xy_eval ); - gg_fast_transpose( ncomp, npts, rv_xz, d2basis_xz_eval ); - gg_fast_transpose( ncomp, npts, rv_yy, d2basis_yy_eval ); - gg_fast_transpose( ncomp, npts, rv_yz, d2basis_yz_eval ); - gg_fast_transpose( ncomp, npts, rv_zz, d2basis_zz_eval ); + const auto ul_ncomp = static_cast(ncomp); + gg_fast_transpose( ul_ncomp, ul_npts, rv, basis_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_x, dbasis_x_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_y, dbasis_y_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_z, dbasis_z_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xx, d2basis_xx_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xy, d2basis_xy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xz, d2basis_xz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yy, d2basis_yy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yz, d2basis_yz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_zz, d2basis_zz_eval ); a.deallocate( rv, 10*npts*nbe ); @@ -257,15 +259,16 @@ void gau2grid_collocation_der3( size_t npts, auto* rv_zzz = rv_yzz + npts * nbe; + const auto ul_npts = static_cast(npts); size_t ncomp = 0; for( size_t i = 0; i < nshells; ++i ) { const auto& sh = basis.at(shell_mask[i]); - int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; + int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; const auto ioff = ncomp*npts; - gg_collocation_deriv3( sh.l(), npts, points, 3, sh.nprim(), sh.coeff_data(), - sh.alpha_data(), sh.O_data(), order, rv + ioff, rv_x + ioff, rv_y + ioff, + gg_collocation_deriv3( sh.l(), ul_npts, points, 3, sh.nprim(), sh.coeff_data(), + sh.alpha_data(), sh.O_data(), order, rv + ioff, rv_x + ioff, rv_y + ioff, rv_z + ioff, rv_xx + ioff, rv_xy + ioff, rv_xz + ioff, rv_yy + ioff, rv_yz + ioff, rv_zz + ioff, rv_xxx + ioff, rv_xxy + ioff, rv_xxz + ioff, rv_xyy + ioff, rv_xyz + ioff, rv_xzz + ioff, rv_yyy + ioff, rv_yyz + ioff, @@ -275,26 +278,27 @@ void gau2grid_collocation_der3( size_t npts, } - gg_fast_transpose( ncomp, npts, rv, basis_eval ); - gg_fast_transpose( ncomp, npts, rv_x, dbasis_x_eval ); - gg_fast_transpose( ncomp, npts, rv_y, dbasis_y_eval ); - gg_fast_transpose( ncomp, npts, rv_z, dbasis_z_eval ); - gg_fast_transpose( ncomp, npts, rv_xx, d2basis_xx_eval ); - gg_fast_transpose( ncomp, npts, rv_xy, d2basis_xy_eval ); - gg_fast_transpose( ncomp, npts, rv_xz, d2basis_xz_eval ); - gg_fast_transpose( ncomp, npts, rv_yy, d2basis_yy_eval ); - gg_fast_transpose( ncomp, npts, rv_yz, d2basis_yz_eval ); - gg_fast_transpose( ncomp, npts, rv_zz, d2basis_zz_eval ); - gg_fast_transpose( ncomp, npts, rv_xxx, d3basis_xxx_eval ); - gg_fast_transpose( ncomp, npts, rv_xxy, d3basis_xxy_eval ); - gg_fast_transpose( ncomp, npts, rv_xxz, d3basis_xxz_eval ); - gg_fast_transpose( ncomp, npts, rv_xyy, d3basis_xyy_eval ); - gg_fast_transpose( ncomp, npts, rv_xyz, d3basis_xyz_eval ); - gg_fast_transpose( ncomp, npts, rv_xzz, d3basis_xzz_eval ); - gg_fast_transpose( ncomp, npts, rv_yyy, d3basis_yyy_eval ); - gg_fast_transpose( ncomp, npts, rv_yyz, d3basis_yyz_eval ); - gg_fast_transpose( ncomp, npts, rv_yzz, d3basis_yzz_eval ); - gg_fast_transpose( ncomp, npts, rv_zzz, d3basis_zzz_eval ); + const auto ul_ncomp = static_cast(ncomp); + gg_fast_transpose( ul_ncomp, ul_npts, rv, basis_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_x, dbasis_x_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_y, dbasis_y_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_z, dbasis_z_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xx, d2basis_xx_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xy, d2basis_xy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xz, d2basis_xz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yy, d2basis_yy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yz, d2basis_yz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_zz, d2basis_zz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xxx, d3basis_xxx_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xxy, d3basis_xxy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xxz, d3basis_xxz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xyy, d3basis_xyy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xyz, d3basis_xyz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_xzz, d3basis_xzz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yyy, d3basis_yyy_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yyz, d3basis_yyz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_yzz, d3basis_yzz_eval ); + gg_fast_transpose( ul_ncomp, ul_npts, rv_zzz, d3basis_zzz_eval ); a.deallocate( rv, 20*npts*nbe ); diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx index 192cfcd33..30429432c 100644 --- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx @@ -123,33 +123,40 @@ namespace GauXC { void ReferenceLocalHostWorkDriver::eval_xmat( size_t npts, size_t nbf, size_t nbe, const submat_map_t& submat_map, double fac, const double* P, size_t ldp, const double* basis_eval, size_t ldb, double* X, size_t ldx, double* scr ) { + const auto inbe = static_cast(nbe); + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildb = static_cast(ldb); + const auto ildx = static_cast(ldx); + const auto ildp = static_cast(ldp); const auto* P_use = P; size_t ldp_use = ldp; - + if( submat_map.size() > 1 ) { - detail::submat_set( nbf, nbf, nbe, nbe, P, ldp, scr, nbe, submat_map ); + detail::submat_set( inbf, inbf, inbe, inbe, P, ildp, scr, inbe, submat_map ); P_use = scr; ldp_use = nbe; } else if( nbe != nbf ) { P_use = P + submat_map[0][0]*(ldp+1); } - blas::gemm( 'N', 'N', nbe, npts, nbe, fac, P_use, ldp_use, basis_eval, ldb, - 0., X, ldx ); + blas::gemm( 'N', 'N', inbe, inpts, inbe, fac, P_use, static_cast(ldp_use), basis_eval, ildb, + 0., X, ildx ); } // U/VVar LDA (density) - void ReferenceLocalHostWorkDriver::eval_uvvar_lda_rks( size_t npts, size_t nbe, + void ReferenceLocalHostWorkDriver::eval_uvvar_lda_rks( size_t npts, size_t nbe, const double* basis_eval, const double* X, size_t ldx, double* den_eval) { + const auto inbe = static_cast(nbe); for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioff = size_t(i) * ldx; const auto* X_i = X + ioff; - den_eval[i] = blas::dot( nbe, basis_eval + ioff, 1, X_i, 1 ); + den_eval[i] = blas::dot( inbe, basis_eval + ioff, 1, X_i, 1 ); } @@ -157,9 +164,11 @@ namespace GauXC { void ReferenceLocalHostWorkDriver::eval_uvvar_lda_uks( size_t npts, size_t nbe, - const double* basis_eval, const double* Xs, size_t ldxs, + const double* basis_eval, const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, double* den_eval) { - + + const auto inbe = static_cast(nbe); + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioffs = size_t(i) * ldxs; @@ -168,8 +177,8 @@ namespace GauXC { const auto* Xs_i = Xs + ioffs; const auto* Xz_i = Xz + ioffz; - const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); - const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); + const double rhos = blas::dot( inbe, basis_eval + ioffs, 1, Xs_i, 1 ); + const double rhoz = blas::dot( inbe, basis_eval + ioffz, 1, Xz_i, 1 ); den_eval[2*i] = 0.5*(rhos + rhoz); // rho_+ den_eval[2*i+1] = 0.5*(rhos - rhoz); // rho_- @@ -182,13 +191,14 @@ namespace GauXC { const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol) { + const auto inbe = static_cast(nbe); auto *KZ = K; // KZ // store K in the Z matrix auto *KY = KZ + npts; auto *KX = KY + npts; double dtolsq = dtol*dtol; - + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioffs = size_t(i) * ldxs; @@ -201,10 +211,10 @@ namespace GauXC { const auto* Xx_i = Xx + ioffx; const auto* Xy_i = Xy + ioffy; - const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); - const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); - const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 ); - const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 ); + const double rhos = blas::dot( inbe, basis_eval + ioffs, 1, Xs_i, 1 ); + const double rhoz = blas::dot( inbe, basis_eval + ioffz, 1, Xz_i, 1 ); + const double rhox = blas::dot( inbe, basis_eval + ioffx, 1, Xx_i, 1 ); + const double rhoy = blas::dot( inbe, basis_eval + ioffy, 1, Xy_i, 1 ); double mtemp = rhoz * rhoz + rhox * rhox + rhoy * rhoy; double mnorm = 0; @@ -229,22 +239,24 @@ namespace GauXC { } - void ReferenceLocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, - const double* basis_eval, const double* dbasis_x_eval, - const double *dbasis_y_eval, const double* dbasis_z_eval, const double* X, - size_t ldx, double* den_eval, double* dden_x_eval, double* dden_y_eval, + void ReferenceLocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double *dbasis_y_eval, const double* dbasis_z_eval, const double* X, + size_t ldx, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) { + const auto inbe = static_cast(nbe); + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioff = size_t(i) * ldx; const auto* X_i = X + ioff; - den_eval[i] = blas::dot( nbe, basis_eval + ioff, 1, X_i, 1 ); + den_eval[i] = blas::dot( inbe, basis_eval + ioff, 1, X_i, 1 ); - const auto dx = 2. * blas::dot( nbe, dbasis_x_eval + ioff, 1, X_i, 1 ); - const auto dy = 2. * blas::dot( nbe, dbasis_y_eval + ioff, 1, X_i, 1 ); - const auto dz = 2. * blas::dot( nbe, dbasis_z_eval + ioff, 1, X_i, 1 ); + const auto dx = 2. * blas::dot( inbe, dbasis_x_eval + ioff, 1, X_i, 1 ); + const auto dy = 2. * blas::dot( inbe, dbasis_y_eval + ioff, 1, X_i, 1 ); + const auto dz = 2. * blas::dot( inbe, dbasis_z_eval + ioff, 1, X_i, 1 ); dden_x_eval[i] = dx; dden_y_eval[i] = dy; @@ -258,10 +270,12 @@ namespace GauXC { void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* Xs, - size_t ldxs, const double* Xz, size_t ldxz, + size_t ldxs, const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) { + const auto inbe = static_cast(nbe); + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioffs = size_t(i) * ldxs; @@ -270,26 +284,26 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, const auto* Xs_i = Xs + ioffs; const auto* Xz_i = Xz + ioffz; - double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); // S density - double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); // Z density + double rhos = blas::dot( inbe, basis_eval + ioffs, 1, Xs_i, 1 ); // S density + double rhoz = blas::dot( inbe, basis_eval + ioffz, 1, Xz_i, 1 ); // Z density den_eval[2*i] = 0.5*(rhos + rhoz); // rho_+ den_eval[2*i+1] = 0.5*(rhos - rhoz); // rho_- const auto dndx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); const auto dndy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); const auto dndz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); const auto dMzdx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); dden_x_eval[2*i] = dndx; // dn / dx dden_y_eval[2*i] = dndy; // dn / dy @@ -317,21 +331,23 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, - const double* X, size_t ldx, const double* mmat_x, const double* mmat_y, - const double* mmat_z, size_t ldm, + const double* X, size_t ldx, const double* mmat_x, const double* mmat_y, + const double* mmat_z, [[maybe_unused]] size_t ldm, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + const auto inbe = static_cast(nbe); + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioff = size_t(i) * ldx; const auto* X_i = X + ioff; - den_eval[i] = blas::dot( nbe, basis_eval + ioff, 1, X_i, 1 ); + den_eval[i] = blas::dot( inbe, basis_eval + ioff, 1, X_i, 1 ); - const auto dx = 2. * blas::dot( nbe, dbasis_x_eval + ioff, 1, X_i, 1 ); - const auto dy = 2. * blas::dot( nbe, dbasis_y_eval + ioff, 1, X_i, 1 ); - const auto dz = 2. * blas::dot( nbe, dbasis_z_eval + ioff, 1, X_i, 1 ); + const auto dx = 2. * blas::dot( inbe, dbasis_x_eval + ioff, 1, X_i, 1 ); + const auto dy = 2. * blas::dot( inbe, dbasis_y_eval + ioff, 1, X_i, 1 ); + const auto dz = 2. * blas::dot( inbe, dbasis_z_eval + ioff, 1, X_i, 1 ); dden_x_eval[i] = dx; dden_y_eval[i] = dy; @@ -339,12 +355,12 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, gamma[i] = dx*dx + dy*dy + dz*dz; - tau[i] = 0.5*blas::dot( nbe, dbasis_x_eval + ioff, 1, mmat_x + ioff, 1); - tau[i] += 0.5*blas::dot( nbe, dbasis_y_eval + ioff, 1, mmat_y + ioff, 1); - tau[i] += 0.5*blas::dot( nbe, dbasis_z_eval + ioff, 1, mmat_z + ioff, 1); + tau[i] = 0.5*blas::dot( inbe, dbasis_x_eval + ioff, 1, mmat_x + ioff, 1); + tau[i] += 0.5*blas::dot( inbe, dbasis_y_eval + ioff, 1, mmat_y + ioff, 1); + tau[i] += 0.5*blas::dot( inbe, dbasis_z_eval + ioff, 1, mmat_z + ioff, 1); if (lapl != nullptr) - lapl[i] = 2. * blas::dot( nbe, lbasis_eval + ioff, 1, X_i, 1) + 4. * tau[i]; + lapl[i] = 2. * blas::dot( inbe, lbasis_eval + ioff, 1, X_i, 1) + 4. * tau[i]; } } @@ -352,12 +368,14 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, - const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, - const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, size_t ldms, - const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, size_t ldmz, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, [[maybe_unused]] size_t ldms, + const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, [[maybe_unused]] size_t ldmz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + const auto inbe = static_cast(nbe); + for( int32_t i = 0; i < (int32_t)npts; ++i ) { const size_t ioffs = size_t(i) * ldxs; @@ -366,26 +384,26 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, const auto* Xs_i = Xs + ioffs; const auto* Xz_i = Xz + ioffz; - double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); // S density - double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); // Z density + double rhos = blas::dot( inbe, basis_eval + ioffs, 1, Xs_i, 1 ); // S density + double rhoz = blas::dot( inbe, basis_eval + ioffz, 1, Xz_i, 1 ); // Z density den_eval[2*i] = 0.5*(rhos + rhoz); // rho_+ den_eval[2*i+1] = 0.5*(rhos - rhoz); // rho_- const auto dndx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); const auto dndy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); const auto dndz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); const auto dMzdx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); dden_x_eval[2*i] = dndx; // dn / dx dden_y_eval[2*i] = dndy; // dn / dy @@ -406,19 +424,19 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, gamma[3*i+1] = 0.25*(dn_sq - dMz_sq); gamma[3*i+2] = 0.25*(dn_sq + dMz_sq) - 0.5*dn_dMz; - auto taus = 0.5*blas::dot( nbe, dbasis_x_eval + ioffs, 1, mmat_xs + ioffs, 1); - taus += 0.5*blas::dot( nbe, dbasis_y_eval + ioffs, 1, mmat_ys + ioffs, 1); - taus += 0.5*blas::dot( nbe, dbasis_z_eval + ioffs, 1, mmat_zs + ioffs, 1); - auto tauz = 0.5*blas::dot( nbe, dbasis_x_eval + ioffz, 1, mmat_xz + ioffz, 1); - tauz += 0.5*blas::dot( nbe, dbasis_y_eval + ioffz, 1, mmat_yz + ioffz, 1); - tauz += 0.5*blas::dot( nbe, dbasis_z_eval + ioffz, 1, mmat_zz + ioffz, 1); + auto taus = 0.5*blas::dot( inbe, dbasis_x_eval + ioffs, 1, mmat_xs + ioffs, 1); + taus += 0.5*blas::dot( inbe, dbasis_y_eval + ioffs, 1, mmat_ys + ioffs, 1); + taus += 0.5*blas::dot( inbe, dbasis_z_eval + ioffs, 1, mmat_zs + ioffs, 1); + auto tauz = 0.5*blas::dot( inbe, dbasis_x_eval + ioffz, 1, mmat_xz + ioffz, 1); + tauz += 0.5*blas::dot( inbe, dbasis_y_eval + ioffz, 1, mmat_yz + ioffz, 1); + tauz += 0.5*blas::dot( inbe, dbasis_z_eval + ioffz, 1, mmat_zz + ioffz, 1); tau[2*i] = 0.5*(taus + tauz); tau[2*i+1] = 0.5*(taus - tauz); if (lapl != nullptr) { - auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus; - auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz; + auto lapls = 2. * blas::dot( inbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus; + auto laplz = 2. * blas::dot( inbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz; lapl[2*i] = 0.5*(lapls + laplz); lapl[2*i+1] = 0.5*(lapls - laplz); @@ -436,6 +454,8 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* Xy, size_t ldxy, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol) { + const auto inbe = static_cast(nbe); + auto *KZ = K; // KZ // store K in the Z matrix auto *KY = KZ + npts; auto *KX = KY + npts; @@ -458,38 +478,38 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const auto* Xx_i = Xx + ioffx; const auto* Xy_i = Xy + ioffy; - const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); - const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); - const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 ); - const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 ); + const double rhos = blas::dot( inbe, basis_eval + ioffs, 1, Xs_i, 1 ); + const double rhoz = blas::dot( inbe, basis_eval + ioffz, 1, Xz_i, 1 ); + const double rhox = blas::dot( inbe, basis_eval + ioffx, 1, Xx_i, 1 ); + const double rhoy = blas::dot( inbe, basis_eval + ioffy, 1, Xy_i, 1 ); const auto dndx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); const auto dndy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); const auto dndz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); const auto dMzdx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); const auto dMzdz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); const auto dMxdx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffx, 1, Xx_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffx, 1, Xx_i, 1 ); const auto dMxdy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffx, 1, Xx_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffx, 1, Xx_i, 1 ); const auto dMxdz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffx, 1, Xx_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffx, 1, Xx_i, 1 ); const auto dMydx = - 2. * blas::dot( nbe, dbasis_x_eval + ioffy, 1, Xy_i, 1 ); + 2. * blas::dot( inbe, dbasis_x_eval + ioffy, 1, Xy_i, 1 ); const auto dMydy = - 2. * blas::dot( nbe, dbasis_y_eval + ioffy, 1, Xy_i, 1 ); + 2. * blas::dot( inbe, dbasis_y_eval + ioffy, 1, Xy_i, 1 ); const auto dMydz = - 2. * blas::dot( nbe, dbasis_z_eval + ioffy, 1, Xy_i, 1 ); + 2. * blas::dot( inbe, dbasis_z_eval + ioffy, 1, Xy_i, 1 ); dden_x_eval[4 * i] = dndx; @@ -563,18 +583,21 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, } // Eval Z Matrix LDA VXC - void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbf, + void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbf, const double* vrho, const double* basis_eval, double* Z, size_t ldz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildz = static_cast(ldz); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, ldz ); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Z, ildz ); for( int32_t i = 0; i < (int32_t)npts; ++i ) { auto* z_col = Z + i*ldz; const double fact = 0.5 * vrho[i]; - GauXC::blas::scal( nbf, fact, z_col, 1 ); + GauXC::blas::scal( inbf, fact, z_col, 1 ); } @@ -585,9 +608,13 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildzs = static_cast(ldzs); + const auto ildzz = static_cast(ldzz); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zs, ildzs); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zz, ildzz); for( int32_t i = 0; i < (int32_t)npts; ++i ) { @@ -598,11 +625,11 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const double factm = 0.5 * vrho[2*i+1]; //eq. 56 https://doi.org/10.1140/epjb/e2018-90170-1 - GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); - GauXC::blas::scal( nbf, 0.5*(factp - factm), zz_col, 1 ); + GauXC::blas::scal( inbf, 0.5*(factp + factm), zs_col, 1 ); + GauXC::blas::scal( inbf, 0.5*(factp - factm), zz_col, 1 ); } - + } @@ -610,14 +637,21 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) { + const auto inbe = static_cast(nbe); + const auto inpts = static_cast(npts); + const auto ildzs = static_cast(ldzs); + const auto ildzz = static_cast(ldzz); + const auto ildzx = static_cast(ldzx); + const auto ildzy = static_cast(ldzy); + auto *KZ = K; // KZ // store K in the Z matrix auto *KY = KZ + npts; auto *KX = KY + npts; - blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zs, ldzs); - blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zz, ldzz); - blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zx, ldzx); - blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zy, ldzy); + blas::lacpy( 'A', inbe, inpts, basis_eval, inbe, Zs, ildzs); + blas::lacpy( 'A', inbe, inpts, basis_eval, inbe, Zz, ildzz); + blas::lacpy( 'A', inbe, inpts, basis_eval, inbe, Zx, ildzx); + blas::lacpy( 'A', inbe, inpts, basis_eval, inbe, Zy, ildzy); for( int32_t i = 0; i < (int32_t)npts; ++i ) { @@ -631,45 +665,48 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const double factor = 0.5 * (factp - factm); //eq. 56 https://doi.org/10.1140/epjb/e2018-90170-1 - GauXC::blas::scal( nbe, 0.5*(factp + factm), zs_col, 1 ); - GauXC::blas::scal( nbe, KZ[i] * factor, zz_col, 1 ); - GauXC::blas::scal( nbe, KX[i] * factor, zx_col, 1 ); - GauXC::blas::scal( nbe, KY[i] * factor, zy_col, 1 ); - + GauXC::blas::scal( inbe, 0.5*(factp + factm), zs_col, 1 ); + GauXC::blas::scal( inbe, KZ[i] * factor, zz_col, 1 ); + GauXC::blas::scal( inbe, KX[i] * factor, zx_col, 1 ); + GauXC::blas::scal( inbe, KY[i] * factor, zy_col, 1 ); + } } // Eval Z Matrix GGA VXC - void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbf, - const double* vrho, const double* vgamma, const double* basis_eval, - const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, const double* dden_x_eval, + void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbf, + const double* vrho, const double* vgamma, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, nbf ); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Z, inbf ); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* z_col = Z + ioff; - auto* bf_x_col = dbasis_x_eval + ioff; - auto* bf_y_col = dbasis_y_eval + ioff; - auto* bf_z_col = dbasis_z_eval + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; const auto lda_fact = 0.5 * vrho[i]; - blas::scal( nbf, lda_fact, z_col, 1 ); + blas::scal( inbf, lda_fact, z_col, 1 ); - const auto gga_fact = 2. * vgamma[i]; + const auto gga_fact = 2. * vgamma[i]; const auto x_fact = gga_fact * dden_x_eval[i]; const auto y_fact = gga_fact * dden_y_eval[i]; const auto z_fact = gga_fact * dden_z_eval[i]; - blas::axpy( nbf, x_fact, bf_x_col, 1, z_col, 1 ); - blas::axpy( nbf, y_fact, bf_y_col, 1, z_col, 1 ); - blas::axpy( nbf, z_fact, bf_z_col, 1, z_col, 1 ); + blas::axpy( inbf, x_fact, bf_x_col, 1, z_col, 1 ); + blas::axpy( inbf, y_fact, bf_y_col, 1, z_col, 1 ); + blas::axpy( inbf, z_fact, bf_z_col, 1, z_col, 1 ); } @@ -679,18 +716,22 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const double* vrho, const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, const double* dden_x_eval, - const double* dden_y_eval, const double* dden_z_eval, double* Zs, + const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildzs = static_cast(ldzs); + const auto ildzz = static_cast(ldzz); if( ldzs != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zs, ildzs); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zz, ildzz); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* zs_col = Zs + ioff; auto* zz_col = Zz + ioff; @@ -701,8 +742,8 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const double factp = 0.5 * vrho[2*i]; const double factm = 0.5 * vrho[2*i+1]; - GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, 0.5*(factp - factm), zz_col, 1 ); + GauXC::blas::scal( inbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, 0.5*(factp - factm), zz_col, 1 ); const auto gga_fact_pp = vgamma[3*i]; const auto gga_fact_pm = vgamma[3*i+1]; @@ -719,14 +760,14 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const auto x_fact_z = gga_fact_3 * dden_x_eval[2*i+1] + gga_fact_2 * dden_x_eval[2*i]; const auto y_fact_z = gga_fact_3 * dden_y_eval[2*i+1] + gga_fact_2 * dden_y_eval[2*i]; const auto z_fact_z = gga_fact_3 * dden_z_eval[2*i+1] + gga_fact_2 * dden_z_eval[2*i]; - - blas::axpy( nbf, x_fact_s, bf_x_col, 1, zs_col, 1 ); - blas::axpy( nbf, y_fact_s, bf_y_col, 1, zs_col, 1 ); - blas::axpy( nbf, z_fact_s, bf_z_col, 1, zs_col, 1 ); - blas::axpy( nbf, x_fact_z, bf_x_col, 1, zz_col, 1 ); - blas::axpy( nbf, y_fact_z, bf_y_col, 1, zz_col, 1 ); - blas::axpy( nbf, z_fact_z, bf_z_col, 1, zz_col, 1 ); + blas::axpy( inbf, x_fact_s, bf_x_col, 1, zs_col, 1 ); + blas::axpy( inbf, y_fact_s, bf_y_col, 1, zs_col, 1 ); + blas::axpy( inbf, z_fact_s, bf_z_col, 1, zs_col, 1 ); + + blas::axpy( inbf, x_fact_z, bf_x_col, 1, zz_col, 1 ); + blas::axpy( inbf, y_fact_z, bf_y_col, 1, zz_col, 1 ); + blas::axpy( inbf, z_fact_z, bf_z_col, 1, zz_col, 1 ); } } @@ -740,12 +781,15 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, nbf ); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Z, inbf ); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* z_col = Z + ioff; auto* bf_x_col = dbasis_x_eval + ioff; @@ -753,21 +797,21 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb auto* bf_z_col = dbasis_z_eval + ioff; const auto lda_fact = 0.5 * vrho[i]; - blas::scal( nbf, lda_fact, z_col, 1 ); + blas::scal( inbf, lda_fact, z_col, 1 ); const auto gga_fact = 2. * vgamma[i]; const auto x_fact = gga_fact * dden_x_eval[i]; const auto y_fact = gga_fact * dden_y_eval[i]; const auto z_fact = gga_fact * dden_z_eval[i]; - blas::axpy( nbf, x_fact, bf_x_col, 1, z_col, 1 ); - blas::axpy( nbf, y_fact, bf_y_col, 1, z_col, 1 ); - blas::axpy( nbf, z_fact, bf_z_col, 1, z_col, 1 ); + blas::axpy( inbf, x_fact, bf_x_col, 1, z_col, 1 ); + blas::axpy( inbf, y_fact, bf_y_col, 1, z_col, 1 ); + blas::axpy( inbf, z_fact, bf_z_col, 1, z_col, 1 ); if ( vlapl != nullptr ) { auto* lbf_col = lbasis_eval + ioff; const auto lapl_fact = vlapl[i]; - blas::axpy( nbf, lapl_fact, lbf_col, 1, z_col, 1 ); + blas::axpy( inbf, lapl_fact, lbf_col, 1, z_col, 1 ); } } @@ -775,23 +819,27 @@ void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nb } void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf, - const double* vrho, const double* vgamma, const double* vlapl, + const double* vrho, const double* vgamma, const double* vlapl, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, const double* dden_x_eval, - const double* dden_y_eval, const double* dden_z_eval, double* Zs, + const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildzs = static_cast(ldzs); + const auto ildzz = static_cast(ldzz); if( ldzs != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zs, ildzs); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zz, ildzz); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* zs_col = Zs + ioff; auto* zz_col = Zz + ioff; @@ -803,8 +851,8 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t n const double factp = 0.5 * vrho[2*i]; const double factm = 0.5 * vrho[2*i+1]; - GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, 0.5*(factp - factm), zz_col, 1 ); + GauXC::blas::scal( inbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, 0.5*(factp - factm), zz_col, 1 ); const auto gga_fact_pp = vgamma[3*i]; const auto gga_fact_pm = vgamma[3*i+1]; @@ -822,40 +870,44 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t n const auto y_fact_z = gga_fact_3 * dden_y_eval[2*i+1] + gga_fact_2 * dden_y_eval[2*i]; const auto z_fact_z = gga_fact_3 * dden_z_eval[2*i+1] + gga_fact_2 * dden_z_eval[2*i]; - - blas::axpy( nbf, x_fact_s, bf_x_col, 1, zs_col, 1 ); - blas::axpy( nbf, y_fact_s, bf_y_col, 1, zs_col, 1 ); - blas::axpy( nbf, z_fact_s, bf_z_col, 1, zs_col, 1 ); - blas::axpy( nbf, x_fact_z, bf_x_col, 1, zz_col, 1 ); - blas::axpy( nbf, y_fact_z, bf_y_col, 1, zz_col, 1 ); - blas::axpy( nbf, z_fact_z, bf_z_col, 1, zz_col, 1 ); + blas::axpy( inbf, x_fact_s, bf_x_col, 1, zs_col, 1 ); + blas::axpy( inbf, y_fact_s, bf_y_col, 1, zs_col, 1 ); + blas::axpy( inbf, z_fact_s, bf_z_col, 1, zs_col, 1 ); + + blas::axpy( inbf, x_fact_z, bf_x_col, 1, zz_col, 1 ); + blas::axpy( inbf, y_fact_z, bf_y_col, 1, zz_col, 1 ); + blas::axpy( inbf, z_fact_z, bf_z_col, 1, zz_col, 1 ); if (vlapl != nullptr) { const auto lfactp = vlapl[2*i]; const auto lfactm = vlapl[2*i+1]; - blas::axpy( nbf, 0.5*(lfactp + lfactm), lbf_col, 1, zs_col, 1); - blas::axpy( nbf, 0.5*(lfactp - lfactm), lbf_col, 1, zz_col, 1); + blas::axpy( inbf, 0.5*(lfactp + lfactm), lbf_col, 1, zs_col, 1); + blas::axpy( inbf, 0.5*(lfactp - lfactm), lbf_col, 1, zz_col, 1); } } } - void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_rks(size_t npts, size_t nbf, - const double* vtau, const double* vlapl, - const double* dbasis_x_eval, const double* dbasis_y_eval, + void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_rks(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildm = static_cast(ldm); + if( ldm != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - - blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_x, ldm); - blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_y, ldm); - blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_z, ldm); + + blas::lacpy( 'A', inbf, inpts, dbasis_x_eval, inbf, mmat_x, ildm); + blas::lacpy( 'A', inbf, inpts, dbasis_y_eval, inbf, mmat_y, ildm); + blas::lacpy( 'A', inbf, inpts, dbasis_z_eval, inbf, mmat_z, ildm); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* mmat_x_col = mmat_x + ioff; auto* mmat_y_col = mmat_y + ioff; auto* mmat_z_col = mmat_z + ioff; @@ -865,39 +917,44 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t n const auto tfact = 0.25 * vtau[i]; - blas::scal( nbf, tfact, mmat_x_col, 1); - blas::scal( nbf, tfact, mmat_y_col, 1); - blas::scal( nbf, tfact, mmat_z_col, 1); + blas::scal( inbf, tfact, mmat_x_col, 1); + blas::scal( inbf, tfact, mmat_y_col, 1); + blas::scal( inbf, tfact, mmat_z_col, 1); if ( vlapl != nullptr ) { const auto lfact = vlapl[i]; - blas::axpy( nbf, lfact, bf_x_col, 1, mmat_x_col, 1); - blas::axpy( nbf, lfact, bf_y_col, 1, mmat_y_col, 1); - blas::axpy( nbf, lfact, bf_z_col, 1, mmat_z_col, 1); + blas::axpy( inbf, lfact, bf_x_col, 1, mmat_x_col, 1); + blas::axpy( inbf, lfact, bf_y_col, 1, mmat_y_col, 1); + blas::axpy( inbf, lfact, bf_z_col, 1, mmat_z_col, 1); } } } -void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, - const double* vtau, const double* vlapl, - const double* dbasis_x_eval, const double* dbasis_y_eval, +void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms, double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildms = static_cast(ldms); + const auto ildmz = static_cast(ldmz); + if( ldms != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldmz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - - blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xs, ldms); - blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_ys, ldms); - blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_zs, ldms); - blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xz, ldmz); - blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_yz, ldmz); - blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_zz, ldmz); + + blas::lacpy( 'A', inbf, inpts, dbasis_x_eval, inbf, mmat_xs, ildms); + blas::lacpy( 'A', inbf, inpts, dbasis_y_eval, inbf, mmat_ys, ildms); + blas::lacpy( 'A', inbf, inpts, dbasis_z_eval, inbf, mmat_zs, ildms); + blas::lacpy( 'A', inbf, inpts, dbasis_x_eval, inbf, mmat_xz, ildmz); + blas::lacpy( 'A', inbf, inpts, dbasis_y_eval, inbf, mmat_yz, ildmz); + blas::lacpy( 'A', inbf, inpts, dbasis_z_eval, inbf, mmat_zz, ildmz); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* xs_col = mmat_xs + ioff; auto* ys_col = mmat_ys + ioff; auto* zs_col = mmat_zs + ioff; @@ -913,24 +970,24 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nb const auto tfacts = 0.5*(tfactp + tfactm); const auto tfactz = 0.5*(tfactp - tfactm); - blas::scal( nbf, tfacts, xs_col, 1); - blas::scal( nbf, tfacts, ys_col, 1); - blas::scal( nbf, tfacts, zs_col, 1); - blas::scal( nbf, tfactz, xz_col, 1); - blas::scal( nbf, tfactz, yz_col, 1); - blas::scal( nbf, tfactz, zz_col, 1); + blas::scal( inbf, tfacts, xs_col, 1); + blas::scal( inbf, tfacts, ys_col, 1); + blas::scal( inbf, tfacts, zs_col, 1); + blas::scal( inbf, tfactz, xz_col, 1); + blas::scal( inbf, tfactz, yz_col, 1); + blas::scal( inbf, tfactz, zz_col, 1); if ( vlapl != nullptr ) { const auto lfactp = vlapl[2*i]; const auto lfactm = vlapl[2*i+1]; const auto lfacts = 0.5*(lfactp + lfactm); const auto lfactz = 0.5*(lfactp - lfactm); - blas::axpy( nbf, lfacts, bf_x_col, 1, xs_col, 1); - blas::axpy( nbf, lfacts, bf_y_col, 1, ys_col, 1); - blas::axpy( nbf, lfacts, bf_z_col, 1, zs_col, 1); - blas::axpy( nbf, lfactz, bf_x_col, 1, xz_col, 1); - blas::axpy( nbf, lfactz, bf_y_col, 1, yz_col, 1); - blas::axpy( nbf, lfactz, bf_z_col, 1, zz_col, 1); + blas::axpy( inbf, lfacts, bf_x_col, 1, xs_col, 1); + blas::axpy( inbf, lfacts, bf_y_col, 1, ys_col, 1); + blas::axpy( inbf, lfacts, bf_z_col, 1, zs_col, 1); + blas::axpy( inbf, lfactz, bf_x_col, 1, xz_col, 1); + blas::axpy( inbf, lfactz, bf_y_col, 1, yz_col, 1); + blas::axpy( inbf, lfactz, bf_z_col, 1, zz_col, 1); } } @@ -944,6 +1001,13 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nb double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, double* Zy, size_t ldzy, double* K, double* H ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildzs = static_cast(ldzs); + const auto ildzz = static_cast(ldzz); + const auto ildzx = static_cast(ldzx); + const auto ildzy = static_cast(ldzy); + auto *KZ = K; // KZ // store K in the Z matrix auto *KY = KZ + npts; auto *KX = KY + npts; @@ -957,14 +1021,14 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nb if( ldzx != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzy != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zx, ldzx); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zy, ldzy); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zs, ildzs); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zz, ildzz); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zx, ildzx); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zy, ildzy); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* zs_col = Zs + ioff; auto* zz_col = Zz + ioff; @@ -979,10 +1043,10 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nb const double factm = 0.5 * vrho[2*i+1]; const double factor = 0.5 * (factp - factm); - GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, KZ[i]*factor, zz_col, 1 ); - GauXC::blas::scal( nbf, KX[i]*factor, zx_col, 1 ); - GauXC::blas::scal( nbf, KY[i]*factor, zy_col, 1 ); + GauXC::blas::scal( inbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, KZ[i]*factor, zz_col, 1 ); + GauXC::blas::scal( inbf, KX[i]*factor, zx_col, 1 ); + GauXC::blas::scal( inbf, KY[i]*factor, zy_col, 1 ); const auto gga_fact_pp = vgamma[3 * i]; const auto gga_fact_pm = vgamma[3 * i + 1]; @@ -1027,21 +1091,21 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nb gga_fact_2 * HY[i] * dden_z_eval[4 * i]; - blas::axpy(nbf, x_fact_s, bf_x_col, 1, zs_col, 1); - blas::axpy(nbf, y_fact_s, bf_y_col, 1, zs_col, 1); - blas::axpy(nbf, z_fact_s, bf_z_col, 1, zs_col, 1); + blas::axpy(inbf, x_fact_s, bf_x_col, 1, zs_col, 1); + blas::axpy(inbf, y_fact_s, bf_y_col, 1, zs_col, 1); + blas::axpy(inbf, z_fact_s, bf_z_col, 1, zs_col, 1); - blas::axpy(nbf, x_fact_z, bf_x_col, 1, zz_col, 1); - blas::axpy(nbf, y_fact_z, bf_y_col, 1, zz_col, 1); - blas::axpy(nbf, z_fact_z, bf_z_col, 1, zz_col, 1); + blas::axpy(inbf, x_fact_z, bf_x_col, 1, zz_col, 1); + blas::axpy(inbf, y_fact_z, bf_y_col, 1, zz_col, 1); + blas::axpy(inbf, z_fact_z, bf_z_col, 1, zz_col, 1); - blas::axpy(nbf, x_fact_x, bf_x_col, 1, zx_col, 1); - blas::axpy(nbf, y_fact_x, bf_y_col, 1, zx_col, 1); - blas::axpy(nbf, z_fact_x, bf_z_col, 1, zx_col, 1); + blas::axpy(inbf, x_fact_x, bf_x_col, 1, zx_col, 1); + blas::axpy(inbf, y_fact_x, bf_y_col, 1, zx_col, 1); + blas::axpy(inbf, z_fact_x, bf_z_col, 1, zx_col, 1); - blas::axpy(nbf, x_fact_y, bf_x_col, 1, zy_col, 1); - blas::axpy(nbf, y_fact_y, bf_y_col, 1, zy_col, 1); - blas::axpy(nbf, z_fact_y, bf_z_col, 1, zy_col, 1); + blas::axpy(inbf, x_fact_y, bf_x_col, 1, zy_col, 1); + blas::axpy(inbf, y_fact_y, bf_y_col, 1, zy_col, 1); + blas::axpy(inbf, z_fact_y, bf_z_col, 1, zy_col, 1); } @@ -1157,10 +1221,10 @@ void ReferenceLocalHostWorkDriver::eval_tmat_gga_vxc_uks( size_t npts, const dou } -void ReferenceLocalHostWorkDriver::eval_tmat_mgga_vxc_rks( size_t npts, const double* vgamma, - const double* v2rho2, const double* v2rhogamma, const double* v2rholapl, const double* v2rhotau, - const double* v2gamma2, const double* v2gammalapl, const double* v2gammatau, - const double* v2lapl2, const double* v2lapltau, const double* v2tau2, +void ReferenceLocalHostWorkDriver::eval_tmat_mgga_vxc_rks( size_t npts, const double* vgamma, + const double* v2rho2, const double* v2rhogamma, [[maybe_unused]] const double* v2rholapl, const double* v2rhotau, + const double* v2gamma2, [[maybe_unused]] const double* v2gammalapl, const double* v2gammatau, + [[maybe_unused]] const double* v2lapl2, [[maybe_unused]] const double* v2lapltau, const double* v2tau2, const double* trho, const double* tdden_x_eval, const double* tdden_y_eval, const double* tdden_z_eval, const double* ttau, const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* A, double* B, double* C){ @@ -1290,12 +1354,18 @@ void ReferenceLocalHostWorkDriver::eval_tmat_mgga_vxc_uks( size_t npts, const do void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_uks_ts( size_t npts, size_t nbf, const double* vrho, const double* basis_eval, double* Za, size_t ldza, double* Zb, size_t ldzb ) { - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Za, ldza); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zb, ldzb); + + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildza = static_cast(ldza); + const auto ildzb = static_cast(ldzb); + + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Za, ildza); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zb, ildzb); for( int32_t i = 0; i < (int32_t)npts; ++i ) { //eq. 56 https://doi.org/10.1140/epjb/e2018-90170-1 - GauXC::blas::scal( nbf, 0.5 * vrho[2*i], Za + i*ldza, 1 ); - GauXC::blas::scal( nbf, 0.5 * vrho[2*i+1], Zb + i*ldzb, 1 ); + GauXC::blas::scal( inbf, 0.5 * vrho[2*i], Za + i*ldza, 1 ); + GauXC::blas::scal( inbf, 0.5 * vrho[2*i+1], Zb + i*ldzb, 1 ); } } @@ -1312,26 +1382,30 @@ void ReferenceLocalHostWorkDriver::eval_Bvec_gga_vxc_rks_ts( size_t npts, const void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_rks_ts( size_t npts, size_t nbf, const double* A, const double* B, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, double* Z, + const double* dbasis_z_eval, double* Z, size_t ldz) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildz = static_cast(ldz); + if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, ldz); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Z, ildz); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* z_col = Z + ioff; auto* bf_x_col = dbasis_x_eval + ioff; auto* bf_y_col = dbasis_y_eval + ioff; auto* bf_z_col = dbasis_z_eval + ioff; - GauXC::blas::scal( nbf, 0.5*A[i], z_col, 1 ); + GauXC::blas::scal( inbf, 0.5*A[i], z_col, 1 ); - blas::axpy( nbf, B[i*3], bf_x_col, 1, z_col, 1 ); - blas::axpy( nbf, B[i*3+1], bf_y_col, 1, z_col, 1 ); - blas::axpy( nbf, B[i*3+2], bf_z_col, 1, z_col, 1 ); + blas::axpy( inbf, B[i*3], bf_x_col, 1, z_col, 1 ); + blas::axpy( inbf, B[i*3+1], bf_y_col, 1, z_col, 1 ); + blas::axpy( inbf, B[i*3+2], bf_z_col, 1, z_col, 1 ); } } @@ -1367,18 +1441,22 @@ void ReferenceLocalHostWorkDriver::eval_Bvec_gga_vxc_uks_ts( size_t npts, const void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_uks_ts( size_t npts, size_t nbf, const double* A, const double* B, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, double* Za, + const double* dbasis_z_eval, double* Za, size_t ldza, double* Zb, size_t ldzb ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildza = static_cast(ldza); + const auto ildzb = static_cast(ldzb); if( ldza != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzb != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Za, ldza); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zb, ldzb); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Za, ildza); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zb, ildzb); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* za_col = Za + ioff; auto* zb_col = Zb + ioff; @@ -1386,16 +1464,16 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_uks_ts( size_t npts, size_t auto* bf_y_col = dbasis_y_eval + ioff; auto* bf_z_col = dbasis_z_eval + ioff; - GauXC::blas::scal( nbf, 0.5*A[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, 0.5*A[2*i+1], zb_col, 1 ); + GauXC::blas::scal( inbf, 0.5*A[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, 0.5*A[2*i+1], zb_col, 1 ); - blas::axpy( nbf, B[i*6], bf_x_col, 1, za_col, 1 ); - blas::axpy( nbf, B[i*6+1], bf_y_col, 1, za_col, 1 ); - blas::axpy( nbf, B[i*6+2], bf_z_col, 1, za_col, 1 ); + blas::axpy( inbf, B[i*6], bf_x_col, 1, za_col, 1 ); + blas::axpy( inbf, B[i*6+1], bf_y_col, 1, za_col, 1 ); + blas::axpy( inbf, B[i*6+2], bf_z_col, 1, za_col, 1 ); - blas::axpy( nbf, B[i*6+3], bf_x_col, 1, zb_col, 1 ); - blas::axpy( nbf, B[i*6+4], bf_y_col, 1, zb_col, 1 ); - blas::axpy( nbf, B[i*6+5], bf_z_col, 1, zb_col, 1 ); + blas::axpy( inbf, B[i*6+3], bf_x_col, 1, zb_col, 1 ); + blas::axpy( inbf, B[i*6+4], bf_y_col, 1, zb_col, 1 ); + blas::axpy( inbf, B[i*6+5], bf_z_col, 1, zb_col, 1 ); } } @@ -1405,18 +1483,22 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_uks_ts( size_t npts, size_t const double* vrho, const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, const double* dden_x_eval, - const double* dden_y_eval, const double* dden_z_eval, double* Za, + const double* dden_y_eval, const double* dden_z_eval, double* Za, size_t ldza, double* Zb, size_t ldzb ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildza = static_cast(ldza); + const auto ildzb = static_cast(ldzb); if( ldza != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzb != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Za, ldza); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zb, ldzb); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Za, ildza); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zb, ildzb); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* za_col = Za + ioff; auto* zb_col = Zb + ioff; @@ -1424,8 +1506,8 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_uks_ts( size_t npts, size_t auto* bf_y_col = dbasis_y_eval + ioff; auto* bf_z_col = dbasis_z_eval + ioff; - GauXC::blas::scal( nbf, 0.5*vrho[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, 0.5*vrho[2*i+1], zb_col, 1 ); + GauXC::blas::scal( inbf, 0.5*vrho[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, 0.5*vrho[2*i+1], zb_col, 1 ); const auto gga_fact_aa = vgamma[3*i]; const auto gga_fact_ab = vgamma[3*i+1]; @@ -1448,34 +1530,39 @@ void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_uks_ts( size_t npts, size_t const auto y_fact_b = 2 * gga_fact_bb * dden_y_eval_b + gga_fact_ab * dden_y_eval_a; const auto z_fact_b = 2 * gga_fact_bb * dden_z_eval_b + gga_fact_ab * dden_z_eval_a; - blas::axpy( nbf, x_fact_a, bf_x_col, 1, za_col, 1 ); - blas::axpy( nbf, y_fact_a, bf_y_col, 1, za_col, 1 ); - blas::axpy( nbf, z_fact_a, bf_z_col, 1, za_col, 1 ); + blas::axpy( inbf, x_fact_a, bf_x_col, 1, za_col, 1 ); + blas::axpy( inbf, y_fact_a, bf_y_col, 1, za_col, 1 ); + blas::axpy( inbf, z_fact_a, bf_z_col, 1, za_col, 1 ); - blas::axpy( nbf, x_fact_b, bf_x_col, 1, zb_col, 1 ); - blas::axpy( nbf, y_fact_b, bf_y_col, 1, zb_col, 1 ); - blas::axpy( nbf, z_fact_b, bf_z_col, 1, zb_col, 1 ); + blas::axpy( inbf, x_fact_b, bf_x_col, 1, zb_col, 1 ); + blas::axpy( inbf, y_fact_b, bf_y_col, 1, zb_col, 1 ); + blas::axpy( inbf, z_fact_b, bf_z_col, 1, zb_col, 1 ); } } void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks_ts( size_t npts, size_t nbf, - const double* vrho, const double* vgamma, const double* vlapl, + const double* vrho, const double* vgamma, const double* vlapl, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, const double* dden_x_eval, - const double* dden_y_eval, const double* dden_z_eval, double* Za, + const double* dden_y_eval, const double* dden_z_eval, double* Za, size_t ldza, double* Zb, size_t ldzb ) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildza = static_cast(ldza); + const auto ildzb = static_cast(ldzb); + if( ldza != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldzb != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Za, ldza); - blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zb, ldzb); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Za, ildza); + blas::lacpy( 'A', inbf, inpts, basis_eval, inbf, Zb, ildzb); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* za_col = Za + ioff; auto* zb_col = Zb + ioff; @@ -1484,9 +1571,9 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks_ts( size_t npts, size_ auto* bf_z_col = dbasis_z_eval + ioff; auto* lbf_col = lbasis_eval + ioff; - GauXC::blas::scal( nbf, 0.5*vrho[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " - GauXC::blas::scal( nbf, 0.5*vrho[2*i+1], zb_col, 1 ); - + GauXC::blas::scal( inbf, 0.5*vrho[2*i], za_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( inbf, 0.5*vrho[2*i+1], zb_col, 1 ); + // dden_x_eval, dden_y_eval, dden_z_eval are all still in Pauli representation // so we need to convert them to the two spinor representation const auto dden_x_eval_a = 0.5 * (dden_x_eval[2*i] + dden_x_eval[2*i+1]); @@ -1495,7 +1582,7 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks_ts( size_t npts, size_ const auto dden_y_eval_b = 0.5 * (dden_y_eval[2*i] - dden_y_eval[2*i+1]); const auto dden_z_eval_a = 0.5 * (dden_z_eval[2*i] + dden_z_eval[2*i+1]); const auto dden_z_eval_b = 0.5 * (dden_z_eval[2*i] - dden_z_eval[2*i+1]); - + const auto gga_fact_aa = vgamma[3*i]; const auto gga_fact_ab = vgamma[3*i+1]; const auto gga_fact_bb = vgamma[3*i+2]; @@ -1508,41 +1595,46 @@ void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks_ts( size_t npts, size_ const auto y_fact_b = 2 * gga_fact_bb * dden_y_eval_b + gga_fact_ab * dden_y_eval_a; const auto z_fact_b = 2 * gga_fact_bb * dden_z_eval_b + gga_fact_ab * dden_z_eval_a; - blas::axpy( nbf, x_fact_a, bf_x_col, 1, za_col, 1 ); - blas::axpy( nbf, y_fact_a, bf_y_col, 1, za_col, 1 ); - blas::axpy( nbf, z_fact_a, bf_z_col, 1, za_col, 1 ); + blas::axpy( inbf, x_fact_a, bf_x_col, 1, za_col, 1 ); + blas::axpy( inbf, y_fact_a, bf_y_col, 1, za_col, 1 ); + blas::axpy( inbf, z_fact_a, bf_z_col, 1, za_col, 1 ); - blas::axpy( nbf, x_fact_b, bf_x_col, 1, zb_col, 1 ); - blas::axpy( nbf, y_fact_b, bf_y_col, 1, zb_col, 1 ); - blas::axpy( nbf, z_fact_b, bf_z_col, 1, zb_col, 1 ); + blas::axpy( inbf, x_fact_b, bf_x_col, 1, zb_col, 1 ); + blas::axpy( inbf, y_fact_b, bf_y_col, 1, zb_col, 1 ); + blas::axpy( inbf, z_fact_b, bf_z_col, 1, zb_col, 1 ); if (vlapl != nullptr) { - blas::axpy( nbf, vlapl[2*i], lbf_col, 1, za_col, 1); - blas::axpy( nbf, vlapl[2*i + 1], lbf_col, 1, zb_col, 1); + blas::axpy( inbf, vlapl[2*i], lbf_col, 1, za_col, 1); + blas::axpy( inbf, vlapl[2*i + 1], lbf_col, 1, zb_col, 1); } } } -void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t nbf, - const double* vtau, const double* vlapl, - const double* dbasis_x_eval, const double* dbasis_y_eval, +void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, double* mmat_xa, double* mmat_ya, double* mmat_za, size_t ldma, double* mmat_xb, double* mmat_yb, double* mmat_zb, size_t ldmb) { + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildma = static_cast(ldma); + const auto ildmb = static_cast(ldmb); + if( ldma != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); if( ldmb != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("Invalid Dims")); - - blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xa, ldma); - blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_ya, ldma); - blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_za, ldma); - blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xb, ldmb); - blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_yb, ldmb); - blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_zb, ldmb); + + blas::lacpy( 'A', inbf, inpts, dbasis_x_eval, inbf, mmat_xa, ildma); + blas::lacpy( 'A', inbf, inpts, dbasis_y_eval, inbf, mmat_ya, ildma); + blas::lacpy( 'A', inbf, inpts, dbasis_z_eval, inbf, mmat_za, ildma); + blas::lacpy( 'A', inbf, inpts, dbasis_x_eval, inbf, mmat_xb, ildmb); + blas::lacpy( 'A', inbf, inpts, dbasis_y_eval, inbf, mmat_yb, ildmb); + blas::lacpy( 'A', inbf, inpts, dbasis_z_eval, inbf, mmat_zb, ildmb); for( int32_t i = 0; i < (int32_t)npts; ++i ) { - const int32_t ioff = i * nbf; + const int32_t ioff = i * inbf; auto* xa_col = mmat_xa + ioff; auto* ya_col = mmat_ya + ioff; auto* za_col = mmat_za + ioff; @@ -1556,22 +1648,22 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const auto tfacta = 0.25 * vtau[2*i]; const auto tfactb = 0.25 * vtau[2*i+1]; - blas::scal( nbf, tfacta, xa_col, 1); - blas::scal( nbf, tfacta, ya_col, 1); - blas::scal( nbf, tfacta, za_col, 1); - blas::scal( nbf, tfactb, xb_col, 1); - blas::scal( nbf, tfactb, yb_col, 1); - blas::scal( nbf, tfactb, zb_col, 1); + blas::scal( inbf, tfacta, xa_col, 1); + blas::scal( inbf, tfacta, ya_col, 1); + blas::scal( inbf, tfacta, za_col, 1); + blas::scal( inbf, tfactb, xb_col, 1); + blas::scal( inbf, tfactb, yb_col, 1); + blas::scal( inbf, tfactb, zb_col, 1); if ( vlapl != nullptr ) { const auto lfacta = vlapl[2*i]; const auto lfactb = vlapl[2*i+1]; - blas::axpy( nbf, lfacta, bf_x_col, 1, xa_col, 1); - blas::axpy( nbf, lfacta, bf_y_col, 1, ya_col, 1); - blas::axpy( nbf, lfacta, bf_z_col, 1, za_col, 1); - blas::axpy( nbf, lfactb, bf_x_col, 1, xb_col, 1); - blas::axpy( nbf, lfactb, bf_y_col, 1, yb_col, 1); - blas::axpy( nbf, lfactb, bf_z_col, 1, zb_col, 1); + blas::axpy( inbf, lfacta, bf_x_col, 1, xa_col, 1); + blas::axpy( inbf, lfacta, bf_y_col, 1, ya_col, 1); + blas::axpy( inbf, lfacta, bf_z_col, 1, za_col, 1); + blas::axpy( inbf, lfactb, bf_x_col, 1, xb_col, 1); + blas::axpy( inbf, lfactb, bf_y_col, 1, yb_col, 1); + blas::axpy( inbf, lfactb, bf_z_col, 1, zb_col, 1); } } @@ -1587,9 +1679,15 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const double* basis_eval, const submat_map_t& submat_map, const double* Z, size_t ldz, double* VXC, size_t ldvxc, double* scr ) { - blas::syr2k('L', 'N', nbe, npts, 1., basis_eval, nbe, Z, ldz, 0., scr, nbe ); + const auto inbe = static_cast(nbe); + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildz = static_cast(ldz); + const auto ildvxc = static_cast(ldvxc); + + blas::syr2k('L', 'N', inbe, inpts, 1., basis_eval, inbe, Z, ildz, 0., scr, inbe ); - detail::inc_by_submat_atomic( nbf, nbf, nbe, nbe, VXC, ldvxc, scr, nbe, submat_map ); + detail::inc_by_submat_atomic( inbf, inbf, inbe, inbe, VXC, ildvxc, scr, inbe, submat_map ); } @@ -1599,10 +1697,17 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const submat_map_t& submat_map_bra, const submat_map_t& submat_map_ket, const double* G, size_t ldg, double* K, size_t ldk, double* scr ) { - blas::gemm( 'N', 'T', nbe_bra, nbe_ket, npts, 1., basis_eval, nbe_bra, - G, ldg, 0., scr, nbe_bra ); + const auto inbe_bra = static_cast(nbe_bra); + const auto inbe_ket = static_cast(nbe_ket); + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildg = static_cast(ldg); + const auto ildk = static_cast(ldk); + + blas::gemm( 'N', 'T', inbe_bra, inbe_ket, inpts, 1., basis_eval, inbe_bra, + G, ildg, 0., scr, inbe_bra ); - detail::inc_by_submat_atomic( nbf, nbf, nbe_bra, nbe_ket, K, ldk, scr, nbe_bra, + detail::inc_by_submat_atomic( inbf, inbf, inbe_bra, inbe_ket, K, ildk, scr, inbe_bra, submat_map_bra, submat_map_ket ); } @@ -1615,20 +1720,27 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const double* basis_eval, size_t ldb, double* F, size_t ldf, double* scr ) { + const auto inbe_bra = static_cast(nbe_bra); + const auto inbe_ket = static_cast(nbe_ket); + const auto inbf = static_cast(nbf); + const auto inpts = static_cast(npts); + const auto ildb = static_cast(ldb); + const auto ildf = static_cast(ldf); + const auto ildp = static_cast(ldp); const auto* P_use = P; size_t ldp_use = ldp; if( submat_map_bra.size() > 1 or submat_map_ket.size() > 1 ) { - detail::submat_set( nbf, nbf, nbe_bra, nbe_ket, P, ldp, - scr, nbe_bra, submat_map_bra, submat_map_ket ); + detail::submat_set( inbf, inbf, inbe_bra, inbe_ket, P, ildp, + scr, inbe_bra, submat_map_bra, submat_map_ket ); P_use = scr; ldp_use = nbe_bra; } else { P_use = P + submat_map_ket[0][0]*ldp + submat_map_bra[0][0]; } - blas::gemm( 'N', 'N', nbe_bra, npts, nbe_ket, 1., P_use, ldp_use, basis_eval, - ldb, 0., F, ldf ); + blas::gemm( 'N', 'N', inbe_bra, inpts, inbe_ket, 1., P_use, static_cast(ldp_use), basis_eval, + ildb, 0., F, ildf ); } @@ -1642,7 +1754,9 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t util::unused(basis_map); - // Cast points to Rys format (binary compatable) + const auto inpts = static_cast(npts); + const auto ildx = static_cast(ldx); + const auto ildg = static_cast(ldg); XCPU::point* _points = reinterpret_cast(const_cast(points)); std::vector _points_transposed(3 * npts); @@ -1667,8 +1781,9 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const bool any_pure = std::any_of( shell_list, shell_list + nshells, [&](const auto& i){ return basis.at(i).pure(); } ); - const size_t nbe_cart = + const size_t nbe_cart = basis.nbf_cart_subset( shell_list, shell_list + nshells ); + const auto inbe_cart = static_cast(nbe_cart); std::vector X_cart, G_cart; if( any_pure ){ @@ -1686,11 +1801,11 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const int shell_cart_sz = shell.cart_size(); if( shell.pure() and shell_l > 0 ) { - sph_trans.itform_bra_cm( shell_l, npts, X + ioff, ldx, - X_cart.data() + ioff_cart, nbe_cart ); + sph_trans.itform_bra_cm( shell_l, inpts, X + ioff, ildx, + X_cart.data() + ioff_cart, inbe_cart ); } else { - blas::lacpy( 'A', shell_sz, npts, X + ioff, ldx, - X_cart.data() + ioff_cart, nbe_cart ); + blas::lacpy( 'A', shell_sz, inpts, X + ioff, ildx, + X_cart.data() + ioff_cart, inbe_cart ); } ioff += shell_sz; ioff_cart += shell_cart_sz; @@ -1780,9 +1895,9 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t XCPU::compute_integral_shell_pair( ish == jsh, npts, _points_transposed.data(), bra.l(), ket.l(), bra_origin, ket_origin, - nprim_pair, prim_pair_data, - X_cart_rm.data()+ioff_cart, X_cart_rm.data()+joff_cart, npts, - G_cart_rm.data()+ioff_cart, G_cart_rm.data()+joff_cart, npts, + static_cast(nprim_pair), prim_pair_data, + X_cart_rm.data()+ioff_cart, X_cart_rm.data()+joff_cart, inpts, + G_cart_rm.data()+ioff_cart, G_cart_rm.data()+joff_cart, inpts, const_cast(weights), this->boys_table ); } #endif @@ -1805,11 +1920,11 @@ void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks_ts(size_t npts, size_t const int shell_cart_sz = shell.cart_size(); if( shell.pure() and shell_l > 0 ) { - sph_trans.tform_bra_cm( shell_l, npts, G_cart.data() + ioff_cart, nbe_cart, - G + ioff, ldg ); + sph_trans.tform_bra_cm( shell_l, inpts, G_cart.data() + ioff_cart, inbe_cart, + G + ioff, ildg ); } else { - blas::lacpy( 'A', shell_sz, npts, G_cart.data() + ioff_cart, nbe_cart, - G + ioff, ldg ); + blas::lacpy( 'A', shell_sz, inpts, G_cart.data() + ioff_cart, inbe_cart, + G + ioff, ildg ); } ioff += shell_sz; ioff_cart += shell_cart_sz; diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c index 4aa876364..2e18f7159 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump1[34] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h index c98f10241..5f1f05672 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_1rw.h @@ -1,6 +1,6 @@ #ifndef RYS_1RW_H_ #define RYS_1RW_H_ -void rys_1rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_1rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c index 78459eb8a..d98966172 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump2[41] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8 diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h index 309c3ec26..e18221d57 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_2rw.h @@ -1,6 +1,6 @@ #ifndef RYS_2RW_H_ #define RYS_2RW_H_ -void rys_2rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_2rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c index 299073ca9..c86131622 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump3[48] = { 1, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h index 904139b2c..affe560f0 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_3rw.h @@ -1,6 +1,6 @@ #ifndef RYS_3RW_H_ #define RYS_3RW_H_ -void rys_3rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_3rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c index 2b83ae652..f59c31eb2 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump4[54] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h index dd6fac71f..5294e181d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_4rw.h @@ -1,6 +1,6 @@ #ifndef RYS_4RW_H_ #define RYS_4RW_H_ -void rys_4rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_4rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c index a478610c9..ccc677f98 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.c @@ -6,7 +6,7 @@ #define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a)) -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]) { +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts) { int jump5[60] = { 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }; diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h index 8e4278431..1e76fe9e5 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_5rw.h @@ -1,6 +1,6 @@ #ifndef RYS_5RW_H_ #define RYS_5RW_H_ -void rys_5rw(int nt, const double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_5rw(int nt, const double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c index a9f8d22da..cee3f63e3 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_integral.c @@ -21,17 +21,23 @@ #define PI 3.14159265358979323846 +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#else +#define FORCE_INLINE inline __attribute__((always_inline)) #define MIN(a,b) \ ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? _a : _b; }) +#endif // codelets -inline void __attribute__((always_inline)) compute_00(double beta, double *int_array, double *wgh) { +FORCE_INLINE void compute_00(double beta, double *int_array, double *wgh) { *(int_array + 0) = (*(int_array + 0)) * beta + *(wgh + 0); } -inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_10_01(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double beta, double *int_array, double *rts, double *wgh) { double rt, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1; rt = *(rts + 0); @@ -49,7 +55,7 @@ inline void __attribute__((always_inline)) compute_10_01(double xPX, double yPX, *(int_array + 2) = (*(int_array + 2)) * beta + (*(wgh + 0)) * Cz0 + (*(wgh + 1)) * Cz1; } -inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_20_02(double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -82,7 +88,7 @@ inline void __attribute__((always_inline)) compute_20_02(double xPX, double yPX, *(int_array + 5) = (*(int_array + 5)) * beta + Cz2 * (*(wgh + 0)) + Cz3 * (*(wgh + 1)); } -inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { +FORCE_INLINE void compute_11(double xAB, double yAB, double zAB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double beta, double *int_array, double *rts, double *wgh) { double B0, B1, rt0, rt1, Cx0, Cy0, Cz0, Cx1, Cy1, Cz1, Cx2, Cy2, Cz2, Cx3, Cy3, Cz3; rt0 = *(rts + 0); @@ -120,7 +126,7 @@ inline void __attribute__((always_inline)) compute_11(double xAB, double yAB, do } // nr roots > 2 -inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_vrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xPX, double yPX, double zPX, double xPC, double yPC, double zPC, double aP_inv, double * rts, double *vrr_array, double *hrr_array) { double *roots = (rts + 0); double *vrr = (vrr_array + 0); for(int r = 0; r < nr_roots; ++r) { @@ -210,7 +216,7 @@ inline void __attribute__((always_inline)) compute_vrr3(int nr_roots, int l, int } } -inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { +FORCE_INLINE void compute_hrr3(int nr_roots, int l, int lA, int llA, int lB, int llB, double xAB, double yAB, double zAB, double *vrr_array, double *hrr_array) { for(int j = 1; j <= lA; ++j) { double *hrrj = (hrr_array + llA * j); @@ -271,11 +277,11 @@ inline void __attribute__((always_inline)) compute_hrr3(int nr_roots, int l, int } } -inline int __attribute__((always_inline)) index_calculation(int i, int j, int L) { +FORCE_INLINE int index_calculation(int i, int j, int L) { return (L - i) * (L - i + 1) / 2 + j; } -inline void __attribute__((always_inline)) compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { +FORCE_INLINE void compute_reduction(int nr_roots, int lA, int lB, double *weights, double *hrr_array, double *result, double beta) { int offsetB = (lB + 1) * (lB + 2) / 2; for(int ia = 0; ia <= lA; ++ia) { diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c index 905d05d49..f9eba534d 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.c @@ -3,6 +3,9 @@ #include #include #include +#ifdef _MSC_VER +#include +#endif #include "boys.h" @@ -15,9 +18,9 @@ void rys_rw(int nt, int ngqp, - double tval[restrict], - double rts[restrict], - double wts[restrict]) { + double *__restrict tval, + double *__restrict rts, + double *__restrict wts) { switch (ngqp) { case 1: rys_1rw(nt, tval, rts, wts); @@ -36,7 +39,11 @@ void rys_rw(int nt, return; default: { +#ifdef _MSC_VER + double *ryszero = (double *)_malloca(nt * sizeof(double)); +#else double ryszero[nt]; +#endif for (int n = 0; n < nt; n++) { const double t = tval[n]; @@ -61,7 +68,10 @@ void rys_rw(int nt, int nmom = (ngqp << 1) - 1; rys_xrw(nt, ntgqp, ngqp, nmom, tval, ryszero, rts, wts); - + +#ifdef _MSC_VER + _freea(ryszero); +#endif return; } } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h index 659cddefb..9d0f6ed21 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_rw.h @@ -1,6 +1,6 @@ #ifndef RYS_RW_H_ #define RYS_RW_H_ -void rys_rw(int nt, int ngqp, double tval[restrict], double rts[restrict], double wts[restrict]); +void rys_rw(int nt, int ngqp, double *__restrict tval, double *__restrict rts, double *__restrict wts); #endif diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c index 35ba680fe..2089bd0f6 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.c @@ -2,16 +2,28 @@ #include #include #include +#ifdef _MSC_VER +#include +#endif #include "jacobi.h" void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]) { + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts) { +#ifdef _MSC_VER + double *a = (double *)_malloca(nmom * sizeof(double)); + double *b = (double *)_malloca((nmom-1) * sizeof(double)); + double *mom = (double *)_malloca(nmom * sizeof(double)); + double *dia = (double *)_malloca(ngqp * sizeof(double)); + double *off = (double *)_malloca(ngqp * sizeof(double)); + double *row1 = (double *)_malloca(nmom * sizeof(double)); + double *row2 = (double *)_malloca(nmom * sizeof(double)); +#else double a[nmom]; double b[nmom-1]; double mom[nmom]; @@ -19,6 +31,7 @@ void rys_xrw(int nt, double off[ngqp]; double row1[nmom]; double row2[nmom]; +#endif int nrts = 0; for (int n = 0; n < nt; n += 1) { @@ -261,4 +274,14 @@ void rys_xrw(int nt, nrts += ngqp; } } + +#ifdef _MSC_VER + _freea(row2); + _freea(row1); + _freea(off); + _freea(dia); + _freea(mom); + _freea(b); + _freea(a); +#endif } diff --git a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h index f107d589b..b99cdcc4a 100644 --- a/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h +++ b/src/xc_integrator/local_work_driver/host/rys/src/rys_xrw.h @@ -5,9 +5,9 @@ void rys_xrw(int nt, int ntgqp, int ngqp, int nmom, - const double tval[restrict], - const double ryszero[restrict], - double rts[restrict], - double wts[restrict]); + const double *__restrict tval, + const double *__restrict ryszero, + double *__restrict rts, + double *__restrict wts); #endif diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi.hpp index 211a4abb4..a3bfaa364 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi.hpp @@ -71,7 +71,7 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& mol = this->load_balancer_->molecule(); // Atom-specific data - int natom = mol.size(); + int natom = static_cast(mol.size()); std::vector radii(natom); for (int i = 0; i < natom; ++i) { radii[i] = uff_radius_103(mol[i].Z); @@ -114,9 +114,9 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& task = tasks[iT]; // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); @@ -167,10 +167,10 @@ void ReferenceReplicatedXCHostIntegrator:: den_eval[i] *= -weights[i]; } std::vector offset_local_dd_psi(ldPsi, 0.0); - blas::gemm('N', 'N', ldPsi, 1, npts, - 1.0, ylm_matrix.data(), ldPsi, - den_eval, npts, - 0.0, offset_local_dd_psi.data(), ldPsi); + blas::gemm('N', 'N', static_cast(ldPsi), 1, npts, + 1.0, ylm_matrix.data(), static_cast(ldPsi), + den_eval, npts, + 0.0, offset_local_dd_psi.data(), static_cast(ldPsi)); for (int j = 0; j < ldPsi; ++j) { dd_Psi[atom_offset + j] += offset_local_dd_psi[j]; } diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi_potential.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi_potential.hpp index 58b9eddf6..1976a2778 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi_potential.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_dd_psi_potential.hpp @@ -36,7 +36,7 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& mol = this->load_balancer_->molecule(); const size_t natom = mol.size(); const size_t nharmonics = (max_Ylm + 1) * (max_Ylm + 1); - if (m != nharmonics || n != natom) { + if (m != static_cast(nharmonics) || n != static_cast(natom)) { GAUXC_GENERIC_EXCEPTION("m must be nharmonics and n must be natom"); } // Get Tasks @@ -70,7 +70,7 @@ void ReferenceReplicatedXCHostIntegrator:: // Atom-specific data std::vector radii(mol.size()); - for (int i = 0; i < mol.size(); ++i) { + for (size_t i = 0; i < mol.size(); ++i) { radii[i] = uff_radius_103(mol[i].Z); } @@ -111,9 +111,9 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& task = tasks[iT]; // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp index f04ae24b7..64c92995c 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp @@ -52,7 +52,7 @@ void ReferenceReplicatedXCHostIntegrator:: if( not this->reduction_driver_->takes_host_memory() ) GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions"); - const int natoms = this->load_balancer_->molecule().natoms(); + const int natoms = static_cast(this->load_balancer_->molecule().natoms()); this->reduction_driver_->allreduce_inplace( EXC_GRAD, 3*natoms, ReductionOp::Sum ); }); @@ -61,7 +61,7 @@ void ReferenceReplicatedXCHostIntegrator:: template void ReferenceReplicatedXCHostIntegrator:: - eval_exc_grad_( int64_t m, int64_t n, const value_type* Ps, int64_t ldps, + eval_exc_grad_( int64_t m, int64_t n, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, value_type* EXC_GRAD, const IntegratorSettingsXC& ks_settings ) { @@ -94,7 +94,7 @@ void ReferenceReplicatedXCHostIntegrator:: if( not this->reduction_driver_->takes_host_memory() ) GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions"); - const int natoms = this->load_balancer_->molecule().natoms(); + const int natoms = static_cast(this->load_balancer_->molecule().natoms()); this->reduction_driver_->allreduce_inplace( EXC_GRAD, 3*natoms, ReductionOp::Sum ); }); @@ -132,7 +132,7 @@ void ReferenceReplicatedXCHostIntegrator:: BasisSetMap basis_map(basis,mol); const int32_t nbf = basis.nbf(); - const int32_t natoms = mol.natoms(); + const int32_t natoms = static_cast(mol.natoms()); // Sort tasks on size (XXX: maybe doesnt matter?) auto task_comparator = []( const XCTask& a, const XCTask& b ) { @@ -169,9 +169,9 @@ void ReferenceReplicatedXCHostIntegrator:: auto& task = tasks[iT]; // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const size_t spin_dim_scal = is_rks ? 1 : 2; // last case is_uks const size_t gga_dim_scal = is_rks ? 1 : 3; @@ -414,7 +414,7 @@ void ReferenceReplicatedXCHostIntegrator:: } double g_acc_x(0), g_acc_y(0), g_acc_z(0); - for( int ibf = 0, mu = bf_off; ibf < sh_sz; ++ibf, ++mu ) + for( int ibf = 0, mu = static_cast(bf_off); ibf < sh_sz; ++ibf, ++mu ) for( int ipt = 0; ipt < npts; ++ipt ) { const int32_t mu_i = mu + ipt*nbe; diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp index 141085c9f..6e7192c53 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp @@ -162,7 +162,6 @@ void ReferenceReplicatedXCHostIntegrator:: auto& tasks = this->load_balancer_->get_tasks(); std::sort( task_begin, task_end, task_comparator ); - // Check that Partition Weights have been calculated auto& lb_state = this->load_balancer_->state(); if( not lb_state.modified_weights_are_stored ) { @@ -215,9 +214,9 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& task = *(task_begin + iT); // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exx.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exx.hpp index 7cce12dea..003de5f53 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exx.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exx.hpp @@ -315,7 +315,7 @@ void ReferenceReplicatedXCHostIntegrator:: // Loop over sparse shell pairs const auto sp_row_ptr = shpairs.row_ptr(); const auto sp_col_ind = shpairs.col_ind(); - for( auto i = 0; i < nshells_bf; ++i ) { + for( size_t i = 0; i < nshells_bf; ++i ) { const auto j_st = sp_row_ptr[i]; const auto j_en = sp_row_ptr[i+1]; for( auto _j = j_st; _j < j_en; ++_j ) { @@ -460,7 +460,7 @@ void ReferenceReplicatedXCHostIntegrator:: gen_compressed_submat_map( basis_map, ek_shell_list, nbf, nbf ); // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_fxc_contraction.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_fxc_contraction.hpp index 192fe0f89..7320039c7 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_fxc_contraction.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_fxc_contraction.hpp @@ -93,8 +93,8 @@ template void ReferenceReplicatedXCHostIntegrator:: fxc_contraction_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, - const value_type* tPs, int64_t ldtps, - const value_type* tPz, int64_t ldtpz, + const value_type* tPs, [[maybe_unused]] int64_t ldtps, + const value_type* tPz, [[maybe_unused]] int64_t ldtpz, value_type* FXCs, int64_t ldfxcs, value_type* FXCz, int64_t ldfxcz, value_type *N_EL, const IntegratorSettingsXC& settings, @@ -178,9 +178,9 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& task = *(task_begin + iT); // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_integrate_den.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_integrate_den.hpp index e0ad145f5..5aa55d837 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_integrate_den.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_integrate_den.hpp @@ -108,9 +108,9 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& task = tasks[iT]; // Get tasks constants - const int32_t npts = task.points.size(); + const int32_t npts = static_cast(task.points.size()); const int32_t nbe = task.bfn_screening.nbe; - const int32_t nshells = task.bfn_screening.shell_list.size(); + const int32_t nshells = static_cast(task.bfn_screening.shell_list.size()); const auto* points = task.points.data()->data(); const auto* weights = task.weights.data(); diff --git a/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx b/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx index 72ef87b87..fe58dd5a0 100644 --- a/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx +++ b/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx @@ -37,8 +37,9 @@ typename ReplicatedXCHostIntegratorFactory::ptr_return_t GAUXC_GENERIC_EXCEPTION("Passed LWD Not valid for Host ExSpace"); } - std::transform(integrator_kernel.begin(), integrator_kernel.end(), - integrator_kernel.begin(), ::toupper ); + std::transform(integrator_kernel.begin(), integrator_kernel.end(), + integrator_kernel.begin(), + [](unsigned char c){ return static_cast(std::toupper(c)); } ); if( integrator_kernel == "DEFAULT" ) integrator_kernel = "REFERENCE"; diff --git a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_grad.hpp b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_grad.hpp index f329bc025..43a1b0343 100644 --- a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_grad.hpp +++ b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_grad.hpp @@ -22,7 +22,7 @@ void ShellBatchedReplicatedXCIntegrator @@ -31,7 +31,7 @@ void ShellBatchedReplicatedXCIntegratortimer_.time_op_accumulate("XCIntegrator.ExtractSubDensity",[&]() { - detail::submat_set( basis.nbf(), basis.nbf(), nbe, nbe, Ps, ldps, - Ps_submat, nbe, union_submat_cut ); + detail::submat_set( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), Ps, static_cast(ldps), + Ps_submat, static_cast(nbe), union_submat_cut ); if(Pz) - detail::submat_set( basis.nbf(), basis.nbf(), nbe, nbe, Pz, ldpz, - Pz_submat, nbe, union_submat_cut ); + detail::submat_set( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), Pz, static_cast(ldpz), + Pz_submat, static_cast(nbe), union_submat_cut ); if(Py) - detail::submat_set( basis.nbf(), basis.nbf(), nbe, nbe, Py, ldpy, - Py_submat, nbe, union_submat_cut ); + detail::submat_set( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), Py, static_cast(ldpy), + Py_submat, static_cast(nbe), union_submat_cut ); if(Px) - detail::submat_set( basis.nbf(), basis.nbf(), nbe, nbe, Px, ldpx, - Px_submat, nbe, union_submat_cut ); + detail::submat_set( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), Px, static_cast(ldpx), + Px_submat, static_cast(nbe), union_submat_cut ); } ); @@ -415,20 +415,20 @@ void ShellBatchedReplicatedXCIntegratortimer_.time_op_accumulate("XCIntegrator.IncrementSubPotential",[&]() { if(VXCs) - detail::inc_by_submat( basis.nbf(), basis.nbf(), nbe, nbe, VXCs, ldvxcs, - VXCs_submat, nbe, union_submat_cut ); + detail::inc_by_submat( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), VXCs, static_cast(ldvxcs), + VXCs_submat, static_cast(nbe), union_submat_cut ); if(VXCz) - detail::inc_by_submat( basis.nbf(), basis.nbf(), nbe, nbe, VXCz, ldvxcz, - VXCz_submat, nbe, union_submat_cut ); + detail::inc_by_submat( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), VXCz, static_cast(ldvxcz), + VXCz_submat, static_cast(nbe), union_submat_cut ); if(VXCy) - detail::inc_by_submat( basis.nbf(), basis.nbf(), nbe, nbe, VXCy, ldvxcy, - VXCy_submat, nbe, union_submat_cut ); + detail::inc_by_submat( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), VXCy, static_cast(ldvxcy), + VXCy_submat, static_cast(nbe), union_submat_cut ); if(VXCx) - detail::inc_by_submat( basis.nbf(), basis.nbf(), nbe, nbe, VXCx, ldvxcx, - VXCx_submat, nbe, union_submat_cut ); + detail::inc_by_submat( static_cast(basis.nbf()), static_cast(basis.nbf()), static_cast(nbe), static_cast(nbe), VXCx, static_cast(ldvxcx), + VXCx_submat, static_cast(nbe), union_submat_cut ); }); diff --git a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_fxc_contraction.hpp b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_fxc_contraction.hpp index 289de9600..6dc916cb1 100644 --- a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_fxc_contraction.hpp +++ b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_fxc_contraction.hpp @@ -41,7 +41,7 @@ void ShellBatchedReplicatedXCIntegrator(std::max(1., max_shell_list.size() * overlap_pthresh[idx] )); host_task_iterator search_st = task_begin; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 46dbe487d..b7b090d70 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -75,6 +75,9 @@ if(GAUXC_ENABLE_CUTLASS) include(gauxc-cutlass) target_link_libraries(gauxc_test PUBLIC gauxc_cutlass) endif() +if(MSVC AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options( gauxc_test PRIVATE -Wno-unused-variable ) +endif() set( GAUXC_REF_DATA_PATH "${PROJECT_SOURCE_DIR}/tests/ref_data" ) @@ -87,6 +90,9 @@ add_executable( standalone_driver standalone_driver.cxx standards.cxx basis/pars target_link_libraries( standalone_driver PUBLIC gauxc gauxc_catch2 Eigen3::Eigen ) target_include_directories( standalone_driver PRIVATE ${PROJECT_BINARY_DIR}/tests ) target_include_directories( standalone_driver PRIVATE ${PROJECT_SOURCE_DIR}/tests ) +if(MSVC AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options( standalone_driver PRIVATE -Wno-unused-variable ) +endif() #add_executable( grid_opt grid_opt.cxx standards.cxx basis/parse_basis.cxx ini_input.cxx ) #target_link_libraries( grid_opt PUBLIC gauxc gauxc_catch2 Eigen3::Eigen ) diff --git a/tests/basis/parse_basis.cxx b/tests/basis/parse_basis.cxx index 0bf4cd8ec..3c1e8a6de 100644 --- a/tests/basis/parse_basis.cxx +++ b/tests/basis/parse_basis.cxx @@ -131,7 +131,7 @@ std::map am_map = { namespace detail { inline static auto tokenize( std::string str, - std::string delim = " " ) { + [[maybe_unused]] std::string delim = " " ) { std::istringstream iss(str); std::vector tokens; @@ -179,7 +179,7 @@ BasisSet parse_basis( const Molecule& mol, std::string atom_symb = atom_line.substr(0,2); if( atom_symb[1] == ' ' ) atom_symb = atom_symb[0]; std::transform( atom_symb.begin(), atom_symb.end(), atom_symb.begin(), - [](auto a){ return std::toupper(a); } ); + [](unsigned char a){ return static_cast(std::toupper(a)); } ); //std::cout << atom_symb << std::endl; int Z = atomic_number_map.at(atom_symb); @@ -208,9 +208,8 @@ BasisSet parse_basis( const Molecule& mol, } using prim_array = Shell::prim_array; - using cart_array = Shell::cart_array; - prim_array alpha_arr, coeff_primary_arr, coeff_secondary_arr; + prim_array alpha_arr, coeff_primary_arr, coeff_secondary_arr{}; std::copy( alpha.begin(), alpha.end(), alpha_arr.begin() ); std::copy( coeff_primary.begin(), coeff_primary.end(), coeff_primary_arr.begin() ); @@ -254,7 +253,7 @@ BasisSet parse_basis( const Molecule& mol, BasisSet basis; for( auto iAt = 0; iAt < mol.size(); ++iAt ) { const auto& atom = mol.at(iAt); - BasisSet atom_basis = basis_shells.at(atom.Z.get()); + BasisSet atom_basis = basis_shells.at(static_cast(atom.Z.get())); for( auto& sh : atom_basis ) sh.O() = {atom.x, atom.y, atom.z}; basis.insert(basis.end(), atom_basis.begin(), atom_basis.end() ); diff --git a/tests/grid_test.cxx b/tests/grid_test.cxx index c308adf8e..0ba8e0957 100644 --- a/tests/grid_test.cxx +++ b/tests/grid_test.cxx @@ -46,7 +46,7 @@ TEST_CASE("Grid", "[grid]") { SECTION("Full Construction") { Grid grid( mk_sphere, BatchSize(batch_sz) ); - CHECK( grid.batcher().max_batch_size() == batch_sz ); + CHECK( grid.batcher().max_batch_size() == static_cast(batch_sz) ); for( auto i = 0; i < mk_batch.nbatches(); ++i ) { diff --git a/tests/ini_input.cxx b/tests/ini_input.cxx index a5f6ed561..613b06fd3 100644 --- a/tests/ini_input.cxx +++ b/tests/ini_input.cxx @@ -106,7 +106,7 @@ void INIFile::parse() { // Obtain the section header name sectionHeader = line.substr(1,line.length()-2); std::transform(sectionHeader.begin(),sectionHeader.end(),sectionHeader.begin(), - [](unsigned char c){ return std::toupper(c);} ); + [](unsigned char c){ return static_cast(std::toupper(c));} ); // Create a dictionary entry for the section header dict_[sectionHeader] = @@ -134,7 +134,7 @@ void INIFile::parse() { dataHeader = tokens[0]; std::transform(dataHeader.begin(),dataHeader.end(),dataHeader.begin(), - [](unsigned char c){ return std::toupper(c);} ); + [](unsigned char c){ return static_cast(std::toupper(c));} ); // Create a dictionary entry for the data field in the current // section header @@ -191,7 +191,7 @@ std::pair INIFile::splitQuery( for(auto &X : tokens) { trim(X); std::transform(X.begin(),X.end(),X.begin(), - [](unsigned char c){ return std::toupper(c);} ); + [](unsigned char c){ return static_cast(std::toupper(c));} ); } return diff --git a/tests/molgrid_test.cxx b/tests/molgrid_test.cxx index 1de1be986..953e75abb 100644 --- a/tests/molgrid_test.cxx +++ b/tests/molgrid_test.cxx @@ -249,7 +249,7 @@ TEST_CASE("Grid Specification", "[molgrid]") { atomic_grid_variant gs; std::vector ref_pruning_regions; - UnprunedAtomicGridSpecification unp_gs = + UnprunedAtomicGridSpecification unp_gs = MolGridFactory::create_default_unpruned_grid_spec(Z,rq,gsz); SECTION("Unpruned") { gs = MolGridFactory::create_default_pruned_grid_spec( diff --git a/tests/standalone_driver.cxx b/tests/standalone_driver.cxx index 68a9c13aa..f4af33756 100644 --- a/tests/standalone_driver.cxx +++ b/tests/standalone_driver.cxx @@ -77,7 +77,8 @@ int main(int argc, char** argv) { int lmax = 2; auto string_to_upper = []( auto& str ) { - std::transform( str.begin(), str.end(), str.begin(), ::toupper ); + std::transform( str.begin(), str.end(), str.begin(), + [](unsigned char c){ return static_cast(std::toupper(c)); } ); }; #define OPTIONAL_KEYWORD(NAME,VAR,TYPE) \ @@ -229,7 +230,7 @@ int main(int argc, char** argv) { matrix_type P, Pz, Py, Px, VXC_ref, VXCz_ref, VXCy_ref, VXCx_ref, K_ref; matrix_type ddX, ddPsi_ref, ddPsi_potential_ref; matrix_type FXC_ref, FXCz_ref; - double EXC_ref; + double EXC_ref = 0.0; std::vector EXC_GRAD_ref(3*mol.size()); bool rks = true, uks = false, gks = false; size_t N_EL_ref = MolMeta(mol).sum_atomic_charges(); @@ -454,7 +455,7 @@ int main(int argc, char** argv) { matrix_type VXC, VXCz, VXCy, VXCx, K, FXC, FXCz; matrix_type ddPsi, ddPsiPotential; - double EXC, N_EL; + double EXC = 0.0, N_EL = 0.0; std::cout << std::scientific << std::setprecision(12); if( integrate_den ) { @@ -464,7 +465,7 @@ int main(int argc, char** argv) { N_EL = integrator.integrate_den( P ); if(!world_rank) std::cout << "N_EL = " << N_EL << std::endl; } else { - N_EL = N_EL_ref; + N_EL = static_cast(N_EL_ref); } if( integrate_vxc ) { diff --git a/tests/weights_generate.hpp b/tests/weights_generate.hpp index 34788701d..f245120ae 100644 --- a/tests/weights_generate.hpp +++ b/tests/weights_generate.hpp @@ -77,6 +77,7 @@ void generate_weights_data( const Molecule& mol, const BasisSet& basis, reference_lko_weights_host( mol, lb.molmeta(), tasks.begin(), tasks.end() ); break; + default: break; } // Clear out unneeded data diff --git a/tests/weights_host.hpp b/tests/weights_host.hpp index 821330ae3..5242014e9 100644 --- a/tests/weights_host.hpp +++ b/tests/weights_host.hpp @@ -40,6 +40,7 @@ void test_host_weights( const std::string& filename, XCWeightAlg weight_alg ) { ref_data.mol, *ref_data.meta, ref_data.tasks_unm.begin(), ref_data.tasks_unm.end() ); break; + default: break; } diff --git a/tests/xc_integrator.cxx b/tests/xc_integrator.cxx index 947a9914b..08f4ca214 100644 --- a/tests/xc_integrator.cxx +++ b/tests/xc_integrator.cxx @@ -188,7 +188,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, // Integrate Density if( check_integrate_den and rks) { - auto N_EL_ref = std::accumulate( mol.begin(), mol.end(), 0ul, + auto N_EL_ref = std::accumulate( mol.begin(), mol.end(), size_t{0}, [](const auto& a, const auto &b) { return a + b.Z.get(); }); auto N_EL = integrator.integrate_den( P ); // Factor of 2 b/c P is the alpha density for RKS