/**
 * Runtime support for array (vector) operations on real.
 *
 * Every operation in this module is implemented as a plain scalar loop;
 * no SSE2 or MMX code paths are present for real.
 *
 * Copyright: Copyright Digital Mars 2008 - 2016.
 * License:   Distributed under the
 *            $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
 * Authors:   Walter Bright, based on code originally written by Burton Radons
 * Source:    $(DRUNTIMESRC src/rt/_arrayreal.d)
 */

module rt.arrayreal;

// debug=PRINTF

import core.cpuid;
import rt.util.array;

version (unittest)
{
    private import core.stdc.stdio : printf;

    /* Selector stepped through by the unit tests so that each emulated CPU
     * variant is visited. With CPUID_MAX == 1 only variant 0 is exercised,
     * for which every feature property below reports false.
     */
    int cpuid;
    const int CPUID_MAX = 1;

nothrow:
    @property bool mmx()      { return cpuid == 1 && core.cpuid.mmx; }
    @property bool sse()      { return cpuid == 2 && core.cpuid.sse; }
    @property bool sse2()     { return cpuid == 3 && core.cpuid.sse2; }
    @property bool amd3dnow() { return cpuid == 4 && core.cpuid.amd3dnow; }
}
else
{
    // Outside of unit tests the feature flags come straight from core.cpuid.
    alias core.cpuid.mmx mmx;
    alias core.cpuid.sse sse;
    alias core.cpuid.sse2 sse2;
    alias core.cpuid.amd3dnow amd3dnow;
}

//version = log;

alias real T;

extern (C) @trusted nothrow:

/* ======================================================================== */

/***********************
 * Computes:
 *      a[] = b[] + c[]
 *
 * Params:
 *      a = destination slice
 *      c = right-hand operand (note: passed before b)
 *      b = left-hand operand
 * Returns:
 *      a
 *
 * Both operands are checked against a with enforceTypedArraysConformable
 * before any element is written.
 */

T[] _arraySliceSliceAddSliceAssign_r(T[] a, T[] c, T[] b)
{
    enforceTypedArraysConformable("vector operation", a, b);
    enforceTypedArraysConformable("vector operation", a, c);

    // NOTE(review): presumably this has to stay an explicit element loop --
    // an array-op expression here could be lowered back onto this very
    // function; confirm against the compiler before "simplifying".
    foreach (i; 0 .. a.length)
        a[i] = b[i] + c[i];
    return a;
}

unittest
{
    debug(PRINTF) printf("_arraySliceSliceAddSliceAssign_r unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int j = 0; j < 2; j++)
        {
            const int dim = 67;
            T[] a = new T[dim + j];     // aligned on 16 byte boundary
            a = a[j .. dim + j];        // misalign for second iteration
            T[] b = new T[dim + j];
            b = b[j .. dim + j];
            T[] c = new T[dim + j];
            c = c[j .. dim + j];

            for (int i = 0; i < dim; i++)
            {
                a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
            }

            // c[] = a[] + b[]
            // Call the runtime entry point directly so the test exercises it
            // regardless of how the compiler lowers array-op expressions.
            _arraySliceSliceAddSliceAssign_r(c, b, a);

            for (int i = 0; i < dim; i++)
            {
                if (c[i] != cast(T)(a[i] + b[i]))
                {
                    printf("[%d]: %Lg != %Lg + %Lg\n", i, c[i], a[i], b[i]);
                    assert(0);
                }
            }
        }
    }
}

/* ======================================================================== */

/***********************
 * Computes:
 *      a[] = b[] - c[]
 *
 * Params:
 *      a = destination slice
 *      c = subtrahend (note: passed before b)
 *      b = minuend
 * Returns:
 *      a
 *
 * Both operands are checked against a with enforceTypedArraysConformable
 * before any element is written.
 */

T[] _arraySliceSliceMinSliceAssign_r(T[] a, T[] c, T[] b)
{
    enforceTypedArraysConformable("vector operation", a, b);
    enforceTypedArraysConformable("vector operation", a, c);

    foreach (i; 0 .. a.length)
        a[i] = b[i] - c[i];
    return a;
}

unittest
{
    debug(PRINTF) printf("_arraySliceSliceMinSliceAssign_r unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int j = 0; j < 2; j++)
        {
            const int dim = 67;
            T[] a = new T[dim + j];     // aligned on 16 byte boundary
            a = a[j .. dim + j];        // misalign for second iteration
            T[] b = new T[dim + j];
            b = b[j .. dim + j];
            T[] c = new T[dim + j];
            c = c[j .. dim + j];

            for (int i = 0; i < dim; i++)
            {
                a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
            }

            // c[] = a[] - b[]  (destination, subtrahend, minuend)
            _arraySliceSliceMinSliceAssign_r(c, b, a);

            for (int i = 0; i < dim; i++)
            {
                if (c[i] != cast(T)(a[i] - b[i]))
                {
                    printf("[%d]: %Lg != %Lg - %Lg\n", i, c[i], a[i], b[i]);
                    assert(0);
                }
            }
        }
    }
}

/* ======================================================================== */

/***********************
 * Computes:
 *      a[] -= b[] * value
 *
 * Params:
 *      a     = slice updated in place
 *      value = scalar multiplier
 *      b     = slice whose elements are scaled and subtracted
 * Returns:
 *      a
 *
 * Implemented by negating value and delegating to
 * _arraySliceExpMulSliceAddass_r.
 */

T[] _arraySliceExpMulSliceMinass_r(T[] a, T value, T[] b)
{
    return _arraySliceExpMulSliceAddass_r(a, -value, b);
}

unittest
{
    debug(PRINTF) printf("_arraySliceExpMulSliceMinass_r unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        for (int j = 0; j < 2; j++)
        {
            const int dim = 67;
            T[] a = new T[dim + j];     // aligned on 16 byte boundary
            a = a[j .. dim + j];        // misalign for second iteration
            T[] b = new T[dim + j];
            b = b[j .. dim + j];
            T[] c = new T[dim + j];
            c = c[j .. dim + j];

            for (int i = 0; i < dim; i++)
            {
                a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
            }

            b[] = c[];                  // keep the pre-update values of c

            // c[] -= a[] * 6
            _arraySliceExpMulSliceMinass_r(c, 6, a);

            for (int i = 0; i < dim; i++)
            {
                if (c[i] != cast(T)(b[i] - a[i] * 6))
                {
                    printf("[%d]: %Lg ?= %Lg - %Lg * 6\n", i, c[i], b[i], a[i]);
                    assert(0);
                }
            }
        }
    }
}

/***********************
 * Computes:
 *      a[] += b[] * value
 *
 * Params:
 *      a     = slice updated in place
 *      value = scalar multiplier
 *      b     = slice whose elements are scaled and added
 * Returns:
 *      a
 *
 * b is checked against a with enforceTypedArraysConformable before any
 * element is written.
 */

T[] _arraySliceExpMulSliceAddass_r(T[] a, T value, T[] b)
{
    enforceTypedArraysConformable("vector operation", a, b);

    auto aptr = a.ptr;
    auto aend = aptr + a.length;
    auto bptr = b.ptr;

    // Scalar loop over every element; there is no vectorized main loop.
    while (aptr < aend)
        *aptr++ += *bptr++ * value;

    return a;
}

unittest
{
    debug(PRINTF) printf("_arraySliceExpMulSliceAddass_r unittest\n");

    for (cpuid = 0; cpuid < CPUID_MAX; cpuid++)
    {
        version (log) printf(" cpuid %d\n", cpuid);

        // Two passes so that the misaligned slices (j == 1) are covered too.
        for (int j = 0; j < 2; j++)
        {
            const int dim = 67;
            T[] a = new T[dim + j];     // aligned on 16 byte boundary
            a = a[j .. dim + j];        // misalign for second iteration
            T[] b = new T[dim + j];
            b = b[j .. dim + j];
            T[] c = new T[dim + j];
            c = c[j .. dim + j];

            for (int i = 0; i < dim; i++)
            {
                a[i] = cast(T)i;
                b[i] = cast(T)(i + 7);
                c[i] = cast(T)(i * 2);
            }

            b[] = c[];                  // keep the pre-update values of c

            // c[] += a[] * 6
            _arraySliceExpMulSliceAddass_r(c, 6, a);

            for (int i = 0; i < dim; i++)
            {
                //printf("[%d]: %Lg ?= %Lg + %Lg * 6\n", i, c[i], b[i], a[i]);
                if (c[i] != cast(T)(b[i] + a[i] * 6))
                {
                    printf("[%d]: %Lg ?= %Lg + %Lg * 6\n", i, c[i], b[i], a[i]);
                    assert(0);
                }
            }
        }
    }
}