From 02ca32ae0a5bc290918d2b2a3288e385b9cc6b11 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph.amissah@gmail.com>
Date: Fri, 19 Feb 2021 17:10:51 -0500
Subject: external & build dependences in src tree

- external & build dependences boost licensed
  - ext_depends (external depends)
    - D-YAML
      - tinyendian
    - d2sqlite3
    - imageformats
  - build_depends
    - dub2nix
---
 src/ext_depends/tinyendian/source/tinyendian.d | 213 +++++++++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 src/ext_depends/tinyendian/source/tinyendian.d

(limited to 'src/ext_depends/tinyendian/source')

diff --git a/src/ext_depends/tinyendian/source/tinyendian.d b/src/ext_depends/tinyendian/source/tinyendian.d
new file mode 100644
index 0000000..731b048
--- /dev/null
+++ b/src/ext_depends/tinyendian/source/tinyendian.d
@@ -0,0 +1,213 @@
+//          Copyright Ferdinand Majerech 2014.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)
+
+/// A minimal library providing functionality for changing the endianness of data.
+module tinyendian;
+
+import std.system : Endian, endian;
+
+/// Unicode UTF encodings that fixUTFByteOrder can report.
+enum UTFEncoding : ubyte
+{
+    UTF_8,  /// Reported when no BOM or a UTF-8 BOM is found.
+    UTF_16, /// Reported when a UTF-16 BOM (either byte order) is found.
+    UTF_32  /// Reported when a UTF-32 BOM (either byte order) is found.
+}
+
+/** Reverse the byte order of every item of an array, in place.
+ *
+ * Params:
+ * T     = Item type. Must be either 2 or 4 bytes long.
+ * array = Buffer whose items get their bytes reversed.
+ */
+void swapByteOrder(T)(T[] array) @trusted @nogc pure nothrow
+if (T.sizeof == 2 || T.sizeof == 4)
+{
+    // @trusted: the pointer casts below are not permitted in @safe code.
+    foreach (ref item; array)
+    {
+        static if (T.sizeof == 2)
+        {
+            import std.algorithm.mutation : swap;
+            swap(*cast(ubyte*)&item, *(cast(ubyte*)&item + 1));
+        }
+        else static if (T.sizeof == 4)
+        {
+            import core.bitop : bswap;
+            const swapped = bswap(*cast(uint*)&item);
+            item = *cast(const(T)*)&swapped;
+        }
+        else static assert(false, "Unsupported T: " ~ T.stringof);
+    }
+}
+///
+@safe unittest
+{
+    const ints = [314, -101];
+    int[2] intsSwapBuffer = ints;
+    swapByteOrder(intsSwapBuffer[]);
+    assert(intsSwapBuffer[0] == 0x3A01_0000, "Bytes were not reversed");
+    swapByteOrder(intsSwapBuffer[]);
+    assert(ints == intsSwapBuffer, "Lost information when swapping byte order");
+
+    const floats = [3.14f, 10.1f];
+    float[2] floatsSwapBuffer = floats;
+    swapByteOrder(floatsSwapBuffer[]);
+    swapByteOrder(floatsSwapBuffer[]);
+    assert(floats == floatsSwapBuffer, "Lost information when swapping byte order");
+}
+
+/// Value returned by fixUTFByteOrder.
+struct FixUTFByteOrderResult
+{
+    ubyte[] array;          /// Slice of the input without the BOM and without stripped trailing bytes.
+    UTFEncoding encoding;   /// Encoding chosen from the BOM; UTF_8 when no BOM is present.
+    Endian endian;          /// Byte order implied by the BOM (system order for a UTF-8 BOM, Endian.init without BOM).
+    uint bytesStripped = 0; /// Trailing bytes dropped because they did not fill a UTF-16/UTF-32 code unit.
+}
+
+/** Convert byte order of an array encoded in UTF(8/16/32) to system endianness in place.
+ *
+ * Uses the UTF byte-order-mark (BOM) to determine UTF encoding. If there is no BOM
+ * at the beginning of array, UTF-8 is assumed (this is compatible with ASCII). The
+ * BOM, if any, will be removed from the buffer.
+ *
+ * If the encoding is determined to be UTF-16 or UTF-32 and there aren't enough bytes
+ * for the last code unit (i.e. if array.length is odd for UTF-16 or not divisible by
+ * 4 for UTF-32), the extra bytes (1 for UTF-16, 1-3 for UTF-32) are stripped.
+ *
+ * Note that this function does $(B not) check if the array is a valid UTF string. It
+ * only works with the BOM and 1,2 or 4-byte items.
+ *
+ * Params:
+ *
+ * array = The array with UTF-data.
+ *
+ * Returns:
+ *
+ * A struct with the following members:
+ *
+ * $(D ubyte[] array)            A slice of the input array containing data in correct
+ *                               byte order, without BOM and in case of UTF-16/UTF-32,
+ *                               without stripped bytes, if any.
+ * $(D UTFEncoding encoding)     Encoding of the result (UTF-8, UTF-16 or UTF-32)
+ * $(D std.system.Endian endian) Endianness of the original array.
+ * $(D uint bytesStripped)       Number of bytes stripped from a UTF-16/UTF-32 array, if
+ *                               any. This is non-zero only if array.length was not
+ *                               divisible by 2 or 4 for UTF-16 and UTF-32, respectively.
+ *
+ * Complexity: O(array.length)
+ */
+auto fixUTFByteOrder(ubyte[] array) @safe @nogc pure nothrow
+{
+    import std.algorithm.searching : startsWith;
+
+    // Known BOMs; every value except None indexes bomBytes/bomOrder below.
+    enum BOM: ubyte
+    {
+        UTF_8     = 0,
+        UTF_16_LE = 1,
+        UTF_16_BE = 2,
+        UTF_32_LE = 3,
+        UTF_32_BE = 4,
+        None      = ubyte.max
+    }
+
+    // Raw bytes of each BOM, indexed by BOM value.
+    static immutable ubyte[][5] bomBytes = [ [0xEF, 0xBB, 0xBF],
+                                             [0xFF, 0xFE],
+                                             [0xFE, 0xFF],
+                                             [0xFF, 0xFE, 0x00, 0x00],
+                                             [0x00, 0x00, 0xFE, 0xFF] ];
+    // Byte order tied to each BOM; the UTF-8 entry is the system's own order.
+    static immutable Endian[5] bomOrder = [ endian,
+                                            Endian.littleEndian,
+                                            Endian.bigEndian,
+                                            Endian.littleEndian,
+                                            Endian.bigEndian ];
+
+    // Try every BOM and keep the LAST one that matches: the UTF-32LE BOM begins
+    // with the UTF-16LE BOM, so stopping at the first hit would misreport
+    // UTF-32LE input as UTF-16LE.
+    BOM detected = BOM.None;
+    foreach (idx, mark; bomBytes)
+        if (array.startsWith(mark))
+            detected = cast(BOM)idx;
+
+    FixUTFByteOrderResult fixed;
+    // Without a BOM there is no table entry to consult; Endian.init is reported.
+    if (detected == BOM.None)
+        fixed.endian = Endian.init;
+    else
+        fixed.endian = bomOrder[detected];
+
+    // Pick the encoding, the BOM length to skip and, for multi-byte code units,
+    // the number of trailing bytes that do not form a complete unit.
+    size_t bomLength = 0;
+    with(BOM) final switch(detected)
+    {
+        case None: fixed.encoding = UTFEncoding.UTF_8; break;
+        case UTF_8:
+            fixed.encoding = UTFEncoding.UTF_8;
+            bomLength = 3;
+            break;
+        case UTF_16_LE, UTF_16_BE:
+            fixed.encoding = UTFEncoding.UTF_16;
+            bomLength = 2;
+            fixed.bytesStripped = array.length % 2;
+            break;
+        case UTF_32_LE, UTF_32_BE:
+            fixed.encoding = UTFEncoding.UTF_32;
+            bomLength = 4;
+            fixed.bytesStripped = array.length % 4;
+            break;
+    }
+
+    // Drop the BOM and any incomplete trailing code unit by re-slicing the input.
+    if (bomLength > 0)
+        array = array[bomLength .. $ - fixed.bytesStripped];
+
+    // For UTF-16/32 the slice length is now a multiple of the code unit size,
+    // so the array casts below are exact. UTF-8 never needs swapping.
+    const needsSwap = fixed.endian != endian;
+    if (needsSwap && fixed.encoding == UTFEncoding.UTF_16)
+        swapByteOrder(cast(wchar[])array);
+    else if (needsSwap && fixed.encoding == UTFEncoding.UTF_32)
+        swapByteOrder(cast(dchar[])array);
+
+    fixed.array = array;
+    return fixed;
+}
+///
+@safe unittest
+{
+    {
+        // UTF-8 BOM: stripped; the reported byte order is the system's own.
+        ubyte[] s = [0xEF, 0xBB, 0xBF, 'a'];
+        FixUTFByteOrderResult r = fixUTFByteOrder(s);
+        assert(r.encoding == UTFEncoding.UTF_8);
+        assert(r.array.length == 1);
+        assert(r.array == ['a']);
+        assert(r.endian == endian); // not littleEndian: that only holds on little-endian hosts
+    }
+
+    {
+        ubyte[] s = ['a'];
+        FixUTFByteOrderResult r = fixUTFByteOrder(s);
+        assert(r.encoding == UTFEncoding.UTF_8);
+        assert(r.array.length == 1);
+        assert(r.array == ['a']);
+        assert(r.endian == Endian.bigEndian); // Endian.init is reported when there is no BOM
+    }
+
+    {
+        // strip 'a' b/c not complete unit
+        ubyte[] s = [0xFE, 0xFF, 'a'];
+        FixUTFByteOrderResult r = fixUTFByteOrder(s);
+        assert(r.encoding == UTFEncoding.UTF_16);
+        assert(r.array.length == 0);
+        assert(r.endian == Endian.bigEndian);
+    }
+}
-- 
cgit v1.2.3