mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			Remove unused modules from libphobos std.internal package.
libphobos/ChangeLog: 2018-11-19 Iain Buclaw <ibuclaw@gdcproject.org> * src/Makefile.am: Remove std.internal.digest.sha_SSSE3 and std.internal.math.biguintx86 modules. * src/Makefile.in: Rebuild. * src/std/internal/digest/sha_SSSE3.d: Remove. * src/std/internal/math/biguintx86.d: Remove. From-SVN: r266256
This commit is contained in:
		
							parent
							
								
									bee39274cb
								
							
						
					
					
						commit
						6feee1e1b8
					
				|  | @ -1,3 +1,11 @@ | |||
| 2018-11-19  Iain Buclaw  <ibuclaw@gdcproject.org> | ||||
| 
 | ||||
| 	* src/Makefile.am: Remove std.internal.digest.sha_SSSE3 and | ||||
| 	std.internal.math.biguintx86 modules. | ||||
| 	* src/Makefile.in: Rebuild. | ||||
| 	* src/std/internal/digest/sha_SSSE3.d: Remove. | ||||
| 	* src/std/internal/math/biguintx86.d: Remove. | ||||
| 
 | ||||
| 2018-11-02  Iain Buclaw  <ibuclaw@gdcproject.org> | ||||
| 
 | ||||
| 	PR d/87827 | ||||
|  |  | |||
|  | @ -156,9 +156,8 @@ PHOBOS_DSOURCES = etc/c/curl.d etc/c/sqlite3.d etc/c/zlib.d \ | |||
| 	std/experimental/logger/multilogger.d \
 | ||||
| 	std/experimental/logger/nulllogger.d std/experimental/logger/package.d \
 | ||||
| 	std/experimental/typecons.d std/file.d std/format.d std/functional.d \
 | ||||
| 	std/getopt.d std/internal/cstring.d std/internal/digest/sha_SSSE3.d \
 | ||||
| 	std/internal/math/biguintcore.d std/internal/math/biguintnoasm.d \
 | ||||
| 	std/internal/math/biguintx86.d std/internal/math/errorfunction.d \
 | ||||
| 	std/getopt.d std/internal/cstring.d std/internal/math/biguintcore.d \
 | ||||
| 	std/internal/math/biguintnoasm.d std/internal/math/errorfunction.d \
 | ||||
| 	std/internal/math/gammafunction.d std/internal/scopebuffer.d \
 | ||||
| 	std/internal/test/dummyrange.d std/internal/test/range.d \
 | ||||
| 	std/internal/test/uda.d std/internal/unicode_comp.d \
 | ||||
|  |  | |||
|  | @ -193,10 +193,8 @@ am__objects_1 = etc/c/curl.lo etc/c/sqlite3.lo etc/c/zlib.lo \ | |||
| 	std/experimental/logger/package.lo \
 | ||||
| 	std/experimental/typecons.lo std/file.lo std/format.lo \
 | ||||
| 	std/functional.lo std/getopt.lo std/internal/cstring.lo \
 | ||||
| 	std/internal/digest/sha_SSSE3.lo \
 | ||||
| 	std/internal/math/biguintcore.lo \
 | ||||
| 	std/internal/math/biguintnoasm.lo \
 | ||||
| 	std/internal/math/biguintx86.lo \
 | ||||
| 	std/internal/math/errorfunction.lo \
 | ||||
| 	std/internal/math/gammafunction.lo std/internal/scopebuffer.lo \
 | ||||
| 	std/internal/test/dummyrange.lo std/internal/test/range.lo \
 | ||||
|  | @ -282,10 +280,8 @@ am__DEPENDENCIES_1 = etc/c/curl.t.lo etc/c/sqlite3.t.lo \ | |||
| 	std/experimental/logger/package.t.lo \
 | ||||
| 	std/experimental/typecons.t.lo std/file.t.lo std/format.t.lo \
 | ||||
| 	std/functional.t.lo std/getopt.t.lo std/internal/cstring.t.lo \
 | ||||
| 	std/internal/digest/sha_SSSE3.t.lo \
 | ||||
| 	std/internal/math/biguintcore.t.lo \
 | ||||
| 	std/internal/math/biguintnoasm.t.lo \
 | ||||
| 	std/internal/math/biguintx86.t.lo \
 | ||||
| 	std/internal/math/errorfunction.t.lo \
 | ||||
| 	std/internal/math/gammafunction.t.lo \
 | ||||
| 	std/internal/scopebuffer.t.lo \
 | ||||
|  | @ -392,10 +388,8 @@ am__DEPENDENCIES_4 = etc/c/curl.t.o etc/c/sqlite3.t.o etc/c/zlib.t.o \ | |||
| 	std/experimental/logger/package.t.o \
 | ||||
| 	std/experimental/typecons.t.o std/file.t.o std/format.t.o \
 | ||||
| 	std/functional.t.o std/getopt.t.o std/internal/cstring.t.o \
 | ||||
| 	std/internal/digest/sha_SSSE3.t.o \
 | ||||
| 	std/internal/math/biguintcore.t.o \
 | ||||
| 	std/internal/math/biguintnoasm.t.o \
 | ||||
| 	std/internal/math/biguintx86.t.o \
 | ||||
| 	std/internal/math/errorfunction.t.o \
 | ||||
| 	std/internal/math/gammafunction.t.o \
 | ||||
| 	std/internal/scopebuffer.t.o std/internal/test/dummyrange.t.o \
 | ||||
|  | @ -788,9 +782,8 @@ PHOBOS_DSOURCES = etc/c/curl.d etc/c/sqlite3.d etc/c/zlib.d \ | |||
| 	std/experimental/logger/multilogger.d \
 | ||||
| 	std/experimental/logger/nulllogger.d std/experimental/logger/package.d \
 | ||||
| 	std/experimental/typecons.d std/file.d std/format.d std/functional.d \
 | ||||
| 	std/getopt.d std/internal/cstring.d std/internal/digest/sha_SSSE3.d \
 | ||||
| 	std/internal/math/biguintcore.d std/internal/math/biguintnoasm.d \
 | ||||
| 	std/internal/math/biguintx86.d std/internal/math/errorfunction.d \
 | ||||
| 	std/getopt.d std/internal/cstring.d std/internal/math/biguintcore.d \
 | ||||
| 	std/internal/math/biguintnoasm.d std/internal/math/errorfunction.d \
 | ||||
| 	std/internal/math/gammafunction.d std/internal/scopebuffer.d \
 | ||||
| 	std/internal/test/dummyrange.d std/internal/test/range.d \
 | ||||
| 	std/internal/test/uda.d std/internal/unicode_comp.d \
 | ||||
|  | @ -1032,16 +1025,11 @@ std/internal/$(am__dirstamp): | |||
| 	@$(MKDIR_P) std/internal | ||||
| 	@: > std/internal/$(am__dirstamp) | ||||
| std/internal/cstring.lo: std/internal/$(am__dirstamp) | ||||
| std/internal/digest/$(am__dirstamp): | ||||
| 	@$(MKDIR_P) std/internal/digest | ||||
| 	@: > std/internal/digest/$(am__dirstamp) | ||||
| std/internal/digest/sha_SSSE3.lo: std/internal/digest/$(am__dirstamp) | ||||
| std/internal/math/$(am__dirstamp): | ||||
| 	@$(MKDIR_P) std/internal/math | ||||
| 	@: > std/internal/math/$(am__dirstamp) | ||||
| std/internal/math/biguintcore.lo: std/internal/math/$(am__dirstamp) | ||||
| std/internal/math/biguintnoasm.lo: std/internal/math/$(am__dirstamp) | ||||
| std/internal/math/biguintx86.lo: std/internal/math/$(am__dirstamp) | ||||
| std/internal/math/errorfunction.lo: std/internal/math/$(am__dirstamp) | ||||
| std/internal/math/gammafunction.lo: std/internal/math/$(am__dirstamp) | ||||
| std/internal/scopebuffer.lo: std/internal/$(am__dirstamp) | ||||
|  | @ -1174,8 +1162,6 @@ mostlyclean-compile: | |||
| 	-rm -f std/experimental/logger/*.lo | ||||
| 	-rm -f std/internal/*.$(OBJEXT) | ||||
| 	-rm -f std/internal/*.lo | ||||
| 	-rm -f std/internal/digest/*.$(OBJEXT) | ||||
| 	-rm -f std/internal/digest/*.lo | ||||
| 	-rm -f std/internal/math/*.$(OBJEXT) | ||||
| 	-rm -f std/internal/math/*.lo | ||||
| 	-rm -f std/internal/test/*.$(OBJEXT) | ||||
|  | @ -1401,7 +1387,6 @@ clean-libtool: | |||
| 	-rm -rf std/experimental/allocator/building_blocks/.libs std/experimental/allocator/building_blocks/_libs | ||||
| 	-rm -rf std/experimental/logger/.libs std/experimental/logger/_libs | ||||
| 	-rm -rf std/internal/.libs std/internal/_libs | ||||
| 	-rm -rf std/internal/digest/.libs std/internal/digest/_libs | ||||
| 	-rm -rf std/internal/math/.libs std/internal/math/_libs | ||||
| 	-rm -rf std/internal/test/.libs std/internal/test/_libs | ||||
| 	-rm -rf std/internal/windows/.libs std/internal/windows/_libs | ||||
|  | @ -1529,7 +1514,6 @@ distclean-generic: | |||
| 	-rm -f std/experimental/allocator/building_blocks/$(am__dirstamp) | ||||
| 	-rm -f std/experimental/logger/$(am__dirstamp) | ||||
| 	-rm -f std/internal/$(am__dirstamp) | ||||
| 	-rm -f std/internal/digest/$(am__dirstamp) | ||||
| 	-rm -f std/internal/math/$(am__dirstamp) | ||||
| 	-rm -f std/internal/test/$(am__dirstamp) | ||||
| 	-rm -f std/internal/windows/$(am__dirstamp) | ||||
|  |  | |||
|  | @ -1,729 +0,0 @@ | |||
| // Written in the D programming language.
 | ||||
| 
 | ||||
| /** | ||||
|  * Computes SHA1 digests of arbitrary data, using an optimized algorithm with SSSE3 instructions. | ||||
|  * | ||||
|  * Authors: | ||||
|  * The general idea is described by Dean Gaudet. | ||||
|  * Another important observation is published by Max Locktyukhin. | ||||
|  * (Both implementations are public domain.) | ||||
|  * Translation to X86 and D by Kai Nacke <kai@redstar.de> | ||||
|  * | ||||
|  * References: | ||||
|  *      $(LINK2 http://arctic.org/~dean/crypto/sha1.html)
 | ||||
|  *      $(LINK2 http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/, Fast implementation of SHA1)
 | ||||
|  */ | ||||
| module std.internal.digest.sha_SSSE3; | ||||
| 
 | ||||
| version (D_InlineAsm_X86) | ||||
| { | ||||
|     version (D_PIC) {} // Bugzilla 9378
 | ||||
|     else | ||||
|     { | ||||
|         private version = USE_SSSE3; | ||||
|         private version = _32Bit; | ||||
|     } | ||||
| } | ||||
| else version (D_InlineAsm_X86_64) | ||||
| { | ||||
|     private version = USE_SSSE3; | ||||
|     private version = _64Bit; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * The idea is quite simple. The SHA-1 specification defines the following message schedule: | ||||
|  *     W[i] = (W[i-3] ^ W[i-8]  ^ W[i-14] ^ W[i-16]) rol 1 | ||||
|  * | ||||
|  * To employ SSE, simply write down the formula four times: | ||||
|  *     W[i  ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1 | ||||
|  *     W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1 | ||||
|  *     W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1 | ||||
|  *     W[i+3] = (W[i  ] ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1 | ||||
|  * The last formula requires value W[i] computed with the first formula. | ||||
|  * Because the rotate operation distributes over the xor operation, we can replace the | ||||
|  * last formula with | ||||
|  *     W[i+3] = (     0 ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1 | ||||
|  * and then calculate | ||||
|  *     W[i+3] ^= W[i] rol 1 | ||||
|  * which unfortunately requires many additional operations. This approach was described by | ||||
|  * Dean Gaudet. | ||||
|  * | ||||
|  * Max Locktyukhin observed that | ||||
|  *     W[i] = W[i-A] ^ W[i-B] | ||||
|  * is equivalent to | ||||
|  *     W[i] = W[i-2*A] ^ W[i-2*B] | ||||
|  * (if the indices are still in valid ranges). Using this observation, the formula is | ||||
|  * translated to | ||||
|  *     W[i] = (W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32]) rol 2 | ||||
|  * Again, to employ SSE the formula is used four times. | ||||
|  * | ||||
|  * Later on, the expression W[i] + K(i) is used. (K(i) is the constant used in round i.) | ||||
|  * Once the 4 W[i] are calculated, we can also add the four K(i) values with one SSE instruction. | ||||
|  * | ||||
|  * The 32bit and 64bit implementations are almost identical. The main difference is that there | ||||
|  * are only 8 XMM registers in 32bit mode. Therefore, space on the stack is needed to save | ||||
|  * computed values. | ||||
|  */ | ||||
| 
 | ||||
| version (USE_SSSE3) | ||||
| { | ||||
|     /* | ||||
|      * The general idea is to use the XMM registers as a sliding window over | ||||
|      * message schedule. XMM0 to XMM7 are used to store the last 64 byte of | ||||
|      * the message schedule. In 64 bit mode this is fine because of the number of | ||||
|      * registers. The main difference of the 32 bit code is that a part of the | ||||
|      * calculated message schedule is saved on the stack because 2 temporary | ||||
|      * registers are needed. | ||||
|      */ | ||||
| 
 | ||||
|     /* Number of message words we are precalculating. */ | ||||
|     private immutable int PRECALC_AHEAD = 16; | ||||
| 
 | ||||
|     /* T1 and T2 are used for intermediate results of computations. */ | ||||
|     private immutable string T1 = "EAX"; | ||||
|     private immutable string T2 = "EBX"; | ||||
| 
 | ||||
|     /* The registers used for the SHA-1 variables. */ | ||||
|     private immutable string A = "ECX"; | ||||
|     private immutable string B = "ESI"; | ||||
|     private immutable string C = "EDI"; | ||||
|     private immutable string D = "EBP"; | ||||
|     private immutable string E = "EDX"; | ||||
| 
 | ||||
|     /* */ | ||||
|     version (_32Bit) | ||||
|     { | ||||
|         private immutable string SP = "ESP"; | ||||
|         private immutable string BUFFER_PTR = "EAX"; | ||||
|         private immutable string STATE_PTR = "EBX"; | ||||
| 
 | ||||
|         // Control byte for shuffle instruction (only used in round 0-15)
 | ||||
|         private immutable string X_SHUFFLECTL = "XMM6"; | ||||
| 
 | ||||
|         // Round constant (only used in round 0-15)
 | ||||
|         private immutable string X_CONSTANT = "XMM7"; | ||||
|     } | ||||
|     version (_64Bit) | ||||
|     { | ||||
|         private immutable string SP = "RSP"; | ||||
|         private immutable string BUFFER_PTR = "R9"; | ||||
|         private immutable string STATE_PTR = "R8"; | ||||
|         private immutable string CONSTANTS_PTR = "R10"; | ||||
| 
 | ||||
|         // Registers for temporary results (XMM10 and XMM11 are also used temporarily)
 | ||||
|         private immutable string W_TMP = "XMM8"; | ||||
|         private immutable string W_TMP2 = "XMM9"; | ||||
| 
 | ||||
|         // Control byte for shuffle instruction (only used in round 0-15)
 | ||||
|         private immutable string X_SHUFFLECTL = "XMM12"; | ||||
| 
 | ||||
|         // Round constant
 | ||||
|         private immutable string X_CONSTANT = "XMM13"; | ||||
|     } | ||||
| 
 | ||||
|     /* The control words for the byte shuffle instruction and the round constants. */ | ||||
|     align(16) public immutable uint[20] constants = | ||||
|     [ | ||||
|         // The control words for the byte shuffle instruction.
 | ||||
|         0x0001_0203, 0x0405_0607, 0x0809_0a0b, 0x0c0d_0e0f, | ||||
|         // Constants for round 0-19
 | ||||
|         0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999, | ||||
|         // Constants for round 20-39
 | ||||
|         0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, | ||||
|         // Constants for round 40-59
 | ||||
|         0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, | ||||
|         // Constants for round 60-79
 | ||||
|         0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 | ||||
|     ]; | ||||
| 
 | ||||
|     /** Simple version to produce numbers < 100 as string. */ | ||||
|     private nothrow pure string to_string(uint i) | ||||
|     { | ||||
|         if (i < 10) | ||||
|             return "0123456789"[i .. i + 1]; | ||||
| 
 | ||||
|         assert(i < 100); | ||||
|         char[2] s; | ||||
|         s[0] = cast(char)(i / 10 + '0'); | ||||
|         s[1] = cast(char)(i % 10 + '0'); | ||||
|         return s.idup; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns the reference to the byte shuffle control word. */ | ||||
|     private nothrow pure string bswap_shufb_ctl() | ||||
|     { | ||||
|         version (_64Bit) | ||||
|             return "["~CONSTANTS_PTR~"]"; | ||||
|         else | ||||
|             return "[constants]"; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns the reference to constant used in round i. */ | ||||
|     private nothrow pure string constant(uint i) | ||||
|     { | ||||
|         version (_64Bit) | ||||
|             return "16 + 16*"~to_string(i/20)~"["~CONSTANTS_PTR~"]"; | ||||
|         else | ||||
|             return "[constants + 16 + 16*"~to_string(i/20)~"]"; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns the XMM register number used in round i */ | ||||
|     private nothrow pure uint regno(uint i) | ||||
|     { | ||||
|         return (i/4)&7; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns reference to storage of vector W[i .. i+4]. */ | ||||
|     private nothrow pure string WiV(uint i) | ||||
|     { | ||||
|         return "["~SP~" + WI_PTR + "~to_string((i/4)&7)~"*16]"; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns reference to storage of vector (W + K)[i .. i+4]. */ | ||||
|     private nothrow pure string WiKiV(uint i) | ||||
|     { | ||||
|         return "["~SP~" + WI_PLUS_KI_PTR + "~to_string((i/4)&3)~"*16]"; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns reference to storage of value W[i] + K[i]. */ | ||||
|     private nothrow pure string WiKi(uint i) | ||||
|     { | ||||
|         return "["~SP~" + WI_PLUS_KI_PTR + 4*"~to_string(i&15)~"]"; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Chooses the instruction sequence based on the 32bit or 64bit model. | ||||
|      */ | ||||
|     private nothrow pure string[] swt3264(string[] insn32, string[] insn64) | ||||
|     { | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             return insn32; | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             return insn64; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Flattens the instruction sequence and wraps it in an asm block. | ||||
|      */ | ||||
|     private nothrow pure string wrap(string[] insn) | ||||
|     { | ||||
|         string s = "asm pure nothrow @nogc {"; | ||||
|         foreach (t; insn) s ~= (t ~ "; \n"); | ||||
|         s ~= "}"; | ||||
|         return s; | ||||
|         // Is not CTFE:
 | ||||
|         // return "asm pure nothrow @nogc { " ~ join(insn, "; \n") ~ "}";
 | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Weaves the 2 instruction sequences together. | ||||
|      */ | ||||
|     private nothrow pure string[] weave(string[] seq1, string[] seq2, uint dist = 1) | ||||
|     { | ||||
|         string[] res = []; | ||||
|         auto i1 = 0, i2 = 0; | ||||
|         while (i1 < seq1.length || i2 < seq2.length) | ||||
|         { | ||||
|             if (i2 < seq2.length) | ||||
|             { | ||||
|                 res ~= seq2[i2 .. i2+1]; | ||||
|                 i2 += 1; | ||||
|             } | ||||
|             if (i1 < seq1.length) | ||||
|             { | ||||
|                 import std.algorithm.comparison : min; | ||||
| 
 | ||||
|                 res ~= seq1[i1 .. min(i1+dist, $)]; | ||||
|                 i1 += dist; | ||||
|             } | ||||
|         } | ||||
|         return res; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Generates instructions to load state from memory into registers. | ||||
|      */ | ||||
|     private nothrow pure string[] loadstate(string base, string a, string b, string c, string d, string e) | ||||
|     { | ||||
|         return ["mov "~a~",["~base~" + 0*4]", | ||||
|                 "mov "~b~",["~base~" + 1*4]", | ||||
|                 "mov "~c~",["~base~" + 2*4]", | ||||
|                 "mov "~d~",["~base~" + 3*4]", | ||||
|                 "mov "~e~",["~base~" + 4*4]" ]; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Generates instructions to update state from registers, saving result in memory. | ||||
|      */ | ||||
|     private nothrow pure string[] savestate(string base, string a, string b, string c, string d, string e) | ||||
|     { | ||||
|         return ["add ["~base~" + 0*4],"~a, | ||||
|                 "add ["~base~" + 1*4],"~b, | ||||
|                 "add ["~base~" + 2*4],"~c, | ||||
|                 "add ["~base~" + 3*4],"~d, | ||||
|                 "add ["~base~" + 4*4],"~e ]; | ||||
|     } | ||||
| 
 | ||||
|     /** Calculates Ch(x, y, z) = z ^ (x & (y ^ z)) */ | ||||
|     private nothrow pure string[] Ch(string x, string y, string z) | ||||
|     { | ||||
|         return ["mov "~T1~","~y, | ||||
|                 "xor "~T1~","~z, | ||||
|                 "and "~T1~","~x, | ||||
|                 "xor "~T1~","~z ]; | ||||
|     } | ||||
| 
 | ||||
|     /** Calculates Parity(x, y, z) = x ^ y ^ z */ | ||||
|     private nothrow pure string[] Parity(string x, string y, string z) | ||||
|     { | ||||
|         return ["mov "~T1~","~z, | ||||
|                 "xor "~T1~","~y, | ||||
|                 "xor "~T1~","~x ]; | ||||
|     } | ||||
| 
 | ||||
|     /** Calculates Maj(x, y, z) = (x & y) | (z & (x ^ y)) */ | ||||
|     private nothrow pure string[] Maj(string x, string y, string z) | ||||
|     { | ||||
|         return ["mov "~T1~","~y, | ||||
|                 "mov "~T2~","~x, | ||||
|                 "or  "~T1~","~x, | ||||
|                 "and "~T2~","~y, | ||||
|                 "and "~T1~","~z, | ||||
|                 "or  "~T1~","~T2 ]; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns function for round i. Function returns result in T1 and may destroy T2. */ | ||||
|     private nothrow pure string[] F(int i, string b, string c, string d) | ||||
|     { | ||||
|         string[] insn; | ||||
|         if (i >=  0 && i <= 19) insn = Ch(b, c, d); | ||||
|         else if (i >= 20 && i <= 39) insn = Parity(b, c, d); | ||||
|         else if (i >= 40 && i <= 59) insn = Maj(b, c, d); | ||||
|         else if (i >= 60 && i <= 79) insn = Parity(b, c, d); | ||||
|         else assert(false, "Coding error"); | ||||
|         return insn; | ||||
|     } | ||||
| 
 | ||||
|     /** Returns instruction used to setup a round. */ | ||||
|     private nothrow pure string[] xsetup(int i) | ||||
|     { | ||||
|         if (i == 0) | ||||
|         { | ||||
|             return swt3264(["movdqa "~X_SHUFFLECTL~","~bswap_shufb_ctl(), | ||||
|                              "movdqa "~X_CONSTANT~","~constant(i)], | ||||
|                             ["movdqa "~X_SHUFFLECTL~","~bswap_shufb_ctl(), | ||||
|                              "movdqa "~X_CONSTANT~","~constant(i)]); | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             if (i%20 == 0) | ||||
|             { | ||||
|                 return ["movdqa "~X_CONSTANT~","~constant(i)]; | ||||
|             } | ||||
|         } | ||||
|         return []; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Loads the message words and performs the little to big endian conversion. | ||||
|      * Requires that the shuffle control word and the round constant are loaded | ||||
|      * into the required XMM registers. The BUFFER_PTR register must point to the | ||||
|      * buffer. | ||||
|      */ | ||||
|     private nothrow pure string[] precalc_00_15(int i) | ||||
|     { | ||||
|         int regno = regno(i); | ||||
| 
 | ||||
|         string W = "XMM" ~ to_string(regno); | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             string W_TMP = "XMM" ~ to_string(regno+2); | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             string W_TMP = "XMM" ~ to_string(regno+8); | ||||
|         } | ||||
| 
 | ||||
|         if ((i & 3) == 0) | ||||
|         { | ||||
|             return ["movdqu "~W~",["~BUFFER_PTR~" + "~to_string(regno)~"*16]"]; | ||||
|         } | ||||
|         else if ((i & 3) == 1) | ||||
|         { | ||||
|             return ["pshufb "~W~","~X_SHUFFLECTL] ~ | ||||
|                     swt3264(["movdqa "~WiV(i)~","~W], []); | ||||
|         } | ||||
|         else if ((i & 3) == 2) | ||||
|         { | ||||
|             return ["movdqa "~W_TMP~","~W, | ||||
|                     "paddd "~W_TMP~","~X_CONSTANT, | ||||
|                    ]; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return ["movdqa "~WiKiV(i)~","~W_TMP, | ||||
|                    ]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Done on 4 consecutive W[i] values in a single XMM register | ||||
|      *  W[i  ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1 | ||||
|      *  W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1 | ||||
|      *  W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1 | ||||
|      *  W[i+3] = (   0   ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1 | ||||
|      * | ||||
|      * This additional calculation unfortunately requires many additional operations | ||||
|      *  W[i+3] ^= W[i] rol 1 | ||||
|      * | ||||
|      * Once we have 4 W[i] values in XMM we can also add four K values with one instruction | ||||
|      *   W[i:i+3] += {K,K,K,K} | ||||
|      */ | ||||
|     private nothrow pure string[] precalc_16_31(int i) | ||||
|     { | ||||
|         int regno = regno(i); | ||||
| 
 | ||||
|         string W = "XMM" ~ to_string(regno); | ||||
|         string W_minus_4 = "XMM" ~ to_string((regno-1)&7); | ||||
|         string W_minus_8 = "XMM" ~ to_string((regno-2)&7); | ||||
|         string W_minus_12 = "XMM" ~ to_string((regno-3)&7); | ||||
|         string W_minus_16 = "XMM" ~ to_string((regno-4)&7); | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             string W_TMP = "XMM" ~ to_string((regno+1)&7); | ||||
|             string W_TMP2 = "XMM" ~ to_string((regno+2)&7); | ||||
|         } | ||||
| 
 | ||||
|         if ((i & 3) == 0) | ||||
|         { | ||||
|             return ["movdqa "~W~","~W_minus_12, | ||||
|                     "palignr "~W~","~W_minus_16~",8",   // W[i] = W[i-14]
 | ||||
|                     "pxor "~W~","~W_minus_16,           // W[i] ^= W[i-16]
 | ||||
|                     "pxor "~W~","~W_minus_8,            // W[i] ^= W[i-8]
 | ||||
|                     "movdqa "~W_TMP~","~W_minus_4, | ||||
|             ]; | ||||
|         } | ||||
|         else if ((i & 3) == 1) | ||||
|         { | ||||
|             return ["psrldq "~W_TMP~",4",               // W[i-3]
 | ||||
|                     "pxor "~W~","~W_TMP,                // W[i] ^= W[i-3]
 | ||||
|                     "movdqa "~W_TMP~","~W, | ||||
|                     "psrld "~W~",31", | ||||
|                     "pslld "~W_TMP~",1", | ||||
|             ]; | ||||
|         } | ||||
|         else if ((i & 3) == 2) | ||||
|         { | ||||
|             return ["por "~W~","~W_TMP, | ||||
|                     "movdqa "~W_TMP~","~W, | ||||
|                     "pslldq "~W_TMP~",12", | ||||
|                     "movdqa "~W_TMP2~","~W_TMP, | ||||
|                     "pslld "~W_TMP~",1", | ||||
|             ]; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return ["psrld "~W_TMP2~",31", | ||||
|                     "por "~W_TMP~","~W_TMP2, | ||||
|                     "pxor "~W~","~W_TMP, | ||||
|                     "movdqa "~W_TMP~","~W ] ~ | ||||
|                    swt3264(["movdqa "~WiV(i)~","~W, | ||||
|                             "paddd "~W_TMP~","~constant(i) ], | ||||
|                            ["paddd "~W_TMP~","~X_CONSTANT ]) ~ | ||||
|                    ["movdqa "~WiKiV(i)~","~W_TMP]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** Performs the main calculation as described above. */ | ||||
|     private nothrow pure string[] precalc_32_79(int i) | ||||
|     { | ||||
|         int regno = regno(i); | ||||
| 
 | ||||
|         string W = "XMM" ~ to_string(regno); | ||||
|         string W_minus_4 = "XMM" ~ to_string((regno-1)&7); | ||||
|         string W_minus_8 = "XMM" ~ to_string((regno-2)&7); | ||||
|         string W_minus_16 = "XMM" ~ to_string((regno-4)&7); | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             string W_minus_28 = "[ESP + WI_PTR + "~ to_string((regno-7)&7)~"*16]"; | ||||
|             string W_minus_32 = "[ESP + WI_PTR + "~ to_string((regno-8)&7)~"*16]"; | ||||
|             string W_TMP = "XMM" ~ to_string((regno+1)&7); | ||||
|             string W_TMP2 = "XMM" ~ to_string((regno+2)&7); | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             string W_minus_28 = "XMM" ~ to_string((regno-7)&7); | ||||
|             string W_minus_32 = "XMM" ~ to_string((regno-8)&7); | ||||
|         } | ||||
| 
 | ||||
|         if ((i & 3) == 0) | ||||
|         { | ||||
|             return swt3264(["movdqa "~W~","~W_minus_32], []) ~ | ||||
|                    ["movdqa "~W_TMP~","~W_minus_4, | ||||
|                     "pxor "~W~","~W_minus_28,         // W is W_minus_32 before xor
 | ||||
|                     "palignr "~W_TMP~","~W_minus_8~",8", | ||||
|             ]; | ||||
|         } | ||||
|         else if ((i & 3) == 1) | ||||
|         { | ||||
|             return ["pxor "~W~","~W_minus_16, | ||||
|                     "pxor "~W~","~W_TMP, | ||||
|                     "movdqa "~W_TMP~","~W, | ||||
|             ]; | ||||
|         } | ||||
|         else if ((i & 3) == 2) | ||||
|         { | ||||
|             return ["psrld "~W~",30", | ||||
|                     "pslld "~W_TMP~",2", | ||||
|                     "por "~W_TMP~","~W, | ||||
|             ]; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             if (i < 76) | ||||
|                 return ["movdqa "~W~","~W_TMP] ~ | ||||
|                        swt3264(["movdqa "~WiV(i)~","~W, | ||||
|                                 "paddd "~W_TMP~","~constant(i)], | ||||
|                                ["paddd "~W_TMP~","~X_CONSTANT]) ~ | ||||
|                        ["movdqa "~WiKiV(i)~","~W_TMP]; | ||||
|             else | ||||
|                 return swt3264(["paddd "~W_TMP~","~constant(i)], | ||||
|                                ["paddd "~W_TMP~","~X_CONSTANT]) ~ | ||||
|                        ["movdqa "~WiKiV(i)~","~W_TMP]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** Choose right precalc method. */ | ||||
|     private nothrow pure string[] precalc(int i) | ||||
|     { | ||||
|         if (i >= 0 && i < 16) return precalc_00_15(i); | ||||
|         if (i >= 16 && i < 32) return precalc_16_31(i); | ||||
|         if (i >= 32 && i < 80) return precalc_32_79(i); | ||||
|         return []; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Return code for round i and i+1. | ||||
|      * Performs the following rotation: | ||||
|      * in=>out: A=>D, B=>E, C=>A, D=>B, E=>C | ||||
|      */ | ||||
|     private nothrow pure string[] round(int i, string a, string b, string c, string d, string e) | ||||
|     { | ||||
|         return xsetup(PRECALC_AHEAD + i) ~ | ||||
|                weave(F(i, b, c, d) ~ // Returns result in T1; may destroy T2
 | ||||
|                ["add "~e~","~WiKi(i), | ||||
|                 "ror "~b~",2", | ||||
|                 "mov "~T2~","~a, | ||||
|                 "add "~d~","~WiKi(i+1), | ||||
|                 "rol "~T2~",5", | ||||
|                 "add "~e~","~T1 ], | ||||
|                 precalc(PRECALC_AHEAD + i), 2) ~ | ||||
|                weave( | ||||
|                ["add "~T2~","~e,  // T2 = (A <<< 5) + F(B, C, D) + Wi + Ki + E
 | ||||
|                 "mov "~e~","~T2, | ||||
|                 "rol "~T2~",5", | ||||
|                 "add "~d~","~T2 ] ~ | ||||
|                F(i+1, a, b, c) ~ // Returns result in T1; may destroy T2
 | ||||
|                ["add "~d~","~T1, | ||||
|                 "ror "~a~",2"], | ||||
|                 precalc(PRECALC_AHEAD + i+1), 2); | ||||
|     } | ||||
| 
 | ||||
|     // Offset into stack (see below)
 | ||||
|     version (_32Bit) | ||||
|     { | ||||
|         private enum { STATE_OFS = 4, WI_PLUS_KI_PTR = 8, WI_PTR = 72 }; | ||||
|     } | ||||
|     version (_64Bit) | ||||
|     { | ||||
|         private enum { WI_PLUS_KI_PTR = 0 }; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * The prologue sequence. | ||||
|      * | ||||
|      * Builds the instructions that save registers as required by the calling | ||||
|      * convention, bring the parameters into the registers used by the | ||||
|      * generated code and reserve the stack scratch space shown in the layout | ||||
|      * diagrams inside the body. | ||||
|      * | ||||
|      * Returns: the instructions as an array of strings, one instruction each. | ||||
|      */ | ||||
|     private nothrow pure string[] prologue() | ||||
|     { | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             /* | ||||
|              * Parameters: | ||||
|              *   EAX contains pointer to input buffer | ||||
|              * | ||||
|              * Stack layout as follows: | ||||
|              * +----------------+ | ||||
|              * | ptr to state   | | ||||
|              * +----------------+ | ||||
|              * | return address | | ||||
|              * +----------------+ | ||||
|              * | EBP            | | ||||
|              * +----------------+ | ||||
|              * | ESI            | | ||||
|              * +----------------+ | ||||
|              * | EDI            | | ||||
|              * +----------------+ | ||||
|              * | EBX            | | ||||
|              * +----------------+ | ||||
|              * | Space for      | | ||||
|              * | Wi             | <- ESP+72 | ||||
|              * +----------------+ | ||||
|              * | Space for      | | ||||
|              * | Wi+Ki          | <- ESP+8 | ||||
|              * +----------------+ <- 16byte aligned | ||||
|              * | ptr to state   | <- ESP+4 | ||||
|              * +----------------+ | ||||
|              * | old ESP        | <- ESP | ||||
|              * +----------------+ | ||||
|              */ | ||||
|             static assert(BUFFER_PTR == "EAX"); | ||||
|             static assert(STATE_PTR == "EBX"); | ||||
|             return [// Save registers according to calling convention
 | ||||
|                     "push EBP", | ||||
|                     "push ESI", | ||||
|                     "push EDI", | ||||
|                     "push EBX", | ||||
|                     // Load parameters
 | ||||
|                     "mov EBX, [ESP + 5*4]", //pointer to state
 | ||||
|                     // Keep the old ESP in EBP, reserve room for Wi+Ki (4*16) and Wi (8*16), align ESP to 16 bytes, then push the state pointer and the old ESP
 | ||||
|                     "mov EBP, ESP", | ||||
|                     "sub ESP, 4*16 + 8*16", | ||||
|                     "and ESP, 0xffff_fff0", | ||||
|                     "push EBX", | ||||
|                     "push EBP", | ||||
|             ]; | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             /* | ||||
|              * Parameters: | ||||
|              *   RDX contains pointer to state | ||||
|              *   RSI contains pointer to input buffer | ||||
|              *   RDI contains pointer to constants | ||||
|              * | ||||
|              * Stack layout as follows: | ||||
|              * +----------------+ | ||||
|              * | return address | | ||||
|              * +----------------+ | ||||
|              * | RBP            | | ||||
|              * +----------------+ | ||||
|              * | RBX            | | ||||
|              * +----------------+ | ||||
|              * | Unused         | | ||||
|              * +----------------+ | ||||
|              * | Space for      | | ||||
|              * | Wi+Ki          | <- RSP | ||||
|              * +----------------+ <- 16byte aligned | ||||
|              */ | ||||
|             return [// Save registers according to calling convention
 | ||||
|                     "push RBP", | ||||
|                     "push RBX", | ||||
|                     // Save parameters
 | ||||
|                     "mov "~STATE_PTR~", RDX", //pointer to state
 | ||||
|                     "mov "~BUFFER_PTR~", RSI", //pointer to buffer
 | ||||
|                     "mov "~CONSTANTS_PTR~", RDI", //pointer to constants to avoid absolute addressing
 | ||||
|                     // Align stack: reserve 4*16 bytes for Wi+Ki plus 8 unused bytes (see the layout above)
 | ||||
|                     "sub RSP, 4*16+8", | ||||
|             ]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|       * The epilogue sequence. Just pop the saved registers from stack and return to caller. | ||||
|       * | ||||
|       * 32-bit: `pop ESP` reloads the pre-alignment stack pointer that the | ||||
|       * prologue pushed last, the saved registers are popped in the reverse of | ||||
|       * the order the prologue pushed them, and `ret 4` additionally discards | ||||
|       * the 4-byte state pointer that sits above the return address. | ||||
|       * 64-bit: the 4*16+8 bytes reserved by the prologue are released before | ||||
|       * RBX and RBP are popped. | ||||
|       * | ||||
|       * Returns: the instructions as an array of strings, one instruction each. | ||||
|       */ | ||||
|     private nothrow pure string[] epilogue() | ||||
|     { | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             return ["pop ESP", | ||||
|                     "pop EBX", | ||||
|                     "pop EDI", | ||||
|                     "pop ESI", | ||||
|                     "pop EBP", | ||||
|                     "ret 4", | ||||
|                    ]; | ||||
|         } | ||||
|         version (_64Bit) | ||||
|         { | ||||
|             return ["add RSP,4*16+8", | ||||
|                     "pop RBX", | ||||
|                     "pop RBP", | ||||
|                     "ret 0", | ||||
|                    ]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // Extra trailing parameter types of transformSSSE3: on 64-bit the address of `constants` is passed as an argument for PIC (see Bugzilla 9378); otherwise there are none
 | ||||
|     import std.meta : AliasSeq; | ||||
|     version (_64Bit) | ||||
|         alias ExtraArgs = AliasSeq!(typeof(&constants)); | ||||
|     else | ||||
|         alias ExtraArgs = AliasSeq!(); | ||||
| 
 | ||||
|     /** | ||||
|      * Runs the generated SSSE3 transform over one 64-byte input block and a | ||||
|      * five-word state. | ||||
|      * | ||||
|      * The body consists only of mixed-in instruction lists, starting with | ||||
|      * "naked;" and the prologue. It then emits xsetup(0), precalc(0) through | ||||
|      * precalc(15) (the first 64 bytes) interleaved with loadstate, 40 round() | ||||
|      * expansions with even indices 0 to 78 and the working registers rotated | ||||
|      * between calls, savestate and finally the epilogue. | ||||
|      * | ||||
|      * Params: | ||||
|      *   state  = pointer to the five 32-bit state words | ||||
|      *   buffer = pointer to the 64-byte block to process | ||||
|      *   ExtraArgs = on 64-bit, the address of `constants`; empty otherwise | ||||
|      * | ||||
|      * NOTE(review): loadstate() and savestate() are defined outside this view; | ||||
|      * presumably they read and write the working registers A..E through | ||||
|      * STATE_PTR - confirm against their definitions. | ||||
|      */ | ||||
|     public void transformSSSE3(uint[5]* state, const(ubyte[64])* buffer, ExtraArgs) pure nothrow @nogc | ||||
|     { | ||||
|         mixin(wrap(["naked;"] ~ prologue())); | ||||
|         // Precalc first 4*16=64 bytes
 | ||||
|         mixin(wrap(xsetup(0))); | ||||
|         mixin(wrap(weave(precalc(0)~precalc(1)~precalc(2)~precalc(3), | ||||
|                          precalc(4)~precalc(5)~precalc(6)~precalc(7)))); | ||||
|         mixin(wrap(weave(loadstate(STATE_PTR, A, B, C, D, E), | ||||
|                    weave(precalc(8)~precalc(9)~precalc(10)~precalc(11), | ||||
|                          precalc(12)~precalc(13)~precalc(14)~precalc(15))))); | ||||
|         // Round 1
 | ||||
|         mixin(wrap(round( 0, A, B, C, D, E))); | ||||
|         mixin(wrap(round( 2, D, E, A, B, C))); | ||||
|         mixin(wrap(round( 4, B, C, D, E, A))); | ||||
|         mixin(wrap(round( 6, E, A, B, C, D))); | ||||
|         mixin(wrap(round( 8, C, D, E, A, B))); | ||||
|         mixin(wrap(round(10, A, B, C, D, E))); | ||||
|         mixin(wrap(round(12, D, E, A, B, C))); | ||||
|         mixin(wrap(round(14, B, C, D, E, A))); | ||||
|         mixin(wrap(round(16, E, A, B, C, D))); | ||||
|         mixin(wrap(round(18, C, D, E, A, B))); | ||||
|         // Round 2
 | ||||
|         mixin(wrap(round(20, A, B, C, D, E))); | ||||
|         mixin(wrap(round(22, D, E, A, B, C))); | ||||
|         mixin(wrap(round(24, B, C, D, E, A))); | ||||
|         mixin(wrap(round(26, E, A, B, C, D))); | ||||
|         mixin(wrap(round(28, C, D, E, A, B))); | ||||
|         mixin(wrap(round(30, A, B, C, D, E))); | ||||
|         mixin(wrap(round(32, D, E, A, B, C))); | ||||
|         mixin(wrap(round(34, B, C, D, E, A))); | ||||
|         mixin(wrap(round(36, E, A, B, C, D))); | ||||
|         mixin(wrap(round(38, C, D, E, A, B))); | ||||
|         // Round 3
 | ||||
|         mixin(wrap(round(40, A, B, C, D, E))); | ||||
|         mixin(wrap(round(42, D, E, A, B, C))); | ||||
|         mixin(wrap(round(44, B, C, D, E, A))); | ||||
|         mixin(wrap(round(46, E, A, B, C, D))); | ||||
|         mixin(wrap(round(48, C, D, E, A, B))); | ||||
|         mixin(wrap(round(50, A, B, C, D, E))); | ||||
|         mixin(wrap(round(52, D, E, A, B, C))); | ||||
|         mixin(wrap(round(54, B, C, D, E, A))); | ||||
|         mixin(wrap(round(56, E, A, B, C, D))); | ||||
|         mixin(wrap(round(58, C, D, E, A, B))); | ||||
|         // Round 4
 | ||||
|         mixin(wrap(round(60, A, B, C, D, E))); | ||||
|         mixin(wrap(round(62, D, E, A, B, C))); | ||||
|         mixin(wrap(round(64, B, C, D, E, A))); | ||||
|         mixin(wrap(round(66, E, A, B, C, D))); | ||||
|         mixin(wrap(round(68, C, D, E, A, B))); | ||||
|         mixin(wrap(round(70, A, B, C, D, E))); | ||||
|         mixin(wrap(round(72, D, E, A, B, C))); | ||||
|         mixin(wrap(round(74, B, C, D, E, A))); | ||||
|         mixin(wrap(round(76, E, A, B, C, D))); | ||||
|         mixin(wrap(round(78, C, D, E, A, B))); | ||||
|         version (_32Bit) | ||||
|         { | ||||
|             // Reload the pointer to state from its stack slot (ESP + STATE_OFS), where the prologue pushed it
 | ||||
|             mixin(wrap(["mov "~STATE_PTR~",[ESP + STATE_OFS]"])); | ||||
|         } | ||||
|         mixin(wrap(savestate(STATE_PTR, A, B, C, D, E))); | ||||
|         mixin(wrap(epilogue())); | ||||
|     } | ||||
| } | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	 Iain Buclaw
						Iain Buclaw