| | 1 | = memset = |
| | 2 | |
| | 3 | == Variants == |
| | 4 | |
| | 5 | ||= '''Name''' =||= '''Description''' =|| |
| | 6 | || stock || MD amd64 version {{{rep stosq}}} || |
| | 7 | || SSE2 || {{{movdqu}}} for block-store || |
| | 8 | || SSE2 aligned || {{{movaps}}} for aligned block-store and {{{movdqu}}} for unaligned || |
| | 9 | || AVX 128 || 128-bit {{{vmovdqu}}} for block-store || |
| | 10 | || AVX 256 || 256-bit {{{vmovdqu}}} for block-store || |
| | 11 | || ERMS || {{{rep stosb}}} for machines with ERMS || |
| | 12 | |
| | 13 | == Machines Tested == |
| | 14 | |
| | 15 | ||= '''CPU''' =||= '''Speed (GHz)''' =||= '''Notes''' =|| |
| | 16 | || AMD FX-8120 || 3.11 || 1 x 8 zoo.freebsd.org || |
| | 17 | || AMD Opteron 6328 || 3.20 || 2 x 8 Supermicro H8DG6/H8DGi || |
| | 18 | || Intel Xeon X5365 || 3.00 || 2 x 4 Supermicro X7DBU || |
| | 19 | || Intel Xeon X5482 || 3.20 || 2 x 4 Supermicro X7DWN+ || |
| | 20 | || Intel Xeon X5675 || 3.07 || Westmere 2 x 6 Supermicro X8DTU || |
| | 21 | || Intel Core i5-2520M || 2.50 || Sandy Bridge 1 x 4 Thinkpad X220 (4286) || |
| | 22 | || Intel Core i5-2500K || 3.30 || Sandy Bridge 1 x 4 MSI Z77A-G45 (MS-7752) || |
| | 23 | || Intel Xeon E5-2680 || 2.70 || Romley 2 x 8 Supermicro X9DRW || |
| | 24 | || Intel Xeon E5-2667 v2 || 3.30 || Romley V2 2 x 8 Supermicro X9DRW (supports ERMS) || |
| | 25 | |
| | 26 | == Test Cases == |
| | 27 | |
| | 28 | ||= '''Name''' =||= '''Description''' =|| |
| | 29 | || page || set page to 0xa5 || |
| | 30 | || short || set aligned 15 bytes to 0xa5 || |
| | 31 | || short2 || set aligned 32 bytes to 0xa5 || |
| | 32 | || short3 || set aligned 48 bytes to 0xa5 || |
| | 33 | || offset || set misaligned ( + 4) 128 bytes to 0 || |
| | 34 | || offset2 || set misaligned ( + 7) 97 bytes to 0 || |
| | 35 | |
| | 36 | == Results == |
| | 37 | |
| | 38 | Each number is the minimum of a distribution of TSC deltas, where each delta is measured across a single invocation of the test. |
| | 39 | |
| | 40 | Bold indicates the lowest time among the variants for a given test and CPU combination. Green text is used for times faster than the stock implementation, and red text is used for times slower than the stock implementation. |
| | 41 | |
| | 42 | {{{#!th rowspan=3 |
| | 43 | '''CPU''' |
| | 44 | }}} |
| | 45 | {{{#!th colspan=36 |
| | 46 | '''Test / Variant''' |
| | 47 | }}} |
| | 48 | |-- |
| | 49 | {{{#!th colspan=6 |
| | 50 | '''page''' |
| | 51 | }}} |
| | 52 | {{{#!th colspan=6 |
| | 53 | '''short''' |
| | 54 | }}} |
| | 55 | {{{#!th colspan=6 |
| | 56 | '''short2''' |
| | 57 | }}} |
| | 58 | {{{#!th colspan=6 |
| | 59 | '''short3''' |
| | 60 | }}} |
| | 61 | {{{#!th colspan=6 |
| | 62 | '''offset''' |
| | 63 | }}} |
| | 64 | {{{#!th colspan=6 |
| | 65 | '''offset2''' |
| | 66 | }}} |
| | 67 | |-- |
| | 68 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| \ |
| | 69 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| \ |
| | 70 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| \ |
| | 71 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| \ |
| | 72 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| \ |
| | 73 | ||= '''stock''' =||= '''SSE2''' =||= '''SSE2 aligned''' =||= '''AVX 128''' =||= '''AVX 256''' =||= '''ERMS''' =|| |
| | 74 | || AMD FX-8120 || |
| | 75 | || AMD Opteron 6328 || |
| | 76 | || Intel Xeon X5365 || |
| | 77 | || Intel Xeon X5482 || |
| | 78 | || Intel Xeon X5675 || |
| | 79 | || Intel Core i5-2520M || |
| | 80 | || Intel Core i5-2500K || |
| | 81 | || Intel Xeon E5-2680 || |
| | 82 | || Intel Xeon E5-2667 v2 || |
| | 83 | |
| | 84 | == Conclusions == |