99
"vmovdqa (%0,%%rax), %%ymm0 \n\t"
100
"vmovdqa 32(%0,%%rax), %%ymm1 \n\t"
101
"vmovdqa 64(%0,%%rax), %%ymm2 \n\t"
102
"vmovdqa 96(%0,%%rax), %%ymm3 \n\t"
103
"vmovdqa 128(%0,%%rax), %%ymm4 \n\t"
104
"vmovdqa 160(%0,%%rax), %%ymm5 \n\t"
105
"vmovdqa 192(%0,%%rax), %%ymm6 \n\t"
106
"vmovdqa 224(%0,%%rax), %%ymm7 \n\t"
108
"vmovdqa 256(%0,%%rax), %%ymm8 \n\t"
109
"vmovdqa 288(%0,%%rax), %%ymm9 \n\t"
110
"vmovdqa 320(%0,%%rax), %%ymm10 \n\t"
111
"vmovdqa 352(%0,%%rax), %%ymm11 \n\t"
112
"vmovdqa 384(%0,%%rax), %%ymm12 \n\t"
113
"vmovdqa 416(%0,%%rax), %%ymm13 \n\t"
114
"vmovdqa 448(%0,%%rax), %%ymm14 \n\t"
115
"vmovdqa 480(%0,%%rax), %%ymm15 \n\t"
117
"vmovntps %%ymm0, (%1,%%rax) \n\t"
118
"vmovntps %%ymm1, 32(%1,%%rax) \n\t"
119
"vmovntps %%ymm2, 64(%1,%%rax) \n\t"
120
"vmovntps %%ymm3, 96(%1,%%rax) \n\t"
121
"vmovntps %%ymm4, 128(%1,%%rax) \n\t"
122
"vmovntps %%ymm5, 160(%1,%%rax) \n\t"
123
"vmovntps %%ymm6, 192(%1,%%rax) \n\t"
124
"vmovntps %%ymm7, 224(%1,%%rax) \n\t"
126
"vmovntps %%ymm8, 256(%1,%%rax) \n\t"
127
"vmovntps %%ymm9, 288(%1,%%rax) \n\t"
128
"vmovntps %%ymm10, 320(%1,%%rax) \n\t"
129
"vmovntps %%ymm11, 352(%1,%%rax) \n\t"
130
"vmovntps %%ymm12, 384(%1,%%rax) \n\t"
131
"vmovntps %%ymm13, 416(%1,%%rax) \n\t"
132
"vmovntps %%ymm14, 448(%1,%%rax) \n\t"
133
"vmovntps %%ymm15, 480(%1,%%rax) \n\t"
99
"vmovdqa (%1,%%rax), %%ymm0 \n\t"
100
"vmovdqa 32(%1,%%rax), %%ymm1 \n\t"
101
"vmovdqa 64(%1,%%rax), %%ymm2 \n\t"
102
"vmovdqa 96(%1,%%rax), %%ymm3 \n\t"
103
"vmovdqa 128(%1,%%rax), %%ymm4 \n\t"
104
"vmovdqa 160(%1,%%rax), %%ymm5 \n\t"
105
"vmovdqa 192(%1,%%rax), %%ymm6 \n\t"
106
"vmovdqa 224(%1,%%rax), %%ymm7 \n\t"
108
"vmovdqa 256(%1,%%rax), %%ymm8 \n\t"
109
"vmovdqa 288(%1,%%rax), %%ymm9 \n\t"
110
"vmovdqa 320(%1,%%rax), %%ymm10 \n\t"
111
"vmovdqa 352(%1,%%rax), %%ymm11 \n\t"
112
"vmovdqa 384(%1,%%rax), %%ymm12 \n\t"
113
"vmovdqa 416(%1,%%rax), %%ymm13 \n\t"
114
"vmovdqa 448(%1,%%rax), %%ymm14 \n\t"
115
"vmovdqa 480(%1,%%rax), %%ymm15 \n\t"
117
"vmovdqa %%ymm0, (%0,%%rax) \n\t"
118
"vmovdqa %%ymm1, 32(%0,%%rax) \n\t"
119
"vmovntps %%ymm2, 64(%0,%%rax) \n\t"
120
"vmovntps %%ymm3, 96(%0,%%rax) \n\t"
121
"vmovntps %%ymm4, 128(%0,%%rax) \n\t"
122
"vmovntps %%ymm5, 160(%0,%%rax) \n\t"
123
"vmovntps %%ymm6, 192(%0,%%rax) \n\t"
124
"vmovntps %%ymm7, 224(%0,%%rax) \n\t"
126
"vmovntps %%ymm8, 256(%0,%%rax) \n\t"
127
"vmovntps %%ymm9, 288(%0,%%rax) \n\t"
128
"vmovntps %%ymm10, 320(%0,%%rax) \n\t"
129
"vmovntps %%ymm11, 352(%0,%%rax) \n\t"
130
"vmovntps %%ymm12, 384(%0,%%rax) \n\t"
131
"vmovntps %%ymm13, 416(%0,%%rax) \n\t"
132
"vmovntps %%ymm14, 448(%0,%%rax) \n\t"
133
"vmovntps %%ymm15, 480(%0,%%rax) \n\t"
135
135
"add $512, %%rax \n\t"
142
142
: "p" (dst), "p" (src), "r" (sse_size)
147
void pcilib_pagecpy(void *dst, void *src, size_t size) {
147
void pcilib_pagecpy(void *dst, const void *src, size_t size) {
148
148
int gen = pcilib_get_cpu_gen();
149
if ((gen > 3)&&(size%4096==0)&&((uintptr_t)dst%32==0)&&((uintptr_t)src%32==0)) {
149
if ((gen > 3)&&((size%4096)==0)&&(((uintptr_t)dst%32)==0)&&(((uintptr_t)src%32)==0)) {
150
150
pcilib_memcpy4k_avx(dst, src, size);
152
152
memcpy(dst, src, size);