/alps/pcitool

To get this branch, use:
bzr branch http://suren.me/webbzr/alps/pcitool
#define _POSIX_C_SOURCE 200112L
#define _GNU_SOURCE

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
#include <ctype.h>
#include <time.h>
#include <sched.h>
#include <arpa/inet.h>
#include <sys/time.h>

#include "cpu.h"
#include "pci.h"
#include "tools.h"
#include "error.h"

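/* The memcpy128() variant below is disabled (commented out); only the AVX
   routines further down are compiled. */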
/*
void *memcpy128(void * dst, void const * src, size_t len) {

    long pos = - (len>>2);
    char * plDst = (char *) dst - 4 * pos;
    char const * plSrc = (char const *) src - 4 * pos;

    if (pos) {
        __asm__ __volatile__ (
            "1:						\n\t"
            "mov	(%0,%2,4), %%edi		\n\t"
            "mov	%%edi, (%1,%2,4)		\n\t"
            "inc	%2				\n\t"
            "jnz 	1b				\n\t"
	:
	: "r" (plSrc), "r" (plDst), "r" (pos)
	: "%edi"
        );
    }


    long pos = - ((len>>4)<<4);
    char * plDst = (char *) dst - pos;
    char const * plSrc = (char const *) src - pos;

    if (pos) {
        __asm__ __volatile__ (
            "1:						\n\t"
//            "movdqa	(%0,%2), %%xmm0			\n\t"
            "mov	(%0,%2), %%esi			\n\t"
            "movd	%%esi, %%xmm0			\n\t"
            "mov	4(%0,%2), %%esi			\n\t"
            "movd	%%esi, %%xmm1			\n\t"
            "mov	8(%0,%2), %%esi			\n\t"
            "movd	%%esi, %%xmm2			\n\t"
            "mov	12(%0,%2), %%esi		\n\t"
            "movd	%%esi, %%xmm3			\n\t"
            "pslldq	$4, %%xmm1			\n\t"
            "por	%%xmm1, %%xmm0			\n\t"
            "pslldq	$8, %%xmm2			\n\t"
            "por	%%xmm2, %%xmm0			\n\t"
            "pslldq	$12, %%xmm3			\n\t"
            "por	%%xmm3, %%xmm0			\n\t"

            "movntdq	%%xmm0, (%1,%2)			\n\t"
            "add	$16, %2				\n\t"
            "jnz 	1b				\n\t"
	:
	: "r" (plSrc), "r" (plDst), "r" (pos)
	: "%rsi"
        );
    }


    len &= 0x3;

    char * pcDst = (char *) plDst;
    char const * pcSrc = (char const *) plSrc;

    while (len--) {
        *pcDst++ = *pcSrc++;
    }

    return (dst);
}
*/

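/*
 * Copy `size` bytes from `src` to `dst` with AVX, 512 bytes per loop
 * iteration: 16 aligned loads into %ymm0-%ymm15 followed by 16 stores,
 * most of them non-temporal (vmovntps) to avoid polluting the cache; the
 * trailing mfence orders the non-temporal stores. The loop counter (%2) is
 * saved and restored around the loop. Because vmovdqa is used for the
 * loads, both buffers must be 32-byte aligned, and `size` should be a
 * multiple of 512 (pcilib_pagecpy() only calls this for whole 4096-byte
 * pages).
 */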
void pcilib_memcpy4k_avx(void *dst, const void *src, size_t size) {
    size_t sse_size = (size / 512);

    __asm__ __volatile__ (
            "push 	%2			\n\t"
            "mov        $0, %%rax		\n\t"

            "1:					\n\t"
            "vmovdqa 	   (%1,%%rax), %%ymm0	\n\t"
            "vmovdqa 	 32(%1,%%rax), %%ymm1	\n\t"
            "vmovdqa 	 64(%1,%%rax), %%ymm2	\n\t"
            "vmovdqa 	 96(%1,%%rax), %%ymm3	\n\t"
            "vmovdqa 	128(%1,%%rax), %%ymm4	\n\t"
            "vmovdqa 	160(%1,%%rax), %%ymm5	\n\t"
            "vmovdqa 	192(%1,%%rax), %%ymm6	\n\t"
            "vmovdqa 	224(%1,%%rax), %%ymm7	\n\t"

            "vmovdqa 	256(%1,%%rax), %%ymm8	\n\t"
            "vmovdqa 	288(%1,%%rax), %%ymm9	\n\t"
            "vmovdqa 	320(%1,%%rax), %%ymm10	\n\t"
            "vmovdqa 	352(%1,%%rax), %%ymm11	\n\t"
            "vmovdqa 	384(%1,%%rax), %%ymm12	\n\t"
            "vmovdqa 	416(%1,%%rax), %%ymm13	\n\t"
            "vmovdqa 	448(%1,%%rax), %%ymm14	\n\t"
            "vmovdqa 	480(%1,%%rax), %%ymm15	\n\t"

            "vmovdqa	%%ymm0,    (%0,%%rax)	\n\t"
            "vmovdqa	%%ymm1,  32(%0,%%rax)	\n\t"
            "vmovntps	%%ymm2,  64(%0,%%rax)	\n\t"
            "vmovntps	%%ymm3,  96(%0,%%rax)	\n\t"
            "vmovntps	%%ymm4, 128(%0,%%rax)	\n\t"
            "vmovntps	%%ymm5, 160(%0,%%rax)	\n\t"
            "vmovntps	%%ymm6, 192(%0,%%rax)	\n\t"
            "vmovntps	%%ymm7, 224(%0,%%rax)	\n\t"

            "vmovntps	%%ymm8,  256(%0,%%rax)	\n\t"
            "vmovntps	%%ymm9,  288(%0,%%rax)	\n\t"
            "vmovntps	%%ymm10, 320(%0,%%rax)	\n\t"
            "vmovntps	%%ymm11, 352(%0,%%rax)	\n\t"
            "vmovntps	%%ymm12, 384(%0,%%rax)	\n\t"
            "vmovntps	%%ymm13, 416(%0,%%rax)	\n\t"
            "vmovntps	%%ymm14, 448(%0,%%rax)	\n\t"
            "vmovntps	%%ymm15, 480(%0,%%rax)	\n\t"

            "add	$512, %%rax		\n\t"
            "dec	%2			\n\t"
            "jnz 	1b			\n\t"
            "pop 	%2			\n\t"
            "mfence"
    :
    : "p" (dst), "p" (src), "r" (sse_size)
    : "%rax"
        );
}

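/*
 * Page-oriented copy: when pcilib_get_cpu_gen() reports a generation above 3
 * (presumably a CPU with AVX support), the size is a whole number of
 * 4096-byte pages, and both pointers are 32-byte aligned, the copy is done
 * with pcilib_memcpy4k_avx(); otherwise it falls back to plain memcpy().
 */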
void pcilib_pagecpy(void *dst, const void *src, size_t size) {
    int gen = pcilib_get_cpu_gen();
    if ((gen > 3) && ((size % 4096) == 0) && (((uintptr_t)dst % 32) == 0) && (((uintptr_t)src % 32) == 0)) {
        pcilib_memcpy4k_avx(dst, src, size);
    } else
        memcpy(dst, src, size);
}
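
/*
 * Usage sketch (not part of the library): the AVX path in pcilib_pagecpy()
 * is only taken when both buffers are 32-byte aligned and the size is a
 * multiple of 4096, so a caller would typically allocate page-aligned
 * memory. The buffer size and the use of posix_memalign() below are
 * illustrative assumptions, not taken from this file; posix_memalign() is
 * available here since _POSIX_C_SOURCE is set to 200112L above and
 * <stdlib.h> is included.
 *
 *     #include <stdlib.h>
 *     #include <string.h>
 *
 *     static void example_copy(void) {
 *         const size_t len = 16 * 4096;        // 16 pages, multiple of 4096
 *         void *src, *dst;
 *
 *         if (posix_memalign(&src, 4096, len)) return;
 *         if (posix_memalign(&dst, 4096, len)) { free(src); return; }
 *
 *         memset(src, 0x5a, len);
 *         pcilib_pagecpy(dst, src, len);       // AVX path when supported
 *
 *         free(src);
 *         free(dst);
 *     }
 */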