# Makefile - forked from CNugteren/myGEMM
# ==================================================================================================
# Project:
# Exploring the performance of general matrix-multiplication on an NVIDIA Tesla K40m GPU.
#
# File information:
# Institution.... SURFsara <www.surfsara.nl>
# Author......... Cedric Nugteren <[email protected]>
# Changed at..... 2014-11-07
# License........ MIT license
# Tab-size....... 4 spaces
# Line length.... 100 characters
#
# ==================================================================================================
# Install locations of the toolkits this build uses. CUDA and OpenCL are both taken from
# CUDA_HOME; clBLAS is taken from CLBLAS_HOME.
CUDADIR   := $(CUDA_HOME)
OPENCLDIR := $(CUDA_HOME)
CLBLASDIR := $(CLBLAS_HOME)
# CUDA switch: the value 1 enables all CUDA components (including cuBLAS) in the code; use any
# other value to build for a non-NVIDIA system.
ENABLE_CUDA := 1
# ==================================================================================================
# Toolchain: the host C++ compiler and the CUDA compiler
CXX  := g++
NVCC := nvcc
# Optimisation and warning flags, appended to whatever CXXFLAGS / NVFLAGS already contain
# (-Xcompiler makes nvcc hand -Wall to the host compiler)
CXXFLAGS += -O3 -Wall
NVFLAGS  += -O3 -arch=sm_35 -Xcompiler -Wall
# Optional register limit for nvcc (disabled):
#NVFLAGS += -maxrregcount 127
# Directory layout: sources, linked binary, object files and helper scripts
SRCDIR := src
BINDIR := bin
OBJDIR := obj
SCRDIR := scripts
# Pass -DENABLE_CUDA to the compilers only when the ENABLE_CUDA switch is exactly 1
ifeq (1,$(ENABLE_CUDA))
  DEFINES += -DENABLE_CUDA
endif
# OpenCL and clBLAS: header search paths, library search paths and the libraries themselves
INCLUDES += -I$(OPENCLDIR)/include
INCLUDES += -I$(CLBLASDIR)/include
LDFLAGS  += -L$(OPENCLDIR)/lib64 -L$(CLBLASDIR)/lib64 -lOpenCL -lclBLAS
# CUDA (libcuda, libcudart) and cuBLAS: only added when the ENABLE_CUDA switch is exactly 1
ifeq (1,$(ENABLE_CUDA))
  INCLUDES += -I$(CUDADIR)/include
  LDFLAGS  += -L$(CUDADIR)/lib64 -lcuda -lcudart -lcublas
endif
# Source files, named relative to $(SRCDIR): host C++ code and CUDA code
CPPSOURCES := main.cpp clGEMM.cpp libclblas.cpp
GPUSOURCES := cuGEMM.cu libcublas.cu
# Object files mirror the source names under $(OBJDIR) with ".o" appended
# (main.cpp -> $(OBJDIR)/main.cpp.o); the CUDA objects are only listed when ENABLE_CUDA is 1
OBJS = $(patsubst %.cpp,$(OBJDIR)/%.cpp.o,$(CPPSOURCES))
ifeq (1,$(ENABLE_CUDA))
  OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu.o,$(GPUSOURCES))
endif
# Path of the final executable
BIN = $(BINDIR)/myGEMM
# ==================================================================================================
# All (default target): build the binary, then run it
all: build run
# `build` is kept as the public entry point; it simply requests the binary
build: $(BIN)
# Link the binary from the objects. $(BIN) is a real file target, so the link step only runs when
# an object is newer than the binary (or the binary is missing) instead of on every invocation.
$(BIN): $(OBJS)
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) $(DEFINES) $(INCLUDES) $^ $(LDFLAGS) -o $@
# C++ sources: compile $(SRCDIR)/<name>.cpp into $(OBJDIR)/<name>.cpp.o. Every header found in
# $(SRCDIR) is a prerequisite, so changing any header recompiles all C++ objects. $(wildcard) is
# used so that an unmatched pattern expands to nothing rather than to a literal, non-existent
# "*.h" prerequisite that would make this rule inapplicable.
$(OBJDIR)/%.cpp.o: $(SRCDIR)/%.cpp $(wildcard $(SRCDIR)/*.h)
	@mkdir -p $(@D)
	$(CXX) -c $(CXXFLAGS) $(DEFINES) $(INCLUDES) $< -o $@
# CUDA sources: compile $(SRCDIR)/<name>.cu into $(OBJDIR)/<name>.cu.o with nvcc. Every header
# and every .cl file found in $(SRCDIR) is a prerequisite, so changing any of them recompiles all
# CUDA objects. $(wildcard) is used so that an unmatched pattern expands to nothing rather than to
# a literal, non-existent "*.h" / "*.cl" prerequisite that would make this rule inapplicable.
$(OBJDIR)/%.cu.o: $(SRCDIR)/%.cu $(wildcard $(SRCDIR)/*.h) $(wildcard $(SRCDIR)/*.cl)
	@mkdir -p $(@D)
	$(NVCC) -c $(NVFLAGS) $(DEFINES) $(INCLUDES) $< -o $@
# Generate assembly code from the kernels and print some statistics. `build` is a prerequisite
# because the cuobjdump steps read $(BIN) and all generated files are written next to it.
# Steps, in order:
#   1. compile cuGEMM.cu to a cubin with verbose ptxas output
#   2. disassemble the cubin into $(BIN).cu.asm
#   3. extract the PTX for cuGEMM from the binary and move it to $(BIN).cu.ptx
#   4. dump the SASS of the binary into $(BIN).cu.sass
#   5. run the statistics script on the SASS dump
# NOTE(review): the extracted PTX file name contains "sm_35"; presumably it has to follow the
# -arch value in NVFLAGS - confirm when changing the architecture.
inspect: build
	$(NVCC) -cubin $(NVFLAGS) -Xptxas -v $(INCLUDES) $(SRCDIR)/cuGEMM.cu -o $(BIN).cu.cubin
	nvdisasm -lrm narrow $(BIN).cu.cubin > $(BIN).cu.asm
	cuobjdump $(BIN) -xptx cuGEMM
	mv cuGEMM.sm_35.ptx $(BIN).cu.ptx
	cuobjdump $(BIN) -sass > $(BIN).cu.sass
	sh $(SCRDIR)/stats.sh $(BIN).cu.sass
# Execute the binary. `build` is a prerequisite so that the binary is built before it is
# started, also when `make -j` processes the prerequisites of `all` in parallel.
run: build
	./$(BIN)
# Clean-up: delete the object files, then the binary and every file derived from it ($(BIN).*)
clean:
	$(RM) $(OBJDIR)/*.o
	$(RM) $(BIN) $(BIN).*
# ==================================================================================================
# Targets that name actions rather than files. Declaring them phony keeps a stray file or
# directory called e.g. "all" or "build" from making the target look up to date.
.PHONY: all build run inspect clean
# ==================================================================================================