Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Adding a VLFeat-compliant feature descriptor #167

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$

option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF)
option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
Expand Down Expand Up @@ -99,10 +98,6 @@ find_package(CUDAToolkit)
message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}")
set(CUDA_VERSION ${CUDAToolkit_VERSION})

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif()

if(PopSift_ERRCHK_AFTER_KERNEL)
message(STATUS "Synchronizing and checking errors after every kernel call")
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
Expand Down Expand Up @@ -153,13 +148,6 @@ else()
set(DISABLE_GRID_FILTER 0)
endif()

if(PopSift_USE_NVTX_PROFILING)
# library required for NVTX profiling of the CPU
set(PopSift_USE_NVTX 1)
else()
set(PopSift_USE_NVTX 0)
endif()

add_subdirectory(src)

if(PopSift_BUILD_DOCS)
Expand Down Expand Up @@ -197,7 +185,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS})
message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES})
message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS})
message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE})
message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
before_build:
- md build
- cd build
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- ls -l

build:
Expand Down
1 change: 0 additions & 1 deletion cmake/sift_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@

#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@
#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@
#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@

3 changes: 0 additions & 3 deletions cudaInstallAppveyor.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10

appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip
dir

echo Unzipping CUDA toolkit 12
tar -xf cuda_nvcc.zip
tar -xf cuda_cudart.zip
tar -xf cuda_nvtx.zip
tar -xf vs_integration.zip
dir

Expand All @@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras"
echo Copying toolkit files to install dir(s)
xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y


Expand Down
7 changes: 1 addition & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_library(popsift
popsift/s_desc_grid.cu popsift/s_desc_grid.h
popsift/s_desc_igrid.cu popsift/s_desc_igrid.h
popsift/s_desc_notile.cu popsift/s_desc_notile.h
popsift/s_desc_vlfeat.cu popsift/s_desc_vlfeat.h
popsift/s_desc_norm_rs.h
popsift/s_desc_norm_l2.h
popsift/s_desc_normalize.h
Expand All @@ -45,12 +46,6 @@ target_link_libraries(popsift
CUDA::cudart
Threads::Threads)

if(PopSift_USE_NVTX_PROFILING)
target_link_libraries(popsift
PUBLIC
CUDA::nvtx3)
endif()

set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION})
set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d")
set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
Expand Down
4 changes: 4 additions & 0 deletions src/application/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ endif()

find_package(DevIL COMPONENTS IL ILU) # yields IL_FOUND, IL_LIBRARIES, IL_INCLUDE_DIR

# for newer CMake versions and Boost 1.70 pr newer must use Boost's make file
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
cmake_policy(SET CMP0167 NEW)
endif()
if(PopSift_BOOST_USE_STATIC_LIBS)
set(Boost_USE_STATIC_LIBS ON)
endif()
Expand Down
5 changes: 1 addition & 4 deletions src/application/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& in
// "Choice of span (1-sided) for Gauss filters. Default is VLFeat-like computation depending on sigma. "
// "Options are: vlfeat, relative, relative-all, opencv, fixed9, fixed15"
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
"Choice of descriptor extraction modes:\n"
"loop, iloop, grid, igrid, notile\n"
"Default is loop\n"
"loop is OpenCV-like horizontal scanning, computing only valid points, grid extracts only useful points but rounds them, iloop uses linear texture and rotated gradiant fetching. igrid is grid with linear interpolation. notile is like igrid but avoids redundant gradiant fetching.")
popsift::Config::getDescModeUsage())
("popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::PopSift); }),
"During the initial upscale, shift pixels by 1. In extrema refinement, steps up to 0.6, do not reject points when reaching max iterations, "
"first contrast threshold is .8 * peak thresh. Shift feature coords octave 0 back to original pos.")
Expand Down
5 changes: 1 addition & 4 deletions src/application/match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ static void parseargs(int argc, char** argv, popsift::Config& config, string& lF
( "gauss-mode", value<std::string>()->notifier([&](const std::string& s) { config.setGaussMode(s); }),
popsift::Config::getGaussModeUsage() )
("desc-mode", value<std::string>()->notifier([&](const std::string& s) { config.setDescMode(s); }),
"Choice of descriptor extraction modes:\n"
"loop, iloop, grid, igrid, notile\n"
"Default is loop\n"
"loop is OpenCV-like horizontal scanning, computing only valid points, grid extracts only useful points but rounds them, iloop uses linear texture and rotated gradiant fetching. igrid is grid with linear interpolation. notile is like igrid but avoids redundant gradiant fetching.")
popsift::Config::getDescModeUsage() )
("popsift-mode", bool_switch()->notifier([&](bool b) { if(b) config.setMode(popsift::Config::PopSift); }),
"During the initial upscale, shift pixels by 1. In extrema refinement, steps up to 0.6, do not reject points when reaching max iterations, "
"first contrast threshold is .8 * peak thresh. Shift feature coords octave 0 back to original pos.")
Expand Down
6 changes: 0 additions & 6 deletions src/popsift/popsift.cu
Original file line number Diff line number Diff line change
Expand Up @@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img )

popsift::ImageBase* SiftJob::getImg()
{
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
_nvtx_id = nvtxRangeStartA( "inserting image" );
#endif
return _img;
}

void SiftJob::setFeatures( popsift::FeaturesBase* f )
{
_p.set_value( f );
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeEnd( _nvtx_id );
#endif
}

popsift::FeaturesHost* SiftJob::get()
Expand Down
10 changes: 0 additions & 10 deletions src/popsift/popsift.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@
#include <thread>
#include <vector>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangeStartA(a)
#define nvtxRangeEnd(a)
#endif

/* user parameters */
namespace popsift
{
Expand All @@ -50,9 +43,6 @@ class SiftJob
unsigned char* _imageData;
popsift::ImageBase* _img;
std::exception_ptr _err;
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeId_t _nvtx_id;
#endif

public:

Expand Down
213 changes: 213 additions & 0 deletions src/popsift/s_desc_vlfeat.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
/*
* Copyright 2016-2017, Simula Research Laboratory
* 2018-2020, University of Oslo
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "popsift/sift_config.h"

#include "common/assist.h"
#include "common/debug_macros.h"
#include "common/vec_macros.h"
#include "s_desc_vlfeat.h"
#include "s_gradiant.h"
#include "sift_constants.h"
#include "sift_pyramid.h"

#include <cstdio>

using namespace popsift;

__device__ static inline
void ext_desc_vlfeat_sub( const float ang,
const Extremum* ext,
float* __restrict__ features,
cudaTextureObject_t layer_tex,
const int width,
const int height )
{
const float x = ext->xpos;
const float y = ext->ypos;
const int level = ext->lpos; // old_level;
const float sig = ext->sigma;
const float SBP = fabsf(DESC_MAGNIFY * sig);

if( SBP == 0 ) {
return;
}

float cos_t;
float sin_t;
__sincosf( ang, &sin_t, &cos_t );

const float csbp = cos_t * SBP;
const float ssbp = sin_t * SBP;
const float crsbp = cos_t / SBP;
const float srsbp = sin_t / SBP;

// We have 4x4*16 bins.
// There centers have the offsets -1.5, -0.5, 0.5, 1.5 from the
// keypoint. The points that support them stretch from -2 to 2
const float2 maxdist = make_float2( -2.0f, -2.0f );

// We rotate the corner of the maximum range by the keypoint orientation.
// const float ptx = csbp * maxdist - ssbp * maxdist;
// const float pty = csbp * maxdist + ssbp * maxdist;
const float ptx = fabsf( ::fmaf( csbp, maxdist.x, -ssbp * maxdist.y ) );
const float pty = fabsf( ::fmaf( csbp, maxdist.y, ssbp * maxdist.x ) );

const float bsz = 2.0f * ( fabsf(csbp) + fabsf(ssbp) );

const int xmin = max(1, (int)floorf(x - ptx - bsz));
const int ymin = max(1, (int)floorf(y - pty - bsz));
const int xmax = min(width - 2, (int)floorf(x + ptx + bsz));
const int ymax = min(height - 2, (int)floorf(y + pty + bsz));

__shared__ float dpt[128];

for( int i=threadIdx.x; i<128; i+=blockDim.x )
{
dpt[i] = 0.0f;
}

__syncthreads();

for( int pix_y = ymin; pix_y <= ymax; pix_y += 1 )
{
for( int base_x = xmin; base_x <= xmax; base_x += 32 )
{
float mod;
float th;

get_gradiant32( mod, th, base_x, pix_y, layer_tex, level );

mod /= 2.0f; // Our mod is double that of vlfeat. Huh.

th -= ang;
while( th > M_PI2 ) th -= M_PI2;
while( th < 0.0f ) th += M_PI2;
__syncthreads();

const int pix_x = base_x + threadIdx.x;

if( ( pix_y <= ymax ) && ( pix_x <= xmax ) )
{
// d : distance from keypoint
const float2 d = make_float2( pix_x - x, pix_y - y );

// n : normalized distance from keypoint
const float2 n = make_float2( ::fmaf( crsbp, d.x, srsbp * d.y ),
::fmaf( crsbp, d.y, -srsbp * d.x ) );

const float ww = __expf( -scalbnf(n.x*n.x + n.y*n.y, -3));

const float nt = 8.0f * th / M_PI2;

// neighbouring tile on the lower side: -2, -1, 0 or 1
// (must use floorf because casting rounds towards zero
const int3 t0 = make_int3( (int)floorf(n.x - 0.5f),
(int)floorf(n.y - 0.5f),
(int)nt );
const float wgt_x = - ( n.x - ( t0.x + 0.5f ) );
const float wgt_y = - ( n.y - ( t0.y + 0.5f ) );
const float wgt_t = - ( nt - t0.z );

for( int tx=0; tx<2; tx++ )
{
for( int ty=0; ty<2; ty++ )
{
for( int tt=0; tt<2; tt++ )
{
if( ( t0.y + ty >= -2 ) &&
( t0.y + ty < 2 ) &&
( t0.x + tx >= -2 ) &&
( t0.x + tx < 2 ) )
{
float i_wgt_x = ( tx == 0 ) ? 1.0f + wgt_x : wgt_x;
float i_wgt_y = ( ty == 0 ) ? 1.0f + wgt_y : wgt_y;
float i_wgt_t = ( tt == 0 ) ? 1.0f + wgt_t : wgt_t;

i_wgt_x = fabsf( i_wgt_x );
i_wgt_y = fabsf( i_wgt_y );
i_wgt_t = fabsf( i_wgt_t );

const float val = ww
* mod
* i_wgt_x
* i_wgt_y
* i_wgt_t;

const int offset = 80
+ ( t0.y + ty ) * 32
+ ( t0.x + tx ) * 8
+ ( t0.z + tt ) % 8;

atomicAdd( &dpt[offset], val );
}
}
}
}
}
__syncthreads();
}
}

for( int i=threadIdx.x; i<128; i+=blockDim.x )
{
features[i] = dpt[i];
}
}

__global__ void ext_desc_vlfeat( int octave, cudaTextureObject_t layer_tex, int w, int h)
{
const int o_offset = dct.ori_ps[octave] + blockIdx.x;
Descriptor* desc = &dbuf.desc [o_offset];
const int ext_idx = dobuf.feat_to_ext_map[o_offset];
Extremum* ext = dobuf.extrema + ext_idx;

const int ext_base = ext->idx_ori;
const int ori_num = o_offset - ext_base;
const float ang = ext->orientation[ori_num];

ext_desc_vlfeat_sub( ang,
ext,
desc->features,
layer_tex,
w,
h );
}

namespace popsift
{

bool start_ext_desc_vlfeat( const int octave, Octave& oct_obj )
{
dim3 block;
dim3 grid;
grid.x = hct.ori_ct[octave];
grid.y = 1;
grid.z = 1;

if( grid.x == 0 ) return false;

block.x = 32;
block.y = 1;
block.z = 1;

size_t shared_size = 4 * 128 * sizeof(float);

ext_desc_vlfeat
<<<grid,block,shared_size,oct_obj.getStream()>>>
( octave,
oct_obj.getDataTexPoint( ),
oct_obj.getWidth(),
oct_obj.getHeight() );

POP_SYNC_CHK;

return true;
}

}; // namespace popsift
Loading
Loading