RandomBallCover  1.2.1
 Hosted by GitHub
algorithms.hpp
Go to the documentation of this file.
1 
32 #ifndef RBC_ALGORITHMS_HPP
33 #define RBC_ALGORITHMS_HPP
34 
35 #include <CLUtils.hpp>
36 #include <RBC/data_types.hpp>
37 #include <RBC/common.hpp>
38 
39 
43 namespace cl_algo
44 {
47 namespace RBC
48 {
49 
/// Enumerates the kernels available for computing the array of distances
/// in the construction step. Used as the template parameter `K` of the
/// distance-computation, construction and search interface classes.
// NOTE(review): the SHARED_* / KINECT_* suffixes presumably name which inputs
// (X, R) are cached in local memory by the selected kernel -- confirm against
// the kernel sources before relying on it.
53  enum class KernelTypeC : uint8_t
54  {
55  SHARED_NONE,
56  SHARED_R,
57  SHARED_X_R,
58  KINECT,
59  KINECT_R,
60  KINECT_X_R
61  };
62 
63 
91  template <KernelTypeC K = KernelTypeC::SHARED_NONE>
93  {
94  public:
99  enum class Memory : uint8_t
100  {
101  H_IN_X,
102  H_IN_R,
103  H_OUT_D,
104  D_IN_X,
105  D_IN_R,
106  D_OUT_D
107  };
108 
110  RBCComputeDists (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
112  cl::Memory& get (RBCComputeDists::Memory mem);
114  void init (unsigned int _nx, unsigned int _nr, unsigned int _d = 8, float _a = 1.f, Staging _staging = Staging::IO);
116  void write (RBCComputeDists::Memory mem = RBCComputeDists::Memory::D_IN_X, void *ptr = nullptr, bool block = CL_FALSE,
117  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
119  void* read (RBCComputeDists::Memory mem = RBCComputeDists::Memory::H_OUT_D, bool block = CL_TRUE,
120  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
122  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
124  float getAlpha ();
126  void setAlpha (float _a);
127 
128  cl_float *hPtrInX;
129  cl_float *hPtrInR;
130  cl_float *hPtrOutD;
132  private:
133  clutils::CLEnv &env;
134  clutils::CLEnvInfo<1> info;
135  cl::Context context;
136  cl::CommandQueue queue;
137  cl::Kernel kernel;
138  cl::NDRange global, local;
139  Staging staging;
140  float a;
141  unsigned int nx, nr, d;
142  unsigned int bufferXSize, bufferRSize, bufferDSize;
143  cl::Buffer hBufferInX, hBufferInR, hBufferOutD;
144  cl::Buffer dBufferInX, dBufferInR, dBufferOutD;
145 
146  public:
/// Profiling overload of run: executes the kernel once and reports the time
/// measured by the supplied timer.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer whose event is attached to the kernel launch.
/// \param[in] events  optional wait list handed to the enqueue call.
/// \return the value of timer.duration () read after timer.wait () returns.
154  template <typename period>
155  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
156  {
// Launch with no global offset, using the global/local ranges held by the object.
157  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, local, events, &timer.event ());
// Submit the queued work, then wait on the timer before reading the duration.
158  queue.flush (); timer.wait ();
159 
160  return timer.duration ();
161  }
162 
163  };
164 
165 
/// Enumerates configurations for the Reduce class
/// (its template parameter `C`).
167  enum class ReduceConfig : uint8_t
168  {
169  MIN,
170  MAX
171  };
172 
173 
197  template <ReduceConfig C, typename T = cl_float>
198  class Reduce
199  {
200  public:
205  enum class Memory : uint8_t
206  {
207  H_IN,
208  H_OUT,
209  D_IN,
210  D_RED,
211  D_OUT
212  };
213 
215  Reduce (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
217  cl::Memory& get (Reduce::Memory mem);
219  void init (unsigned int _cols, unsigned int _rows, Staging _staging = Staging::IO);
221  void write (Reduce::Memory mem = Reduce::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
222  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
224  void* read (Reduce::Memory mem = Reduce::Memory::H_OUT, bool block = CL_TRUE,
225  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
227  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
228 
229  T *hPtrIn;
230  T *hPtrOut;
232  private:
233  clutils::CLEnv &env;
234  clutils::CLEnvInfo<1> info;
235  cl::Context context;
236  cl::CommandQueue queue;
237  cl::Kernel recKernel, groupRecKernel;
238  cl::NDRange globalR, globalGR, local;
239  Staging staging;
240  size_t wgMultiple, wgXdim;
241  unsigned int cols, rows;
242  unsigned int bufferInSize, bufferGRSize, bufferOutSize;
243  cl::Buffer hBufferIn, hBufferOut;
244  cl::Buffer dBufferIn, dBufferR, dBufferOut;
245 
246  public:
/// Profiling overload of run: executes the reduce kernel(s) and returns the
/// accumulated time reported by the supplied timer.
///
/// recKernel is always launched over globalR. When wgXdim differs from 1, a
/// second launch of groupRecKernel over globalGR follows and its duration is
/// added to the total.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer whose event is attached to every launch.
/// \param[in] events  optional wait list; applied to the first launch only.
/// \return the sum of timer.duration () over the launches performed.
// NOTE(review): wgXdim is presumably the number of work-groups per row computed
// in init () -- confirm in algorithms.cpp.
254  template <typename period>
255  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
256  {
257  double pTime;
258 
259  if (wgXdim == 1)
260  {
// Single pass: only recKernel is executed.
261  queue.enqueueNDRangeKernel (recKernel, cl::NullRange, globalR, local, events, &timer.event ());
262  queue.flush (); timer.wait ();
263  pTime = timer.duration ();
264  }
265  else
266  {
// First pass: same launch as in the single-pass branch.
267  queue.enqueueNDRangeKernel (recKernel, cl::NullRange, globalR, local, events, &timer.event ());
268  queue.flush (); timer.wait ();
269  pTime = timer.duration ();
270 
// Second pass: launched without a wait list; the previous pass has already been waited on.
271  queue.enqueueNDRangeKernel (groupRecKernel, cl::NullRange, globalGR, local, nullptr, &timer.event ());
272  queue.flush (); timer.wait ();
273  pTime += timer.duration ();
274  }
275 
276  return pTime;
277  }
278 
279  };
280 
281 
311  class RBCMin
312  {
313  public:
318  enum class Memory : uint8_t
319  {
320  H_IN_D,
321  H_OUT_ID,
322  H_OUT_RNK,
323  H_OUT_N,
324  D_IN_D,
325  D_MINS,
326  D_OUT_ID,
327  D_OUT_RNK,
328  D_OUT_N
329  };
330 
332  RBCMin (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
334  cl::Memory& get (RBCMin::Memory mem);
336  void init (unsigned int _cols, unsigned int _rows, int _accCounters = 1, Staging _staging = Staging::IO);
338  void write (RBCMin::Memory mem = RBCMin::Memory::D_IN_D, void *ptr = nullptr, bool block = CL_FALSE,
339  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
341  void* read (RBCMin::Memory mem = RBCMin::Memory::H_OUT_ID, bool block = CL_TRUE,
342  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
344  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
345 
346  cl_float *hPtrInD;
349  cl_uint *hPtrOutRnk;
351  cl_uint *hPtrOutN;
354  private:
355  clutils::CLEnv &env;
356  clutils::CLEnvInfo<1> info;
357  cl::Context context;
358  cl::CommandQueue queue;
359  cl::Kernel initKernel, minsKernel, groupMinsKernel;
360  cl::NDRange globalInit, globalM, globalGM, local;
361  Staging staging;
362  size_t wgMultiple, wgXdim;
363  int accCounters;
364  unsigned int cols, rows;
365  unsigned int bufferDSize, bufferGMSize, bufferIDSize, bufferRnkSize, bufferNSize;
366  cl::Buffer hBufferInD, hBufferOutID, hBufferOutRnk, hBufferOutN;
367  cl::Buffer dBufferInD, dBufferGM, dBufferOutID, dBufferOutRnk, dBufferOutN;
368 
369  public:
/// Profiling overload of run: executes the kernels of the class and returns
/// the accumulated time reported by the supplied timer.
///
/// Sequence of launches:
///  1. initKernel over globalInit, only when accCounters == 1 (the local
///     range is cl::NullRange for this launch).
///  2. minsKernel over globalM.
///  3. groupMinsKernel over globalGM, only when wgXdim differs from 1.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer whose event is attached to every launch.
/// \param[in] events  optional wait list; passed to the initKernel and
///                    minsKernel launches, not to groupMinsKernel.
/// \return the sum of timer.duration () over the launches performed.
377  template <typename period>
378  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
379  {
// Starts at zero because the init step below is conditional.
380  double pTime = 0.0;
381 
382  if (accCounters == 1)
383  {
384  queue.enqueueNDRangeKernel (initKernel, cl::NullRange, globalInit, cl::NullRange, events, &timer.event ());
385  queue.flush (); timer.wait ();
386  pTime = timer.duration ();
387  }
388 
389  if (wgXdim == 1)
390  {
// Single pass: only minsKernel is executed.
391  queue.enqueueNDRangeKernel (minsKernel, cl::NullRange, globalM, local, events, &timer.event ());
392  queue.flush (); timer.wait ();
393  pTime += timer.duration ();
394  }
395  else
396  {
// First pass: same launch as in the single-pass branch.
397  queue.enqueueNDRangeKernel (minsKernel, cl::NullRange, globalM, local, events, &timer.event ());
398  queue.flush (); timer.wait ();
399  pTime += timer.duration ();
400 
// Second pass: launched without a wait list; the previous pass has already been waited on.
401  queue.enqueueNDRangeKernel (groupMinsKernel, cl::NullRange, globalGM, local, nullptr, &timer.event ());
402  queue.flush (); timer.wait ();
403  pTime += timer.duration ();
404  }
405 
406  return pTime;
407  }
408 
409  };
410 
411 
/// Enumerates configurations for the Scan class
/// (its template parameter `C`).
413  enum class ScanConfig : uint8_t
414  {
415  INCLUSIVE,
416  EXCLUSIVE
417  };
418 
419 
443  template <ScanConfig C, typename T = cl_int>
444  class Scan
445  {
446  public:
451  enum class Memory : uint8_t
452  {
453  H_IN,
454  H_OUT,
455  D_IN,
456  D_SUMS,
457  D_OUT
458  };
459 
461  Scan (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
463  cl::Memory& get (Scan::Memory mem);
465  void init (unsigned int _cols, unsigned int _rows, Staging _staging = Staging::IO);
467  void write (Scan::Memory mem = Scan::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
468  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
470  void* read (Scan::Memory mem = Scan::Memory::H_OUT, bool block = CL_TRUE,
471  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
473  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
474 
475  T *hPtrIn;
476  T *hPtrOut;
478  private:
479  clutils::CLEnv &env;
480  clutils::CLEnvInfo<1> info;
481  cl::Context context;
482  cl::CommandQueue queue;
483  cl::Kernel kernelScan, kernelSumsScan, kernelAddSums;
484  cl::NDRange globalScan, globalSumsScan, localScan;
485  cl::NDRange globalAddSums, localAddSums, offsetAddSums;
486  Staging staging;
487  size_t wgMultiple, wgXdim;
488  unsigned int cols, rows, bufferSize, bufferSumsSize;
489  cl::Buffer hBufferIn, hBufferOut;
490  cl::Buffer dBufferIn, dBufferOut, dBufferSums;
491 
492  public:
/// Profiling overload of run: executes the scan kernel(s) and returns the
/// accumulated time reported by the supplied timer.
///
/// kernelScan is always launched over globalScan. When wgXdim differs from 1,
/// two more launches follow: kernelSumsScan over globalSumsScan, and
/// kernelAddSums over globalAddSums starting at offsetAddSums.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer whose event is attached to every launch.
/// \param[in] events  optional wait list; applied to the kernelScan launch only.
/// \return the sum of timer.duration () over the launches performed.
500  template <typename period>
501  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
502  {
503  double pTime;
504 
505  if (wgXdim == 1)
506  {
// Single pass: only kernelScan is executed.
507  queue.enqueueNDRangeKernel (
508  kernelScan, cl::NullRange, globalScan, localScan, events, &timer.event ());
509  queue.flush (); timer.wait ();
510  pTime = timer.duration ();
511  }
512  else
513  {
// First pass: same launch as in the single-pass branch.
514  queue.enqueueNDRangeKernel (
515  kernelScan, cl::NullRange, globalScan, localScan, events, &timer.event ());
516  queue.flush (); timer.wait ();
517  pTime = timer.duration ();
518 
// Second pass: reuses localScan as the work-group size; no wait list.
519  queue.enqueueNDRangeKernel (
520  kernelSumsScan, cl::NullRange, globalSumsScan, localScan, nullptr, &timer.event ());
521  queue.flush (); timer.wait ();
522  pTime += timer.duration ();
523 
// Third pass: the only launch here that uses a non-null global offset.
524  queue.enqueueNDRangeKernel (
525  kernelAddSums, offsetAddSums, globalAddSums, localAddSums, nullptr, &timer.event ());
526  queue.flush (); timer.wait ();
527  pTime += timer.duration ();
528  }
529 
530  return pTime;
531  }
532 
533  };
534 
535 
/// Enumerates configurations for the RBCPermute class. Also used as the
/// template parameter `P` of the construction and search interface classes.
537  enum class RBCPermuteConfig : uint8_t
538  {
539  GENERIC,
540  KINECT
541  };
542 
543 
575  template <RBCPermuteConfig C = RBCPermuteConfig::GENERIC>
577  {
578  public:
583  enum class Memory : uint8_t
584  {
585  H_IN_X,
586  H_IN_ID,
587  H_IN_RNK,
588  H_IN_O,
589  H_OUT_X_P,
590  H_OUT_ID_P,
591  D_IN_X,
592  D_IN_ID,
593  D_IN_RNK,
594  D_IN_O,
595  D_OUT_X_P,
596  D_OUT_ID_P
597  };
598 
600  RBCPermute (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
602  cl::Memory& get (RBCPermute::Memory mem);
604  void init (unsigned int _nx, unsigned int _nr, unsigned int _d = 8, int _permID = 0, Staging _staging = Staging::IO);
606  void write (RBCPermute::Memory mem = RBCPermute::Memory::D_IN_X, void *ptr = nullptr, bool block = CL_FALSE,
607  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
609  void* read (RBCPermute::Memory mem = RBCPermute::Memory::H_OUT_X_P, bool block = CL_TRUE,
610  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
612  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
613 
614  cl_float *hPtrInX;
617  cl_uint *hPtrInRnk;
619  cl_uint *hPtrInO;
621  cl_float *hPtrOutXp;
625  private:
626  clutils::CLEnv &env;
627  clutils::CLEnvInfo<1> info;
628  cl::Context context;
629  cl::CommandQueue queue;
630  cl::Kernel kernel;
631  cl::NDRange global;
632  Staging staging;
633  int permID;
634  unsigned int nx, nr, d;
635  unsigned int bufferXSize, bufferIDSize, bufferRnkSize, bufferOSize;
636  cl::Buffer hBufferInX, hBufferInID, hBufferInRnk, hBufferInO, hBufferOutXp, hBufferOutIDp;
637  cl::Buffer dBufferInX, dBufferInID, dBufferInRnk, dBufferInO, dBufferOutXp, dBufferOutIDp;
638 
639  public:
/// Profiling overload of run: executes the kernel once and reports the time
/// measured by the supplied timer.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer whose event is attached to the kernel launch.
/// \param[in] events  optional wait list handed to the enqueue call.
/// \return the value of timer.duration () read after timer.wait () returns.
647  template <typename period>
648  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
649  {
// The local range is cl::NullRange: no work-group size is specified by this class.
650  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
651  queue.flush (); timer.wait ();
652 
653  return timer.duration ();
654  }
655 
656  };
657 
658 
698  template <KernelTypeC K, RBCPermuteConfig P>
700  {
701  public:
706  enum class Memory : uint8_t
707  {
708  H_IN_X,
709  H_IN_R,
710  H_OUT_ID,
711  H_OUT_RNK,
712  H_OUT_N,
713  H_OUT_O,
714  H_OUT_X_P,
715  H_OUT_ID_P,
716  D_IN_X,
717  D_IN_R,
718  D_OUT_D,
719  D_OUT_ID,
720  D_OUT_RNK,
721  D_OUT_N,
722  D_OUT_O,
723  D_OUT_X_P,
724  D_OUT_ID_P
725  };
726 
728  RBCConstruct (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
730  cl::Memory& get (RBCConstruct::Memory mem);
732  void init (unsigned int _nx, unsigned int _nr, unsigned int _d, float _a = 1.f, int _permID = 0, Staging _staging = Staging::IO);
734  void write (RBCConstruct::Memory mem = RBCConstruct::Memory::D_IN_X, void *ptr = nullptr, bool block = CL_FALSE,
735  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
737  void* read (RBCConstruct::Memory mem = RBCConstruct::Memory::H_OUT_X_P, bool block = CL_TRUE,
738  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
740  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
742  float getAlpha ();
744  void setAlpha (float _a);
745 
746  cl_float *hPtrInX;
747  cl_float *hPtrInR;
750  cl_uint *hPtrOutRnk;
752  cl_uint *hPtrOutN;
754  cl_uint *hPtrOutO;
756  cl_float *hPtrOutXp;
760  private:
761  clutils::CLEnv &env;
762  clutils::CLEnvInfo<1> info;
763  cl::Context context;
764  cl::CommandQueue queue;
765  RBCComputeDists<K> rbcCompDists;
766  RBCMin rbcMinDists;
767  Scan<ScanConfig::EXCLUSIVE> rbcScanNLists;
768  RBCPermute<P> rbcPermDB;
769  Staging staging;
770  int permID;
771  unsigned int nx, nr, d;
772  unsigned int bufferXSize, bufferRSize, bufferDSize, bufferIDSize;
773  unsigned int bufferNSize, bufferOSize, bufferRnkSize;
774  cl::Buffer hBufferInX, hBufferInR, hBufferOutID, hBufferOutRnk, hBufferOutN, hBufferOutO, hBufferOutXp, hBufferOutIDp;
775  cl::Buffer dBufferInX, dBufferInR, dBufferOutID, dBufferOutRnk, dBufferOutN, dBufferOutO, dBufferOutXp, dBufferOutIDp;
776 
777  public:
/// Profiling overload of run: executes the four member algorithms in order
/// (rbcCompDists, rbcMinDists, rbcScanNLists, rbcPermDB) and returns the sum
/// of the times each of them reports.
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer shared by all four stages.
/// \param[in] events  optional wait list; forwarded to the first stage only.
/// \return the accumulated value returned by the four run (timer, ...) calls.
785  template <typename period>
786  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
787  {
788  double pTime;
789 
790  pTime = rbcCompDists.run (timer, events);
// The remaining stages are called without a wait list.
791  pTime += rbcMinDists.run (timer);
792  pTime += rbcScanNLists.run (timer);
793  pTime += rbcPermDB.run (timer);
794 
795  return pTime;
796  }
797 
798  };
799 
800 
/// Enumerates the kernels available for computing the array of distances
/// (Q-X[L]) during search. Used as the template parameter `S` of RBCSearch.
803  enum class KernelTypeS : uint8_t
804  {
805  GENERIC,
806  KINECT
807  };
808 
809 
825  template <KernelTypeC K, RBCPermuteConfig P, KernelTypeS S>
826  class RBCSearch;
827 
828 
870  template <KernelTypeC K, RBCPermuteConfig P>
872  {
873  public:
878  enum class Memory : uint8_t
879  {
880  H_IN_Q,
881  H_IN_R,
882  H_IN_X_P,
883  H_IN_O,
884  H_IN_N,
885  H_OUT_R_ID,
886  H_OUT_Q_P,
887  H_OUT_NN_ID,
889  H_OUT_NN,
891  D_IN_Q,
892  D_IN_R,
893  D_IN_X_P,
894  D_IN_O,
895  D_IN_N,
896  D_OUT_R_ID,
897  D_OUT_Q_P,
898  D_OUT_NN_ID,
899  D_OUT_NN,
901  D_QR_D,
902  D_QX_D
903  };
904 
906  RBCSearch (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
908  cl::Memory& get (RBCSearch::Memory mem);
910  void init (unsigned int _nq, unsigned int _nr, unsigned int _nx, unsigned int _d, Staging _staging = Staging::IO);
912  void write (RBCSearch::Memory mem = RBCSearch::Memory::D_IN_Q, void *ptr = nullptr, bool block = CL_FALSE,
913  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
915  void* read (RBCSearch::Memory mem = RBCSearch::Memory::H_OUT_NN, bool block = CL_TRUE,
916  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
918  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
919 
920  cl_float *hPtrInQ;
921  cl_float *hPtrInR;
922  cl_float *hPtrInXp;
923  cl_uint *hPtrInO;
925  cl_uint *hPtrInN;
929  cl_float *hPtrOutQp;
931  cl_float *hPtrOutNN;
933  unsigned int max_n;
935  private:
936  void setExecParams (const std::vector<cl::Event> *events = nullptr);
937 
938  clutils::CLEnv &env;
939  clutils::CLEnvInfo<1> info;
940  cl::Context context;
941  cl::CommandQueue queue;
942  cl::NDRange globalQXD, localQXD, globalNNID, globalGNNID, globalNN, local;
943  cl::Kernel rbcCompQXDistsKernel, nnidMinsKernel, nnidGroupMinsKernel, rbcNNKernel;
944  RBCConstruct<K, P> rbcCompRIDs;
946  Staging staging;
947  size_t wgMultiple, wgXdim;
948  unsigned int nq, nr, nx, d;
949  unsigned int bufferQSize, bufferRSize, bufferXSize, bufferOSize, bufferNSize;
950  unsigned int bufferQXDSize, bufferRIDSize;
951  unsigned int bufferNNIDSize, bufferGNNIDSize, bufferNNSize;
952  cl::Buffer hBufferInQ, hBufferInR, hBufferInXp, hBufferInO, hBufferInN;
953  cl::Buffer dBufferInQ, dBufferInR, dBufferInXp, dBufferInO, dBufferInN;
954  cl::Buffer hBufferOutRID, hBufferOutQp, hBufferOutNNID, hBufferOutNN;
955  cl::Buffer dBufferOutRID, dBufferOutQp, dBufferOutNNID, dBufferOutNN;
956  cl::Buffer dBufferQXD, dBufferOutGNNID;
957 
958  public:
/// Profiling overload of run: executes the search pipeline and returns the
/// accumulated time reported by the supplied timer.
///
/// Stages, in order:
///  1. setExecParams (events), only when config is true (not timed).
///  2. rbcCompRIDs.run (timer, events).
///  3. rbcCompQXDistsKernel over globalQXD / localQXD.
///  4. nnidMinsKernel over globalNNID, followed by nnidGroupMinsKernel over
///     globalGNNID when wgXdim > 1.
///  5. rbcNNKernel over globalNN (local range cl::NullRange).
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer shared by all timed stages.
/// \param[in] events  optional wait list; forwarded to setExecParams and to
///                    rbcCompRIDs.run, not to the kernels launched directly here.
/// \param[in] config  when true, setExecParams is called before the pipeline runs.
/// \return the sum of the durations of stages 2-5.
968  template <typename period>
969  double run (clutils::GPUTimer<period> &timer,
970  const std::vector<cl::Event> *events = nullptr, bool config = false)
971  {
972  double pTime;
973 
// NOTE(review): the comment below names compMaxN, while the call made here is
// setExecParams -- presumably compMaxN is invoked from within it; confirm.
974  // compMaxN is not profiled. Its cost is incurred
975  // only once, and it's expected to be insignificant
976  if (config) setExecParams (events);
977 
978  // Compute nearest representatives
979  pTime = rbcCompRIDs.run (timer, events);
980 
981  // Compute distances from the points in the representative lists
982  queue.enqueueNDRangeKernel (rbcCompQXDistsKernel,
983  cl::NullRange, globalQXD, localQXD, nullptr, &timer.event ());
984  queue.flush (); timer.wait ();
985  pTime += timer.duration ();
986 
987  // Compute NN ids
988  queue.enqueueNDRangeKernel (nnidMinsKernel,
989  cl::NullRange, globalNNID, local, nullptr, &timer.event ());
990  queue.flush (); timer.wait ();
991  pTime += timer.duration ();
992 
// Extra pass executed only when wgXdim exceeds 1.
993  if (wgXdim > 1)
994  {
995  queue.enqueueNDRangeKernel (nnidGroupMinsKernel,
996  cl::NullRange, globalGNNID, local, nullptr, &timer.event ());
997  queue.flush (); timer.wait ();
998  pTime += timer.duration ();
999  }
1000 
1001  // Collect NNs
1002  queue.enqueueNDRangeKernel (rbcNNKernel,
1003  cl::NullRange, globalNN, cl::NullRange, nullptr, &timer.event ());
1004  queue.flush (); timer.wait ();
1005  pTime += timer.duration ();
1006 
1007  return pTime;
1008  }
1009 
1010  };
1011 
1012 
1054  template <KernelTypeC K, RBCPermuteConfig P>
1056  {
1057  public:
1062  enum class Memory : uint8_t
1063  {
1064  H_IN_Q,
1065  H_IN_R,
1066  H_IN_X_P,
1067  H_IN_O,
1068  H_IN_N,
1069  H_OUT_R_ID,
1070  H_OUT_Q_P,
1071  H_OUT_NN_ID,
1073  H_OUT_NN,
1075  D_IN_Q,
1076  D_IN_R,
1077  D_IN_X_P,
1078  D_IN_O,
1079  D_IN_N,
1080  D_OUT_R_ID,
1081  D_OUT_Q_P,
1082  D_OUT_NN_ID,
1083  D_OUT_NN,
1085  D_QR_D,
1086  D_QX_D
1087  };
1088 
1090  RBCSearch (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
1092  cl::Memory& get (RBCSearch::Memory mem);
1094  void init (unsigned int _nq, unsigned int _nr, unsigned int _nx, float _a = 1.f, Staging _staging = Staging::IO);
1096  void write (RBCSearch::Memory mem = RBCSearch::Memory::D_IN_Q, void *ptr = nullptr, bool block = CL_FALSE,
1097  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
1099  void* read (RBCSearch::Memory mem = RBCSearch::Memory::H_OUT_NN, bool block = CL_TRUE,
1100  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
1102  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
1104  float getAlpha ();
1106  void setAlpha (float _a);
1107 
1108  cl_float *hPtrInQ;
1109  cl_float *hPtrInR;
1110  cl_float *hPtrInXp;
1111  cl_uint *hPtrInO;
1113  cl_uint *hPtrInN;
1117  cl_float *hPtrOutQp;
1119  cl_float *hPtrOutNN;
1121  unsigned int max_n;
1123  private:
1124  void setExecParams (const std::vector<cl::Event> *events = nullptr);
1125 
1126  clutils::CLEnv &env;
1127  clutils::CLEnvInfo<1> info;
1128  cl::Context context;
1129  cl::CommandQueue queue;
1130  cl::NDRange globalQXD, localQXD, globalNNID, globalGNNID, globalNN, local;
1131  cl::Kernel rbcCompQXDistsKernel, nnidMinsKernel, nnidGroupMinsKernel, rbcNNKernel;
1132  RBCConstruct<K, P> rbcCompRIDs;
1134  Staging staging;
1135  float a;
1136  size_t wgMultiple, wgXdim;
1137  unsigned int nq, nr, nx, d;
1138  unsigned int bufferQSize, bufferRSize, bufferXSize, bufferOSize, bufferNSize;
1139  unsigned int bufferQXDSize, bufferRIDSize;
1140  unsigned int bufferNNIDSize, bufferGNNIDSize, bufferNNSize;
1141  cl::Buffer hBufferInQ, hBufferInR, hBufferInXp, hBufferInO, hBufferInN;
1142  cl::Buffer dBufferInQ, dBufferInR, dBufferInXp, dBufferInO, dBufferInN;
1143  cl::Buffer hBufferOutRID, hBufferOutQp, hBufferOutNNID, hBufferOutNN;
1144  cl::Buffer dBufferOutRID, dBufferOutQp, dBufferOutNNID, dBufferOutNN;
1145  cl::Buffer dBufferQXD, dBufferOutGNNID;
1146 
1147  public:
/// Profiling overload of run: executes the search pipeline and returns the
/// accumulated time reported by the supplied timer.
///
/// Stages, in order:
///  1. setExecParams (events), only when config is true (not timed).
///  2. rbcCompRIDs.run (timer, events).
///  3. rbcCompQXDistsKernel over globalQXD / localQXD.
///  4. nnidMinsKernel over globalNNID, followed by nnidGroupMinsKernel over
///     globalGNNID when wgXdim > 1.
///  5. rbcNNKernel over globalNN (local range cl::NullRange).
///
/// \tparam period  template argument of clutils::GPUTimer (presumably the time
///                 unit of the returned value -- confirm against CLUtils).
/// \param[in] timer   timer shared by all timed stages.
/// \param[in] events  optional wait list; forwarded to setExecParams and to
///                    rbcCompRIDs.run, not to the kernels launched directly here.
/// \param[in] config  when true, setExecParams is called before the pipeline runs.
/// \return the sum of the durations of stages 2-5.
1157  template <typename period>
1158  double run (clutils::GPUTimer<period> &timer,
1159  const std::vector<cl::Event> *events = nullptr, bool config = false)
1160  {
1161  double pTime;
1162 
// NOTE(review): the comment below names compMaxN, while the call made here is
// setExecParams -- presumably compMaxN is invoked from within it; confirm.
1163  // compMaxN is not profiled. Its cost is incurred
1164  // only once, and it's expected to be insignificant
1165  if (config) setExecParams (events);
1166 
1167  // Compute nearest representatives
1168  pTime = rbcCompRIDs.run (timer, events);
1169 
1170  // Compute distances from the points in the representative lists
1171  queue.enqueueNDRangeKernel (rbcCompQXDistsKernel,
1172  cl::NullRange, globalQXD, localQXD, nullptr, &timer.event ());
1173  queue.flush (); timer.wait ();
1174  pTime += timer.duration ();
1175 
1176  // Compute NN ids
1177  queue.enqueueNDRangeKernel (nnidMinsKernel,
1178  cl::NullRange, globalNNID, local, nullptr, &timer.event ());
1179  queue.flush (); timer.wait ();
1180  pTime += timer.duration ();
1181 
// Extra pass executed only when wgXdim exceeds 1.
1182  if (wgXdim > 1)
1183  {
1184  queue.enqueueNDRangeKernel (nnidGroupMinsKernel,
1185  cl::NullRange, globalGNNID, local, nullptr, &timer.event ());
1186  queue.flush (); timer.wait ();
1187  pTime += timer.duration ();
1188  }
1189 
1190  // Collect NNs
1191  queue.enqueueNDRangeKernel (rbcNNKernel,
1192  cl::NullRange, globalNN, cl::NullRange, nullptr, &timer.event ());
1193  queue.flush (); timer.wait ();
1194  pTime += timer.duration ();
1195 
1196  return pTime;
1197  }
1198 
1199  };
1200 
1201 }
1202 }
1203 
1204 #endif // RBC_ALGORITHMS_HPP
cl_float * hPtrInXp
Definition: algorithms.hpp:922
Reduce(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
void setAlpha(float _a)
Sets the scaling factor α.
Definition: algorithms.cpp:404
cl_float * hPtrInR
Definition: algorithms.hpp:747
cl_float * hPtrInR
Definition: algorithms.hpp:921
T * hPtrOut
Definition: algorithms.hpp:230
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:878
Interface class for the rbcPermute kernel.
Definition: algorithms.hpp:576
RBCPermute(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:1297
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:155
void write(RBCPermute::Memory mem=RBCPermute::Memory::D_IN_X, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:1507
rbc_dist_id * hPtrOutRID
Definition: algorithms.hpp:927
void init(unsigned int _cols, unsigned int _rows, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:1078
cl_uint * hPtrInO
Definition: algorithms.hpp:619
cl_float * hPtrInR
Definition: algorithms.hpp:1109
rbc_dist_id * hPtrOutIDp
Definition: algorithms.hpp:622
Interface class for the scan kernel.
Definition: algorithms.hpp:444
T * hPtrIn
Definition: algorithms.hpp:229
void * read(RBCPermute::Memory mem=RBCPermute::Memory::H_OUT_X_P, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1551
Scan(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
void init(unsigned int _nx, unsigned int _nr, unsigned int _d, float _a=1.f, int _permID=0, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:1673
RBCComputeDists(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:54
Interface class for the reduce kernels.
Definition: algorithms.hpp:198
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:501
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:205
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:451
cl_float * hPtrOutQp
Definition: algorithms.hpp:929
cl_float * hPtrOutD
Definition: algorithms.hpp:130
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:255
void * read(RBCMin::Memory mem=RBCMin::Memory::H_OUT_ID, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:960
cl_float * hPtrOutXp
Definition: algorithms.hpp:756
cl_float * hPtrInX
Definition: algorithms.hpp:128
T * hPtrOut
Definition: algorithms.hpp:476
cl_uint * hPtrOutRnk
Definition: algorithms.hpp:750
Declarations of data types used by the Random Ball Cover data structure.
Offers classes which set up kernel execution parameters and provide interfaces for the handling of me...
Definition: algorithms.hpp:43
cl_float * hPtrOutQp
Definition: algorithms.hpp:1117
cl_float * hPtrOutNN
Definition: algorithms.hpp:1119
void write(RBCComputeDists::Memory mem=RBCComputeDists::Memory::D_IN_X, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:319
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:377
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:689
cl_uint * hPtrOutN
Definition: algorithms.hpp:351
Declares classes used by the OpenCL interface classes in cl_algo.
ReduceConfig
Enumerates configurations for the Reduce class.
Definition: algorithms.hpp:167
cl_float * hPtrInQ
Definition: algorithms.hpp:920
Interface class for searching for nearest neighbors, of a set of queries, in the Random Ball Cover data structure.
Definition: algorithms.hpp:826
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1910
float getAlpha()
Gets the scaling factor α.
Definition: algorithms.cpp:390
cl_uint * hPtrOutRnk
Definition: algorithms.hpp:349
Staging
Enumerates staging buffer configurations.
Definition: common.hpp:43
void write(RBCConstruct::Memory mem=RBCConstruct::Memory::D_IN_X, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:1836
rbc_dist_id * hPtrOutIDp
Definition: algorithms.hpp:757
void setAlpha(float _a)
Sets the scaling factor α.
Definition: algorithms.cpp:1940
Struct holding a value and a key.
Definition: data_types.hpp:43
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:786
T * hPtrIn
Definition: algorithms.hpp:475
void * read(Scan::Memory mem=Scan::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1242
void * read(RBCConstruct::Memory mem=RBCConstruct::Memory::H_OUT_X_P, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1870
cl_uint * hPtrOutN
Definition: algorithms.hpp:752
cl_float * hPtrOutXp
Definition: algorithms.hpp:621
rbc_dist_id * hPtrOutID
Definition: algorithms.hpp:347
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:969
cl_uint * hPtrInO
Definition: algorithms.hpp:923
cl_uint * hPtrInO
Definition: algorithms.hpp:1111
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:583
Definition: helper_funcs.hpp:44
ScanConfig
Enumerates configurations for the Scan class.
Definition: algorithms.hpp:413
void init(unsigned int _nx, unsigned int _nr, unsigned int _d=8, int _permID=0, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:1363
RBCConstruct(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:1595
rbc_dist_id * hPtrOutID
Definition: algorithms.hpp:748
unsigned int max_n
Definition: algorithms.hpp:933
rbc_dist_id * hPtrInID
Definition: algorithms.hpp:615
Interface class for the rbcComputeDists kernels.
Definition: algorithms.hpp:92
void * read(RBCComputeDists::Memory mem=RBCComputeDists::Memory::H_OUT_D, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:353
cl_float * hPtrInR
Definition: algorithms.hpp:129
Interface class for the rbcMinDists kernel.
Definition: algorithms.hpp:311
cl_float * hPtrInQ
Definition: algorithms.hpp:1108
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:1158
void init(unsigned int _nx, unsigned int _nr, unsigned int _d=8, float _a=1.f, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:131
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:99
cl_float * hPtrOutNN
Definition: algorithms.hpp:931
cl_uint * hPtrInRnk
Definition: algorithms.hpp:617
rbc_dist_id * hPtrOutNNID
Definition: algorithms.hpp:930
cl_uint * hPtrInN
Definition: algorithms.hpp:1113
void * read(Reduce::Memory mem=Reduce::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:665
unsigned int max_n
Definition: algorithms.hpp:1121
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:648
cl_uint * hPtrOutO
Definition: algorithms.hpp:754
rbc_dist_id * hPtrOutNNID
Definition: algorithms.hpp:1118
cl_uint * hPtrInN
Definition: algorithms.hpp:925
cl_float * hPtrInD
Definition: algorithms.hpp:346
void write(Scan::Memory mem=Scan::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:1213
void write(RBCMin::Memory mem=RBCMin::Memory::D_IN_D, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:932
Interface class for constructing the Random Ball Cover data structure.
Definition: algorithms.hpp:699
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1266
float getAlpha()
Gets the scaling factor α.
Definition: algorithms.cpp:1926
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1579
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:991
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:378
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:318
RBCMin(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:712
rbc_dist_id * hPtrOutRID
Definition: algorithms.hpp:1115
void write(Reduce::Memory mem=Reduce::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:636
void init(unsigned int _cols, unsigned int _rows, int _accCounters=1, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:767
KernelTypeC
Enumerates the kernels available for computing the array of distances in the construction step...
Definition: algorithms.hpp:53
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:706
cl_float * hPtrInXp
Definition: algorithms.hpp:1110
void init(unsigned int _cols, unsigned int _rows, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:510
RBCPermuteConfig
Enumerates configurations for the RBCPermute class.
Definition: algorithms.hpp:537
cl_float * hPtrInX
Definition: algorithms.hpp:746
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1062
KernelTypeS
Enumerates the kernels available for computing the array of distances (Q-X[L]) during search...
Definition: algorithms.hpp:803
cl_float * hPtrInX
Definition: algorithms.hpp:614