|
Removed
Link Here
|
| 1 |
--- vcl/inc/unx/saltype.h 2019-12-05 20:59:23.000000000 +0100 |
| 2 |
+++ vcl/inc/unx/saltype.h 2020-03-17 18:23:05.585171000 +0100 |
| 3 |
@@ -18,8 +18,8 @@ |
| 4 |
public: |
| 5 |
explicit SalX11Screen(unsigned int nXScreen) : mnXScreen( nXScreen ) {} |
| 6 |
unsigned int getXScreen() const { return mnXScreen; } |
| 7 |
- bool operator==(const SalX11Screen &rOther) { return rOther.mnXScreen == mnXScreen; } |
| 8 |
- bool operator!=(const SalX11Screen &rOther) { return rOther.mnXScreen != mnXScreen; } |
| 9 |
+ bool operator==(const SalX11Screen &rOther) const { return rOther.mnXScreen == mnXScreen; } |
| 10 |
+ bool operator!=(const SalX11Screen &rOther) const { return rOther.mnXScreen != mnXScreen; } |
| 11 |
}; |
| 12 |
|
| 13 |
#endif // INCLUDED_VCL_INC_UNX_SALTYPE_H |
| 14 |
--- sd/source/ui/framework/factories/BasicPaneFactory.cxx 2019-12-05 20:59:23.000000000 +0100 |
| 15 |
+++ sd/source/ui/framework/factories/BasicPaneFactory.cxx 2020-03-17 20:51:22.331805000 +0100 |
| 16 |
@@ -324,7 +324,7 @@ |
| 17 |
void SAL_CALL BasicPaneFactory::disposing ( |
| 18 |
const lang::EventObject& rEventObject) |
| 19 |
{ |
| 20 |
- if (mxConfigurationControllerWeak == rEventObject.Source) |
| 21 |
+ if (mxConfigurationControllerWeak.get() == rEventObject.Source) |
| 22 |
{ |
| 23 |
mxConfigurationControllerWeak.clear(); |
| 24 |
} |
| 25 |
--- sd/inc/OutlinerIterator.hxx 2019-12-05 20:59:23.000000000 +0100 |
| 26 |
+++ sd/inc/OutlinerIterator.hxx 2020-03-17 21:20:36.906085000 +0100 |
| 27 |
@@ -122,7 +122,7 @@ |
| 28 |
@return |
| 29 |
Returns <TRUE/> when both iterators point to the same object. |
| 30 |
*/ |
| 31 |
- bool operator== (const Iterator& rIterator); |
| 32 |
+ bool operator== (const Iterator& rIterator) const; |
| 33 |
/** Test whether two iterators point to different objects. This is just |
| 34 |
the negation of the result of the equality operator. |
| 35 |
@param rIterator |
| 36 |
@@ -130,7 +130,7 @@ |
| 37 |
@return |
| 38 |
Returns <TRUE/> when both iterators point to the different objects. |
| 39 |
*/ |
| 40 |
- bool operator!= (const Iterator& rIterator); |
| 41 |
+ bool operator!= (const Iterator& rIterator) const; |
| 42 |
/** Reverse the direction of iteration. The position of the iterator is |
| 43 |
not changed. Thus calling this method twice returns to the old state. |
| 44 |
*/ |
| 45 |
--- sd/source/ui/view/OutlinerIterator.cxx.orig 2019-12-05 20:59:23.000000000 +0100 |
| 46 |
+++ sd/source/ui/view/OutlinerIterator.cxx 2020-03-17 21:24:11.082383000 +0100 |
| 47 |
@@ -110,7 +110,7 @@ |
| 48 |
return *this; |
| 49 |
} |
| 50 |
|
| 51 |
-bool Iterator::operator== (const Iterator& rIterator) |
| 52 |
+bool Iterator::operator== (const Iterator& rIterator) const |
| 53 |
{ |
| 54 |
if (!mxIterator || !rIterator.mxIterator) |
| 55 |
return mxIterator.get() == rIterator.mxIterator.get(); |
| 56 |
@@ -118,7 +118,7 @@ |
| 57 |
return *mxIterator == *rIterator.mxIterator; |
| 58 |
} |
| 59 |
|
| 60 |
-bool Iterator::operator!= (const Iterator& rIterator) |
| 61 |
+bool Iterator::operator!= (const Iterator& rIterator) const |
| 62 |
{ |
| 63 |
return ! operator==(rIterator); |
| 64 |
} |
| 65 |
--- compilerplugins/clang/simplifybool.cxx 2019-12-05 20:59:23.000000000 +0100 |
| 66 |
+++ compilerplugins/clang/simplifybool.cxx 2020-03-17 22:03:11.369300000 +0100 |
| 67 |
@@ -241,7 +241,30 @@ |
| 68 |
<< expr->getSourceRange(); |
| 69 |
return true; |
| 70 |
} |
| 71 |
- if (auto binaryOp = dyn_cast<BinaryOperator>(expr->getSubExpr()->IgnoreParenImpCasts())) { |
| 72 |
+ auto sub = expr->getSubExpr()->IgnoreParenImpCasts(); |
| 73 |
+ auto reversed = false; |
| 74 |
+#if CLANG_VERSION >= 100000 |
| 75 |
+ if (auto const rewritten = dyn_cast<CXXRewrittenBinaryOperator>(sub)) { |
| 76 |
+ if (rewritten->isReversed()) { |
| 77 |
+ if (rewritten->getOperator() == BO_EQ) { |
| 78 |
+ auto const sem = rewritten->getSemanticForm(); |
| 79 |
+ bool match; |
| 80 |
+ if (auto const op1 = dyn_cast<BinaryOperator>(sem)) { |
| 81 |
+ match = op1->getOpcode() == BO_EQ; |
| 82 |
+ } else if (auto const op2 = dyn_cast<CXXOperatorCallExpr>(sem)) { |
| 83 |
+ match = op2->getOperator() == OO_EqualEqual; |
| 84 |
+ } else { |
| 85 |
+ match = false; |
| 86 |
+ } |
| 87 |
+ if (match) { |
| 88 |
+ sub = sem; |
| 89 |
+ reversed = true; |
| 90 |
+ } |
| 91 |
+ } |
| 92 |
+ } |
| 93 |
+ } |
| 94 |
+#endif |
| 95 |
+ if (auto binaryOp = dyn_cast<BinaryOperator>(sub)) { |
| 96 |
// Ignore macros, otherwise |
| 97 |
// OSL_ENSURE(!b, ...); |
| 98 |
// triggers. |
| 99 |
@@ -289,7 +312,7 @@ |
| 100 |
<< binaryOp->getSourceRange(); |
| 101 |
} |
| 102 |
} |
| 103 |
- if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(expr->getSubExpr()->IgnoreParenImpCasts())) { |
| 104 |
+ if (auto binaryOp = dyn_cast<CXXOperatorCallExpr>(sub)) { |
| 105 |
// Ignore macros, otherwise |
| 106 |
// OSL_ENSURE(!b, ...); |
| 107 |
// triggers. |
| 108 |
@@ -301,8 +324,8 @@ |
| 109 |
if (!(op == OO_EqualEqual || op == OO_ExclaimEqual)) |
| 110 |
return true; |
| 111 |
BinaryOperator::Opcode negatedOpcode = BinaryOperator::negateComparisonOp(BinaryOperator::getOverloadedOpcode(op)); |
| 112 |
- auto lhs = binaryOp->getArg(0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType(); |
| 113 |
- auto rhs = binaryOp->getArg(1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType(); |
| 114 |
+ auto lhs = binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType(); |
| 115 |
+ auto rhs = binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType()->getUnqualifiedDesugaredType(); |
| 116 |
auto const negOp = findOperator(compiler, negatedOpcode, lhs, rhs); |
| 117 |
if (!negOp) |
| 118 |
return true; |
| 119 |
@@ -323,8 +346,10 @@ |
| 120 |
<< expr->getSourceRange(); |
| 121 |
if (negOp != ASSUME_OPERATOR_EXISTS) |
| 122 |
report( |
| 123 |
- DiagnosticsEngine::Note, "the presumed corresponding negated operator is declared here", |
| 124 |
+ DiagnosticsEngine::Note, "the presumed corresponding negated operator for %0 and %1 is declared here", |
| 125 |
negOp->getLocation()) |
| 126 |
+ << binaryOp->getArg(reversed ? 1 : 0)->IgnoreImpCasts()->getType() |
| 127 |
+ << binaryOp->getArg(reversed ? 0 : 1)->IgnoreImpCasts()->getType() |
| 128 |
<< negOp->getSourceRange(); |
| 129 |
} |
| 130 |
return true; |
| 131 |
--- cui/source/tabpages/tpline.cxx 2019-12-05 20:59:23.000000000 +0100 |
| 132 |
+++ cui/source/tabpages/tpline.cxx 2020-03-17 22:06:49.493222000 +0100 |
| 133 |
@@ -491,7 +491,7 @@ |
| 134 |
else if( m_pLineEndList->Count() > static_cast<long>( nPos - 1 ) ) |
| 135 |
pItem.reset(new XLineStartItem( m_xLbStartStyle->get_active_text(), m_pLineEndList->GetLineEnd( nPos - 1 )->GetLineEnd() )); |
| 136 |
pOld = GetOldItem( *rAttrs, XATTR_LINESTART ); |
| 137 |
- if( pItem && ( !pOld || !( *static_cast<const XLineEndItem*>(pOld) == *pItem ) ) ) |
| 138 |
+ if( pItem && ( !pOld || *pOld != *pItem ) ) |
| 139 |
{ |
| 140 |
rAttrs->Put( *pItem ); |
| 141 |
bModified = true; |
| 142 |
--- sc/source/ui/view/viewfunc.cxx.orig 2019-12-05 20:59:23.000000000 +0100 |
| 143 |
+++ sc/source/ui/view/viewfunc.cxx 2020-03-17 23:58:50.978995000 +0100 |
| 144 |
@@ -958,7 +958,7 @@ |
| 145 |
|
| 146 |
// this should be intercepted by the pool: ?!??!?? |
| 147 |
|
| 148 |
- if (bFrame && rNewOuter == rOldOuter && rNewInner == rOldInner) |
| 149 |
+ if (bFrame && &rNewOuter == &rOldOuter && &rNewInner == &rOldInner) |
| 150 |
bFrame = false; |
| 151 |
|
| 152 |
bFrame = bFrame |
| 153 |
--- sc/source/core/opencl/formulagroupcl.cxx 2019-12-05 20:59:23.000000000 +0100 |
| 154 |
+++ sc/source/core/opencl/formulagroupcl.cxx 2020-03-18 00:44:08.091710000 +0100 |
| 155 |
@@ -1026,9 +1026,6 @@ |
| 156 |
/// Handling a Double Vector that is used as a sliding window input |
| 157 |
/// to either a sliding window average or sum-of-products |
| 158 |
/// Generate a sequential loop for reductions |
| 159 |
-class OpAverage; |
| 160 |
-class OpCount; |
| 161 |
- |
| 162 |
template<class Base> |
| 163 |
class DynamicKernelSlidingArgument : public Base |
| 164 |
{ |
| 165 |
@@ -1335,186 +1332,8 @@ |
| 166 |
} |
| 167 |
|
| 168 |
/// Emit the definition for the auxiliary reduction kernel |
| 169 |
- virtual void GenSlidingWindowFunction( std::stringstream& ss ) |
| 170 |
- { |
| 171 |
- if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 172 |
- { |
| 173 |
- std::string name = Base::GetName(); |
| 174 |
- ss << "__kernel void " << name; |
| 175 |
- ss << "_reduction(__global double* A, " |
| 176 |
- "__global double *result,int arrayLength,int windowSize){\n"; |
| 177 |
- ss << " double tmp, current_result =" << |
| 178 |
- mpCodeGen->GetBottom(); |
| 179 |
- ss << ";\n"; |
| 180 |
- ss << " int writePos = get_group_id(1);\n"; |
| 181 |
- ss << " int lidx = get_local_id(0);\n"; |
| 182 |
- ss << " __local double shm_buf[256];\n"; |
| 183 |
- if (mpDVR->IsStartFixed()) |
| 184 |
- ss << " int offset = 0;\n"; |
| 185 |
- else // if (!mpDVR->IsStartFixed()) |
| 186 |
- ss << " int offset = get_group_id(1);\n"; |
| 187 |
- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 188 |
- ss << " int end = windowSize;\n"; |
| 189 |
- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 190 |
- ss << " int end = offset + windowSize;\n"; |
| 191 |
- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 192 |
- ss << " int end = windowSize + get_group_id(1);\n"; |
| 193 |
- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 194 |
- ss << " int end = windowSize;\n"; |
| 195 |
- ss << " end = min(end, arrayLength);\n"; |
| 196 |
+ virtual void GenSlidingWindowFunction( std::stringstream& ss ); |
| 197 |
|
| 198 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 199 |
- ss << " int loop = arrayLength/512 + 1;\n"; |
| 200 |
- ss << " for (int l=0; l<loop; l++){\n"; |
| 201 |
- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 202 |
- ss << " int loopOffset = l*512;\n"; |
| 203 |
- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 204 |
- ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 205 |
- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; |
| 206 |
- ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 207 |
- "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n"; |
| 208 |
- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 209 |
- ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 210 |
- "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; |
| 211 |
- ss << " shm_buf[lidx] = tmp;\n"; |
| 212 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 213 |
- ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 214 |
- ss << " if (lidx < i)\n"; |
| 215 |
- ss << " shm_buf[lidx] = "; |
| 216 |
- // Special case count |
| 217 |
- if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 218 |
- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 219 |
- else |
| 220 |
- ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n"; |
| 221 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 222 |
- ss << " }\n"; |
| 223 |
- ss << " if (lidx == 0)\n"; |
| 224 |
- ss << " current_result ="; |
| 225 |
- if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 226 |
- ss << "current_result + shm_buf[0]"; |
| 227 |
- else |
| 228 |
- ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); |
| 229 |
- ss << ";\n"; |
| 230 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 231 |
- ss << " }\n"; |
| 232 |
- ss << " if (lidx == 0)\n"; |
| 233 |
- ss << " result[writePos] = current_result;\n"; |
| 234 |
- ss << "}\n"; |
| 235 |
- } |
| 236 |
- else |
| 237 |
- { |
| 238 |
- std::string name = Base::GetName(); |
| 239 |
- /*sum reduction*/ |
| 240 |
- ss << "__kernel void " << name << "_sum"; |
| 241 |
- ss << "_reduction(__global double* A, " |
| 242 |
- "__global double *result,int arrayLength,int windowSize){\n"; |
| 243 |
- ss << " double tmp, current_result =" << |
| 244 |
- mpCodeGen->GetBottom(); |
| 245 |
- ss << ";\n"; |
| 246 |
- ss << " int writePos = get_group_id(1);\n"; |
| 247 |
- ss << " int lidx = get_local_id(0);\n"; |
| 248 |
- ss << " __local double shm_buf[256];\n"; |
| 249 |
- if (mpDVR->IsStartFixed()) |
| 250 |
- ss << " int offset = 0;\n"; |
| 251 |
- else // if (!mpDVR->IsStartFixed()) |
| 252 |
- ss << " int offset = get_group_id(1);\n"; |
| 253 |
- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 254 |
- ss << " int end = windowSize;\n"; |
| 255 |
- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 256 |
- ss << " int end = offset + windowSize;\n"; |
| 257 |
- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 258 |
- ss << " int end = windowSize + get_group_id(1);\n"; |
| 259 |
- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 260 |
- ss << " int end = windowSize;\n"; |
| 261 |
- ss << " end = min(end, arrayLength);\n"; |
| 262 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 263 |
- ss << " int loop = arrayLength/512 + 1;\n"; |
| 264 |
- ss << " for (int l=0; l<loop; l++){\n"; |
| 265 |
- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 266 |
- ss << " int loopOffset = l*512;\n"; |
| 267 |
- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 268 |
- ss << " tmp = legalize("; |
| 269 |
- ss << "(A[loopOffset + lidx + offset]+ tmp)"; |
| 270 |
- ss << ", tmp);\n"; |
| 271 |
- ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)"; |
| 272 |
- ss << ", tmp);\n"; |
| 273 |
- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 274 |
- ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)"; |
| 275 |
- ss << ", tmp);\n"; |
| 276 |
- ss << " shm_buf[lidx] = tmp;\n"; |
| 277 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 278 |
- ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 279 |
- ss << " if (lidx < i)\n"; |
| 280 |
- ss << " shm_buf[lidx] = "; |
| 281 |
- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 282 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 283 |
- ss << " }\n"; |
| 284 |
- ss << " if (lidx == 0)\n"; |
| 285 |
- ss << " current_result ="; |
| 286 |
- ss << "current_result + shm_buf[0]"; |
| 287 |
- ss << ";\n"; |
| 288 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 289 |
- ss << " }\n"; |
| 290 |
- ss << " if (lidx == 0)\n"; |
| 291 |
- ss << " result[writePos] = current_result;\n"; |
| 292 |
- ss << "}\n"; |
| 293 |
- /*count reduction*/ |
| 294 |
- ss << "__kernel void " << name << "_count"; |
| 295 |
- ss << "_reduction(__global double* A, " |
| 296 |
- "__global double *result,int arrayLength,int windowSize){\n"; |
| 297 |
- ss << " double tmp, current_result =" << |
| 298 |
- mpCodeGen->GetBottom(); |
| 299 |
- ss << ";\n"; |
| 300 |
- ss << " int writePos = get_group_id(1);\n"; |
| 301 |
- ss << " int lidx = get_local_id(0);\n"; |
| 302 |
- ss << " __local double shm_buf[256];\n"; |
| 303 |
- if (mpDVR->IsStartFixed()) |
| 304 |
- ss << " int offset = 0;\n"; |
| 305 |
- else // if (!mpDVR->IsStartFixed()) |
| 306 |
- ss << " int offset = get_group_id(1);\n"; |
| 307 |
- if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 308 |
- ss << " int end = windowSize;\n"; |
| 309 |
- else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 310 |
- ss << " int end = offset + windowSize;\n"; |
| 311 |
- else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 312 |
- ss << " int end = windowSize + get_group_id(1);\n"; |
| 313 |
- else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 314 |
- ss << " int end = windowSize;\n"; |
| 315 |
- ss << " end = min(end, arrayLength);\n"; |
| 316 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 317 |
- ss << " int loop = arrayLength/512 + 1;\n"; |
| 318 |
- ss << " for (int l=0; l<loop; l++){\n"; |
| 319 |
- ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 320 |
- ss << " int loopOffset = l*512;\n"; |
| 321 |
- ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 322 |
- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; |
| 323 |
- ss << ", tmp);\n"; |
| 324 |
- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)"; |
| 325 |
- ss << ", tmp);\n"; |
| 326 |
- ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 327 |
- ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; |
| 328 |
- ss << ", tmp);\n"; |
| 329 |
- ss << " shm_buf[lidx] = tmp;\n"; |
| 330 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 331 |
- ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 332 |
- ss << " if (lidx < i)\n"; |
| 333 |
- ss << " shm_buf[lidx] = "; |
| 334 |
- ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 335 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 336 |
- ss << " }\n"; |
| 337 |
- ss << " if (lidx == 0)\n"; |
| 338 |
- ss << " current_result ="; |
| 339 |
- ss << "current_result + shm_buf[0];"; |
| 340 |
- ss << ";\n"; |
| 341 |
- ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 342 |
- ss << " }\n"; |
| 343 |
- ss << " if (lidx == 0)\n"; |
| 344 |
- ss << " result[writePos] = current_result;\n"; |
| 345 |
- ss << "}\n"; |
| 346 |
- } |
| 347 |
- |
| 348 |
- } |
| 349 |
- |
| 350 |
virtual std::string GenSlidingWindowDeclRef( bool ) const |
| 351 |
{ |
| 352 |
std::stringstream ss; |
| 353 |
@@ -1527,195 +1346,10 @@ |
| 354 |
|
| 355 |
/// Controls how the elements in the DoubleVectorRef are traversed |
| 356 |
size_t GenReductionLoopHeader( |
| 357 |
- std::stringstream& ss, int nResultSize, bool& needBody ) |
| 358 |
- { |
| 359 |
- assert(mpDVR); |
| 360 |
- size_t nCurWindowSize = mpDVR->GetRefRowSize(); |
| 361 |
- std::string temp = Base::GetName() + "[gid0]"; |
| 362 |
- ss << "tmp = "; |
| 363 |
- // Special case count |
| 364 |
- if (dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 365 |
- { |
| 366 |
- ss << mpCodeGen->Gen2(temp, "tmp") << ";\n"; |
| 367 |
- ss << "nCount = nCount-1;\n"; |
| 368 |
- ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/ |
| 369 |
- ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n"; |
| 370 |
- } |
| 371 |
- else if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 372 |
- ss << temp << "+ tmp"; |
| 373 |
- else |
| 374 |
- ss << mpCodeGen->Gen2(temp, "tmp"); |
| 375 |
- ss << ";\n\t"; |
| 376 |
- needBody = false; |
| 377 |
- return nCurWindowSize; |
| 378 |
- } |
| 379 |
+ std::stringstream& ss, int nResultSize, bool& needBody ); |
| 380 |
|
| 381 |
- virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ) |
| 382 |
- { |
| 383 |
- assert(Base::mpClmem == nullptr); |
| 384 |
+ virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ); |
| 385 |
|
| 386 |
- openclwrapper::KernelEnv kEnv; |
| 387 |
- openclwrapper::setKernelEnv(&kEnv); |
| 388 |
- cl_int err; |
| 389 |
- size_t nInput = mpDVR->GetArrayLength(); |
| 390 |
- size_t nCurWindowSize = mpDVR->GetRefRowSize(); |
| 391 |
- // create clmem buffer |
| 392 |
- if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr) |
| 393 |
- throw Unhandled(__FILE__, __LINE__); |
| 394 |
- double* pHostBuffer = const_cast<double*>( |
| 395 |
- mpDVR->GetArrays()[Base::mnIndex].mpNumericArray); |
| 396 |
- size_t szHostBuffer = nInput * sizeof(double); |
| 397 |
- Base::mpClmem = clCreateBuffer(kEnv.mpkContext, |
| 398 |
- cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR, |
| 399 |
- szHostBuffer, |
| 400 |
- pHostBuffer, &err); |
| 401 |
- SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer); |
| 402 |
- |
| 403 |
- mpClmem2 = clCreateBuffer(kEnv.mpkContext, |
| 404 |
- CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, |
| 405 |
- sizeof(double) * w, nullptr, nullptr); |
| 406 |
- if (CL_SUCCESS != err) |
| 407 |
- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); |
| 408 |
- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w)); |
| 409 |
- |
| 410 |
- // reproduce the reduction function name |
| 411 |
- std::string kernelName; |
| 412 |
- if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 413 |
- kernelName = Base::GetName() + "_reduction"; |
| 414 |
- else |
| 415 |
- kernelName = Base::GetName() + "_sum_reduction"; |
| 416 |
- cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); |
| 417 |
- if (err != CL_SUCCESS) |
| 418 |
- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); |
| 419 |
- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); |
| 420 |
- |
| 421 |
- // set kernel arg of reduction kernel |
| 422 |
- // TODO(Wei Wei): use unique name for kernel |
| 423 |
- cl_mem buf = Base::GetCLBuffer(); |
| 424 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); |
| 425 |
- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), |
| 426 |
- static_cast<void*>(&buf)); |
| 427 |
- if (CL_SUCCESS != err) |
| 428 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 429 |
- |
| 430 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); |
| 431 |
- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); |
| 432 |
- if (CL_SUCCESS != err) |
| 433 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 434 |
- |
| 435 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); |
| 436 |
- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); |
| 437 |
- if (CL_SUCCESS != err) |
| 438 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 439 |
- |
| 440 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); |
| 441 |
- err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); |
| 442 |
- if (CL_SUCCESS != err) |
| 443 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 444 |
- |
| 445 |
- // set work group size and execute |
| 446 |
- size_t global_work_size[] = { 256, static_cast<size_t>(w) }; |
| 447 |
- size_t const local_work_size[] = { 256, 1 }; |
| 448 |
- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); |
| 449 |
- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, |
| 450 |
- global_work_size, local_work_size, 0, nullptr, nullptr); |
| 451 |
- if (CL_SUCCESS != err) |
| 452 |
- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); |
| 453 |
- err = clFinish(kEnv.mpkCmdQueue); |
| 454 |
- if (CL_SUCCESS != err) |
| 455 |
- throw OpenCLError("clFinish", err, __FILE__, __LINE__); |
| 456 |
- if (dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 457 |
- { |
| 458 |
- /*average need more reduction kernel for count computing*/ |
| 459 |
- std::unique_ptr<double[]> pAllBuffer(new double[2 * w]); |
| 460 |
- double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, |
| 461 |
- mpClmem2, |
| 462 |
- CL_TRUE, CL_MAP_READ, 0, |
| 463 |
- sizeof(double) * w, 0, nullptr, nullptr, |
| 464 |
- &err)); |
| 465 |
- if (err != CL_SUCCESS) |
| 466 |
- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); |
| 467 |
- |
| 468 |
- for (int i = 0; i < w; i++) |
| 469 |
- pAllBuffer[i] = resbuf[i]; |
| 470 |
- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); |
| 471 |
- if (err != CL_SUCCESS) |
| 472 |
- throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__); |
| 473 |
- |
| 474 |
- kernelName = Base::GetName() + "_count_reduction"; |
| 475 |
- redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); |
| 476 |
- if (err != CL_SUCCESS) |
| 477 |
- throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); |
| 478 |
- SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); |
| 479 |
- |
| 480 |
- // set kernel arg of reduction kernel |
| 481 |
- buf = Base::GetCLBuffer(); |
| 482 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); |
| 483 |
- err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), |
| 484 |
- static_cast<void*>(&buf)); |
| 485 |
- if (CL_SUCCESS != err) |
| 486 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 487 |
- |
| 488 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); |
| 489 |
- err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); |
| 490 |
- if (CL_SUCCESS != err) |
| 491 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 492 |
- |
| 493 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); |
| 494 |
- err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); |
| 495 |
- if (CL_SUCCESS != err) |
| 496 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 497 |
- |
| 498 |
- SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); |
| 499 |
- err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); |
| 500 |
- if (CL_SUCCESS != err) |
| 501 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 502 |
- |
| 503 |
- // set work group size and execute |
| 504 |
- size_t global_work_size1[] = { 256, static_cast<size_t>(w) }; |
| 505 |
- size_t const local_work_size1[] = { 256, 1 }; |
| 506 |
- SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel); |
| 507 |
- err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, |
| 508 |
- global_work_size1, local_work_size1, 0, nullptr, nullptr); |
| 509 |
- if (CL_SUCCESS != err) |
| 510 |
- throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); |
| 511 |
- err = clFinish(kEnv.mpkCmdQueue); |
| 512 |
- if (CL_SUCCESS != err) |
| 513 |
- throw OpenCLError("clFinish", err, __FILE__, __LINE__); |
| 514 |
- resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, |
| 515 |
- mpClmem2, |
| 516 |
- CL_TRUE, CL_MAP_READ, 0, |
| 517 |
- sizeof(double) * w, 0, nullptr, nullptr, |
| 518 |
- &err)); |
| 519 |
- if (err != CL_SUCCESS) |
| 520 |
- throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); |
| 521 |
- for (int i = 0; i < w; i++) |
| 522 |
- pAllBuffer[i + w] = resbuf[i]; |
| 523 |
- err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); |
| 524 |
- // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails? |
| 525 |
- if (CL_SUCCESS != err) |
| 526 |
- SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err)); |
| 527 |
- if (mpClmem2) |
| 528 |
- { |
| 529 |
- err = clReleaseMemObject(mpClmem2); |
| 530 |
- SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); |
| 531 |
- mpClmem2 = nullptr; |
| 532 |
- } |
| 533 |
- mpClmem2 = clCreateBuffer(kEnv.mpkContext, |
| 534 |
- cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR, |
| 535 |
- w * sizeof(double) * 2, pAllBuffer.get(), &err); |
| 536 |
- if (CL_SUCCESS != err) |
| 537 |
- throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); |
| 538 |
- SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get()); |
| 539 |
- } |
| 540 |
- // set kernel arg |
| 541 |
- SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); |
| 542 |
- err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2); |
| 543 |
- if (CL_SUCCESS != err) |
| 544 |
- throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 545 |
- return 1; |
| 546 |
- } |
| 547 |
- |
| 548 |
~ParallelReductionVectorRef() |
| 549 |
{ |
| 550 |
if (mpClmem2) |
| 551 |
@@ -2314,6 +1948,380 @@ |
| 552 |
} |
| 553 |
virtual std::string BinFuncName() const override { return "fsop"; } |
| 554 |
}; |
| 555 |
+ |
| 556 |
+template<class Base> |
| 557 |
+void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss ) |
| 558 |
+{ |
| 559 |
+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 560 |
+ { |
| 561 |
+ std::string name = Base::GetName(); |
| 562 |
+ ss << "__kernel void " << name; |
| 563 |
+ ss << "_reduction(__global double* A, " |
| 564 |
+ "__global double *result,int arrayLength,int windowSize){\n"; |
| 565 |
+ ss << " double tmp, current_result =" << |
| 566 |
+ mpCodeGen->GetBottom(); |
| 567 |
+ ss << ";\n"; |
| 568 |
+ ss << " int writePos = get_group_id(1);\n"; |
| 569 |
+ ss << " int lidx = get_local_id(0);\n"; |
| 570 |
+ ss << " __local double shm_buf[256];\n"; |
| 571 |
+ if (mpDVR->IsStartFixed()) |
| 572 |
+ ss << " int offset = 0;\n"; |
| 573 |
+ else // if (!mpDVR->IsStartFixed()) |
| 574 |
+ ss << " int offset = get_group_id(1);\n"; |
| 575 |
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 576 |
+ ss << " int end = windowSize;\n"; |
| 577 |
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 578 |
+ ss << " int end = offset + windowSize;\n"; |
| 579 |
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 580 |
+ ss << " int end = windowSize + get_group_id(1);\n"; |
| 581 |
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 582 |
+ ss << " int end = windowSize;\n"; |
| 583 |
+ ss << " end = min(end, arrayLength);\n"; |
| 584 |
+ |
| 585 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 586 |
+ ss << " int loop = arrayLength/512 + 1;\n"; |
| 587 |
+ ss << " for (int l=0; l<loop; l++){\n"; |
| 588 |
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 589 |
+ ss << " int loopOffset = l*512;\n"; |
| 590 |
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 591 |
+ ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 592 |
+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; |
| 593 |
+ ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 594 |
+ "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n"; |
| 595 |
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 596 |
+ ss << " tmp = legalize(" << mpCodeGen->Gen2( |
| 597 |
+ "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n"; |
| 598 |
+ ss << " shm_buf[lidx] = tmp;\n"; |
| 599 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 600 |
+ ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 601 |
+ ss << " if (lidx < i)\n"; |
| 602 |
+ ss << " shm_buf[lidx] = "; |
| 603 |
+ // Special case count |
| 604 |
+ if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 605 |
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 606 |
+ else |
| 607 |
+ ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n"; |
| 608 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 609 |
+ ss << " }\n"; |
| 610 |
+ ss << " if (lidx == 0)\n"; |
| 611 |
+ ss << " current_result ="; |
| 612 |
+ if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 613 |
+ ss << "current_result + shm_buf[0]"; |
| 614 |
+ else |
| 615 |
+ ss << mpCodeGen->Gen2("current_result", "shm_buf[0]"); |
| 616 |
+ ss << ";\n"; |
| 617 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 618 |
+ ss << " }\n"; |
| 619 |
+ ss << " if (lidx == 0)\n"; |
| 620 |
+ ss << " result[writePos] = current_result;\n"; |
| 621 |
+ ss << "}\n"; |
| 622 |
+ } |
| 623 |
+ else |
| 624 |
+ { |
| 625 |
+ std::string name = Base::GetName(); |
| 626 |
+ /*sum reduction*/ |
| 627 |
+ ss << "__kernel void " << name << "_sum"; |
| 628 |
+ ss << "_reduction(__global double* A, " |
| 629 |
+ "__global double *result,int arrayLength,int windowSize){\n"; |
| 630 |
+ ss << " double tmp, current_result =" << |
| 631 |
+ mpCodeGen->GetBottom(); |
| 632 |
+ ss << ";\n"; |
| 633 |
+ ss << " int writePos = get_group_id(1);\n"; |
| 634 |
+ ss << " int lidx = get_local_id(0);\n"; |
| 635 |
+ ss << " __local double shm_buf[256];\n"; |
| 636 |
+ if (mpDVR->IsStartFixed()) |
| 637 |
+ ss << " int offset = 0;\n"; |
| 638 |
+ else // if (!mpDVR->IsStartFixed()) |
| 639 |
+ ss << " int offset = get_group_id(1);\n"; |
| 640 |
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 641 |
+ ss << " int end = windowSize;\n"; |
| 642 |
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 643 |
+ ss << " int end = offset + windowSize;\n"; |
| 644 |
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 645 |
+ ss << " int end = windowSize + get_group_id(1);\n"; |
| 646 |
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 647 |
+ ss << " int end = windowSize;\n"; |
| 648 |
+ ss << " end = min(end, arrayLength);\n"; |
| 649 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 650 |
+ ss << " int loop = arrayLength/512 + 1;\n"; |
| 651 |
+ ss << " for (int l=0; l<loop; l++){\n"; |
| 652 |
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 653 |
+ ss << " int loopOffset = l*512;\n"; |
| 654 |
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 655 |
+ ss << " tmp = legalize("; |
| 656 |
+ ss << "(A[loopOffset + lidx + offset]+ tmp)"; |
| 657 |
+ ss << ", tmp);\n"; |
| 658 |
+ ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)"; |
| 659 |
+ ss << ", tmp);\n"; |
| 660 |
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 661 |
+ ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)"; |
| 662 |
+ ss << ", tmp);\n"; |
| 663 |
+ ss << " shm_buf[lidx] = tmp;\n"; |
| 664 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 665 |
+ ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 666 |
+ ss << " if (lidx < i)\n"; |
| 667 |
+ ss << " shm_buf[lidx] = "; |
| 668 |
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 669 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 670 |
+ ss << " }\n"; |
| 671 |
+ ss << " if (lidx == 0)\n"; |
| 672 |
+ ss << " current_result ="; |
| 673 |
+ ss << "current_result + shm_buf[0]"; |
| 674 |
+ ss << ";\n"; |
| 675 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 676 |
+ ss << " }\n"; |
| 677 |
+ ss << " if (lidx == 0)\n"; |
| 678 |
+ ss << " result[writePos] = current_result;\n"; |
| 679 |
+ ss << "}\n"; |
| 680 |
+ /*count reduction*/ |
| 681 |
+ ss << "__kernel void " << name << "_count"; |
| 682 |
+ ss << "_reduction(__global double* A, " |
| 683 |
+ "__global double *result,int arrayLength,int windowSize){\n"; |
| 684 |
+ ss << " double tmp, current_result =" << |
| 685 |
+ mpCodeGen->GetBottom(); |
| 686 |
+ ss << ";\n"; |
| 687 |
+ ss << " int writePos = get_group_id(1);\n"; |
| 688 |
+ ss << " int lidx = get_local_id(0);\n"; |
| 689 |
+ ss << " __local double shm_buf[256];\n"; |
| 690 |
+ if (mpDVR->IsStartFixed()) |
| 691 |
+ ss << " int offset = 0;\n"; |
| 692 |
+ else // if (!mpDVR->IsStartFixed()) |
| 693 |
+ ss << " int offset = get_group_id(1);\n"; |
| 694 |
+ if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 695 |
+ ss << " int end = windowSize;\n"; |
| 696 |
+ else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 697 |
+ ss << " int end = offset + windowSize;\n"; |
| 698 |
+ else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) |
| 699 |
+ ss << " int end = windowSize + get_group_id(1);\n"; |
| 700 |
+ else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) |
| 701 |
+ ss << " int end = windowSize;\n"; |
| 702 |
+ ss << " end = min(end, arrayLength);\n"; |
| 703 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 704 |
+ ss << " int loop = arrayLength/512 + 1;\n"; |
| 705 |
+ ss << " for (int l=0; l<loop; l++){\n"; |
| 706 |
+ ss << " tmp = " << mpCodeGen->GetBottom() << ";\n"; |
| 707 |
+ ss << " int loopOffset = l*512;\n"; |
| 708 |
+ ss << " if((loopOffset + lidx + offset + 256) < end) {\n"; |
| 709 |
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; |
| 710 |
+ ss << ", tmp);\n"; |
| 711 |
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)"; |
| 712 |
+ ss << ", tmp);\n"; |
| 713 |
+ ss << " } else if ((loopOffset + lidx + offset) < end)\n"; |
| 714 |
+ ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)"; |
| 715 |
+ ss << ", tmp);\n"; |
| 716 |
+ ss << " shm_buf[lidx] = tmp;\n"; |
| 717 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 718 |
+ ss << " for (int i = 128; i >0; i/=2) {\n"; |
| 719 |
+ ss << " if (lidx < i)\n"; |
| 720 |
+ ss << " shm_buf[lidx] = "; |
| 721 |
+ ss << "shm_buf[lidx] + shm_buf[lidx + i];\n"; |
| 722 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 723 |
+ ss << " }\n"; |
| 724 |
+ ss << " if (lidx == 0)\n"; |
| 725 |
+ ss << " current_result ="; |
| 726 |
+ ss << "current_result + shm_buf[0];"; |
| 727 |
+ ss << ";\n"; |
| 728 |
+ ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; |
| 729 |
+ ss << " }\n"; |
| 730 |
+ ss << " if (lidx == 0)\n"; |
| 731 |
+ ss << " result[writePos] = current_result;\n"; |
| 732 |
+ ss << "}\n"; |
| 733 |
+ } |
| 734 |
+ |
| 735 |
+} |
| 736 |
+ |
| 737 |
+template<class Base> |
| 738 |
+size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader( |
| 739 |
+ std::stringstream& ss, int nResultSize, bool& needBody ) |
| 740 |
+{ |
| 741 |
+ assert(mpDVR); |
| 742 |
+ size_t nCurWindowSize = mpDVR->GetRefRowSize(); |
| 743 |
+ std::string temp = Base::GetName() + "[gid0]"; |
| 744 |
+ ss << "tmp = "; |
| 745 |
+ // Special case count |
| 746 |
+ if (dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 747 |
+ { |
| 748 |
+ ss << mpCodeGen->Gen2(temp, "tmp") << ";\n"; |
| 749 |
+ ss << "nCount = nCount-1;\n"; |
| 750 |
+ ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/ |
| 751 |
+ ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n"; |
| 752 |
+ } |
| 753 |
+ else if (dynamic_cast<OpCount*>(mpCodeGen.get())) |
| 754 |
+ ss << temp << "+ tmp"; |
| 755 |
+ else |
| 756 |
+ ss << mpCodeGen->Gen2(temp, "tmp"); |
| 757 |
+ ss << ";\n\t"; |
| 758 |
+ needBody = false; |
| 759 |
+ return nCurWindowSize; |
| 760 |
+} |
| 761 |
+ |
| 762 |
+template<class Base> |
| 763 |
+size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram ) |
| 764 |
+{ |
| 765 |
+ assert(Base::mpClmem == nullptr); |
| 766 |
+ |
| 767 |
+ openclwrapper::KernelEnv kEnv; |
| 768 |
+ openclwrapper::setKernelEnv(&kEnv); |
| 769 |
+ cl_int err; |
| 770 |
+ size_t nInput = mpDVR->GetArrayLength(); |
| 771 |
+ size_t nCurWindowSize = mpDVR->GetRefRowSize(); |
| 772 |
+ // create clmem buffer |
| 773 |
+ if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr) |
| 774 |
+ throw Unhandled(__FILE__, __LINE__); |
| 775 |
+ double* pHostBuffer = const_cast<double*>( |
| 776 |
+ mpDVR->GetArrays()[Base::mnIndex].mpNumericArray); |
| 777 |
+ size_t szHostBuffer = nInput * sizeof(double); |
| 778 |
+ Base::mpClmem = clCreateBuffer(kEnv.mpkContext, |
| 779 |
+ cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR, |
| 780 |
+ szHostBuffer, |
| 781 |
+ pHostBuffer, &err); |
| 782 |
+ SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer); |
| 783 |
+ |
| 784 |
+ mpClmem2 = clCreateBuffer(kEnv.mpkContext, |
| 785 |
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, |
| 786 |
+ sizeof(double) * w, nullptr, nullptr); |
| 787 |
+ if (CL_SUCCESS != err) |
| 788 |
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); |
| 789 |
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w)); |
| 790 |
+ |
| 791 |
+ // reproduce the reduction function name |
| 792 |
+ std::string kernelName; |
| 793 |
+ if (!dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 794 |
+ kernelName = Base::GetName() + "_reduction"; |
| 795 |
+ else |
| 796 |
+ kernelName = Base::GetName() + "_sum_reduction"; |
| 797 |
+ cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); |
| 798 |
+ if (err != CL_SUCCESS) |
| 799 |
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); |
| 800 |
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); |
| 801 |
+ |
| 802 |
+ // set kernel arg of reduction kernel |
| 803 |
+ // TODO(Wei Wei): use unique name for kernel |
| 804 |
+ cl_mem buf = Base::GetCLBuffer(); |
| 805 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); |
| 806 |
+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), |
| 807 |
+ static_cast<void*>(&buf)); |
| 808 |
+ if (CL_SUCCESS != err) |
| 809 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 810 |
+ |
| 811 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); |
| 812 |
+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); |
| 813 |
+ if (CL_SUCCESS != err) |
| 814 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 815 |
+ |
| 816 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); |
| 817 |
+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); |
| 818 |
+ if (CL_SUCCESS != err) |
| 819 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 820 |
+ |
| 821 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); |
| 822 |
+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); |
| 823 |
+ if (CL_SUCCESS != err) |
| 824 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 825 |
+ |
| 826 |
+ // set work group size and execute |
| 827 |
+ size_t global_work_size[] = { 256, static_cast<size_t>(w) }; |
| 828 |
+ size_t const local_work_size[] = { 256, 1 }; |
| 829 |
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel); |
| 830 |
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, |
| 831 |
+ global_work_size, local_work_size, 0, nullptr, nullptr); |
| 832 |
+ if (CL_SUCCESS != err) |
| 833 |
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); |
| 834 |
+ err = clFinish(kEnv.mpkCmdQueue); |
| 835 |
+ if (CL_SUCCESS != err) |
| 836 |
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__); |
| 837 |
+ if (dynamic_cast<OpAverage*>(mpCodeGen.get())) |
| 838 |
+ { |
| 839 |
+ /*average need more reduction kernel for count computing*/ |
| 840 |
+ std::unique_ptr<double[]> pAllBuffer(new double[2 * w]); |
| 841 |
+ double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, |
| 842 |
+ mpClmem2, |
| 843 |
+ CL_TRUE, CL_MAP_READ, 0, |
| 844 |
+ sizeof(double) * w, 0, nullptr, nullptr, |
| 845 |
+ &err)); |
| 846 |
+ if (err != CL_SUCCESS) |
| 847 |
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); |
| 848 |
+ |
| 849 |
+ for (int i = 0; i < w; i++) |
| 850 |
+ pAllBuffer[i] = resbuf[i]; |
| 851 |
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); |
| 852 |
+ if (err != CL_SUCCESS) |
| 853 |
+ throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__); |
| 854 |
+ |
| 855 |
+ kernelName = Base::GetName() + "_count_reduction"; |
| 856 |
+ redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err); |
| 857 |
+ if (err != CL_SUCCESS) |
| 858 |
+ throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); |
| 859 |
+ SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram); |
| 860 |
+ |
| 861 |
+ // set kernel arg of reduction kernel |
| 862 |
+ buf = Base::GetCLBuffer(); |
| 863 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf); |
| 864 |
+ err = clSetKernelArg(redKernel, 0, sizeof(cl_mem), |
| 865 |
+ static_cast<void*>(&buf)); |
| 866 |
+ if (CL_SUCCESS != err) |
| 867 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 868 |
+ |
| 869 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2); |
| 870 |
+ err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2); |
| 871 |
+ if (CL_SUCCESS != err) |
| 872 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 873 |
+ |
| 874 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput); |
| 875 |
+ err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput)); |
| 876 |
+ if (CL_SUCCESS != err) |
| 877 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 878 |
+ |
| 879 |
+ SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize); |
| 880 |
+ err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize)); |
| 881 |
+ if (CL_SUCCESS != err) |
| 882 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 883 |
+ |
| 884 |
+ // set work group size and execute |
| 885 |
+ size_t global_work_size1[] = { 256, static_cast<size_t>(w) }; |
| 886 |
+ size_t const local_work_size1[] = { 256, 1 }; |
| 887 |
+ SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel); |
| 888 |
+ err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, |
| 889 |
+ global_work_size1, local_work_size1, 0, nullptr, nullptr); |
| 890 |
+ if (CL_SUCCESS != err) |
| 891 |
+ throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); |
| 892 |
+ err = clFinish(kEnv.mpkCmdQueue); |
| 893 |
+ if (CL_SUCCESS != err) |
| 894 |
+ throw OpenCLError("clFinish", err, __FILE__, __LINE__); |
| 895 |
+ resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue, |
| 896 |
+ mpClmem2, |
| 897 |
+ CL_TRUE, CL_MAP_READ, 0, |
| 898 |
+ sizeof(double) * w, 0, nullptr, nullptr, |
| 899 |
+ &err)); |
| 900 |
+ if (err != CL_SUCCESS) |
| 901 |
+ throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__); |
| 902 |
+ for (int i = 0; i < w; i++) |
| 903 |
+ pAllBuffer[i + w] = resbuf[i]; |
| 904 |
+ err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr); |
| 905 |
+ // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails? |
| 906 |
+ if (CL_SUCCESS != err) |
| 907 |
+ SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err)); |
| 908 |
+ if (mpClmem2) |
| 909 |
+ { |
| 910 |
+ err = clReleaseMemObject(mpClmem2); |
| 911 |
+ SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err)); |
| 912 |
+ mpClmem2 = nullptr; |
| 913 |
+ } |
| 914 |
+ mpClmem2 = clCreateBuffer(kEnv.mpkContext, |
| 915 |
+ cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR, |
| 916 |
+ w * sizeof(double) * 2, pAllBuffer.get(), &err); |
| 917 |
+ if (CL_SUCCESS != err) |
| 918 |
+ throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); |
| 919 |
+ SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get()); |
| 920 |
+ } |
| 921 |
+ // set kernel arg |
| 922 |
+ SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2); |
| 923 |
+ err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2); |
| 924 |
+ if (CL_SUCCESS != err) |
| 925 |
+ throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); |
| 926 |
+ return 1; |
| 927 |
+} |
| 928 |
+ |
| 929 |
namespace { |
| 930 |
struct SumIfsArgs |
| 931 |
{ |