mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
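fix issue 320: read output without .ldg() in sumAlongColumns; require a sizeof(U)-aligned base pointer in canCastResize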
This commit is contained in:
parent
4fe9204683
commit
df2edbe3ab
@@ -47,7 +47,7 @@ __global__ void sumAlongColumns(Tensor<T, 1, true> input,
|
|||||||
|
|
||||||
if (endRow) {
|
if (endRow) {
|
||||||
for (int row = rowStart; row < output.getSize(0); ++row) {
|
for (int row = rowStart; row < output.getSize(0); ++row) {
|
||||||
T out = output[row][col].ldg();
|
T out = output[row][col];
|
||||||
out = Math<T>::add(out, val);
|
out = Math<T>::add(out, val);
|
||||||
output[row][col] = out;
|
output[row][col] = out;
|
||||||
}
|
}
|
||||||
@@ -57,7 +57,7 @@ __global__ void sumAlongColumns(Tensor<T, 1, true> input,
|
|||||||
for (int row = rowStart; row < rowEnd; row += kRowUnroll) {
|
for (int row = rowStart; row < rowEnd; row += kRowUnroll) {
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < kRowUnroll; ++i) {
|
for (int i = 0; i < kRowUnroll; ++i) {
|
||||||
rows[i] = output[row + i][col].ldg();
|
rows[i] = output[row + i][col];
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
@@ -86,7 +86,7 @@ __global__ void sumAlongColumns(Tensor<T, 1, true> input,
|
|||||||
for (int row = rowStart; row < output.getSize(0); ++row) {
|
for (int row = rowStart; row < output.getSize(0); ++row) {
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < kColLoad; ++i) {
|
for (int i = 0; i < kColLoad; ++i) {
|
||||||
T out = output[row][col + i * blockDim.x].ldg();
|
T out = output[row][col + i * blockDim.x];
|
||||||
out = Math<T>::add(out, val[i]);
|
out = Math<T>::add(out, val[i]);
|
||||||
output[row][col + i * blockDim.x] = out;
|
output[row][col + i * blockDim.x] = out;
|
||||||
}
|
}
|
||||||
@@ -100,7 +100,7 @@ __global__ void sumAlongColumns(Tensor<T, 1, true> input,
|
|||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int j = 0; j < kColLoad; ++j) {
|
for (int j = 0; j < kColLoad; ++j) {
|
||||||
rows[i * kColLoad + j] =
|
rows[i * kColLoad + j] =
|
||||||
output[row + i][col + j * blockDim.x].ldg();
|
output[row + i][col + j * blockDim.x];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -310,6 +310,11 @@ Tensor<T, Dim, InnerContig, IndexT, PtrTraits>::canCastResize() const {
|
|||||||
static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
|
static_assert(sizeof(U) >= sizeof(T), "only handles greater sizes");
|
||||||
constexpr int kMultiple = sizeof(U) / sizeof(T);
|
constexpr int kMultiple = sizeof(U) / sizeof(T);
|
||||||
|
|
||||||
|
// Ensure that the base pointer is sizeof(U) aligned
|
||||||
|
if (((uintptr_t) data_) % sizeof(U) != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Check all outer strides
|
// Check all outer strides
|
||||||
for (int i = 0; i < Dim - 1; ++i) {
|
for (int i = 0; i < Dim - 1; ++i) {
|
||||||
if (stride_[i] % kMultiple != 0) {
|
if (stride_[i] % kMultiple != 0) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user