1. 概述
1.1 什么是前缀和
前缀和(Prefix Sum)是一种重要的预处理技巧,通过预先计算数组的累积和,可以在O(1)时间内快速计算任意区间的和。这种技术在处理区间查询、子数组和计算等问题时非常有效。
1.2 核心思想
前缀和的核心思想是空间换时间:
- 预处理阶段:O(n)时间构建前缀和数组
- 查询阶段:O(1)时间计算区间和
- 总体效果:将多次区间查询的时间复杂度从O(mn)降低到O(m+n)
2. 基础概念
2.1 前缀和定义
对于数组 arr[0...n-1],前缀和数组 prefixSum[0...n] 定义为:
prefixSum[0] = 0
prefixSum[i] = arr[0] + arr[1] + ... + arr[i-1] (i >= 1)
这里前缀和数组长度为 n 是为了方便在计算 prefixSum[right + 1] - prefixSum[left] 时的数组越界问题
2.2 区间和计算
利用前缀和计算区间 [left, right] 的和:
sum(left, right) = prefixSum[right+1] - prefixSum[left]
2.3 图解示例
原数组: [2, 1, 3, 4, 5]
索引: 0 1 2 3 4
前缀和数组: [0, 2, 3, 6, 10, 15]
索引: 0 1 2 3 4 5
计算区间[1,3]的和:
sum(1,3) = prefixSum[4] - prefixSum[1] = 10 - 2 = 8
验证: arr[1] + arr[2] + arr[3] = 1 + 3 + 4 = 8 ✓
3. 一维前缀和
3.1 基本实现
def build_prefix_sum(arr):
"""
构建一维前缀和数组
Args:
arr: 原始数组
Returns:
前缀和数组
"""
n = len(arr)
prefix_sum = [0] * (n + 1)
for i in range(n):
prefix_sum[i + 1] = prefix_sum[i] + arr[i]
return prefix_sum
def range_sum(prefix_sum, left, right):
"""
计算区间和
Args:
prefix_sum: 前缀和数组
left: 左边界(包含)
right: 右边界(包含)
Returns:
区间和
"""
return prefix_sum[right + 1] - prefix_sum[left]
3.2 完整示例
class PrefixSum:
def __init__(self, nums):
"""
初始化前缀和数据结构
Args:
nums: 原始数组
"""
self.prefix_sum = [0]
for num in nums:
self.prefix_sum.append(self.prefix_sum[-1] + num)
def sum_range(self, left, right):
"""
计算区间[left, right]的和
Args:
left: 左边界
right: 右边界
Returns:
区间和
"""
return self.prefix_sum[right + 1] - self.prefix_sum[left]
# 使用示例
nums = [2, 1, 3, 4, 5]
ps = PrefixSum(nums)
print(ps.sum_range(0, 2)) # 输出: 6 (2+1+3)
print(ps.sum_range(1, 3)) # 输出: 8 (1+3+4)
print(ps.sum_range(2, 4)) # 输出: 12 (3+4+5)
4. 二维前缀和
4.1 基本概念
二维前缀和用于快速计算矩形区域的元素和。对于二维数组 matrix[m][n],前缀和 prefixSum[i][j] 表示从 (0,0) 到 (i-1,j-1) 矩形区域的元素和。
4.2 构建二维前缀和
def build_2d_prefix_sum(matrix):
"""
构建二维前缀和数组
Args:
matrix: 二维数组
Returns:
二维前缀和数组
"""
if not matrix or not matrix[0]:
return [[0]]
m, n = len(matrix), len(matrix[0])
prefix_sum = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
prefix_sum[i][j] = (
matrix[i-1][j-1] +
prefix_sum[i-1][j] +
prefix_sum[i][j-1] -
prefix_sum[i-1][j-1]
)
return prefix_sum
4.3 矩形区域和计算
def rectangle_sum(prefix_sum, row1, col1, row2, col2):
"""
计算矩形区域 (row1,col1) 到 (row2,col2) 的和
Args:
prefix_sum: 二维前缀和数组
row1, col1: 左上角坐标
row2, col2: 右下角坐标
Returns:
矩形区域和
"""
return (
prefix_sum[row2 + 1][col2 + 1] -
prefix_sum[row1][col2 + 1] -
prefix_sum[row2 + 1][col1] +
prefix_sum[row1][col1]
)
4.4 完整的二维前缀和类
class Matrix2DPrefixSum:
def __init__(self, matrix):
"""
初始化二维前缀和
Args:
matrix: 二维数组
"""
if not matrix or not matrix[0]:
self.prefix_sum = [[0]]
return
m, n = len(matrix), len(matrix[0])
self.prefix_sum = [[0] * (n + 1) for _ in range(m + 1)]
# 构建前缀和数组
for i in range(1, m + 1):
for j in range(1, n + 1):
self.prefix_sum[i][j] = (
matrix[i-1][j-1] +
self.prefix_sum[i-1][j] +
self.prefix_sum[i][j-1] -
self.prefix_sum[i-1][j-1]
)
def sum_region(self, row1, col1, row2, col2):
"""
计算矩形区域和
"""
return (
self.prefix_sum[row2 + 1][col2 + 1] -
self.prefix_sum[row1][col2 + 1] -
self.prefix_sum[row2 + 1][col1] +
self.prefix_sum[row1][col1]
)
# 使用示例
matrix = [
[3, 0, 1, 4, 2],
[5, 6, 3, 2, 1],
[1, 2, 0, 1, 5],
[4, 1, 0, 1, 7],
[1, 0, 3, 0, 5]
]
matrix_ps = Matrix2DPrefixSum(matrix)
print(matrix_ps.sum_region(2, 1, 4, 3)) # 计算子矩阵和
5. 差分数组
5.1 差分数组概念
差分数组是前缀和的逆运算,主要用于处理区间更新操作。如果原数组是前缀和数组,那么差分数组就是原始数组。
5.2 差分数组的应用
class DifferenceArray:
def __init__(self, nums):
"""
初始化差分数组
Args:
nums: 原始数组
"""
n = len(nums)
self.diff = [0] * n
# 构建差分数组
self.diff[0] = nums[0]
for i in range(1, n):
self.diff[i] = nums[i] - nums[i-1]
def range_add(self, left, right, val):
"""
对区间[left, right]的所有元素加上val
Args:
left: 左边界
right: 右边界
val: 增加的值
"""
self.diff[left] += val
if right + 1 < len(self.diff):
self.diff[right + 1] -= val
def get_array(self):
"""
从差分数组恢复原数组
Returns:
恢复后的数组
"""
result = [0] * len(self.diff)
result[0] = self.diff[0]
for i in range(1, len(self.diff)):
result[i] = result[i-1] + self.diff[i]
return result
# 使用示例
nums = [1, 3, 5, 7, 9]
diff_arr = DifferenceArray(nums)
# 对区间[1,3]的所有元素加2
diff_arr.range_add(1, 3, 2)
print(diff_arr.get_array()) # [1, 5, 7, 9, 9]
5.3 二维差分数组
class Matrix2DDifference:
def __init__(self, matrix):
"""
初始化二维差分数组
"""
if not matrix or not matrix[0]:
return
m, n = len(matrix), len(matrix[0])
self.diff = [[0] * n for _ in range(m)]
# 构建二维差分数组
for i in range(m):
for j in range(n):
self.diff[i][j] = matrix[i][j]
if i > 0:
self.diff[i][j] -= matrix[i-1][j]
if j > 0:
self.diff[i][j] -= matrix[i][j-1]
if i > 0 and j > 0:
self.diff[i][j] += matrix[i-1][j-1]
def range_add(self, row1, col1, row2, col2, val):
"""
对矩形区域的所有元素加上val
"""
m, n = len(self.diff), len(self.diff[0])
self.diff[row1][col1] += val
if row2 + 1 < m:
self.diff[row2 + 1][col1] -= val
if col2 + 1 < n:
self.diff[row1][col2 + 1] -= val
if row2 + 1 < m and col2 + 1 < n:
self.diff[row2 + 1][col2 + 1] += val
def get_matrix(self):
"""
从差分数组恢复原矩阵
"""
m, n = len(self.diff), len(self.diff[0])
result = [[0] * n for _ in range(m)]
for i in range(m):
for j in range(n):
result[i][j] = self.diff[i][j]
if i > 0:
result[i][j] += result[i-1][j]
if j > 0:
result[i][j] += result[i][j-1]
if i > 0 and j > 0:
result[i][j] -= result[i-1][j-1]
return result
6. 代码实现
import java.util.*;
public class PrefixSumSolution {
/**
* 构建一维前缀和数组
*/
public static int[] buildPrefixSum(int[] nums) {
int n = nums.length;
int[] prefixSum = new int[n + 1];
for (int i = 0; i < n; i++) {
prefixSum[i + 1] = prefixSum[i] + nums[i];
}
return prefixSum;
}
/**
* 计算区间和
*/
public static int rangeSum(int[] prefixSum, int left, int right) {
return prefixSum[right + 1] - prefixSum[left];
}
/**
* LeetCode 560: 和为K的子数组
*/
public static int subarraySum(int[] nums, int k) {
int count = 0;
int prefixSum = 0;
Map<Integer, Integer> sumCount = new HashMap<>();
sumCount.put(0, 1);
for (int num : nums) {
prefixSum += num;
if (sumCount.containsKey(prefixSum - k)) {
count += sumCount.get(prefixSum - k);
}
sumCount.put(prefixSum, sumCount.getOrDefault(prefixSum, 0) + 1);
}
return count;
}
/**
* 二维前缀和实现
*/
public static class NumMatrix {
private int[][] prefixSum;
public NumMatrix(int[][] matrix) {
if (matrix == null || matrix.length == 0 || matrix[0].length == 0) {
return;
}
int m = matrix.length;
int n = matrix[0].length;
prefixSum = new int[m + 1][n + 1];
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
prefixSum[i][j] = matrix[i-1][j-1] +
prefixSum[i-1][j] +
prefixSum[i][j-1] -
prefixSum[i-1][j-1];
}
}
}
public int sumRegion(int row1, int col1, int row2, int col2) {
return prefixSum[row2 + 1][col2 + 1] -
prefixSum[row1][col2 + 1] -
prefixSum[row2 + 1][col1] +
prefixSum[row1][col1];
}
}
}
7. 复杂度分析
7.1 时间复杂度
| 操作 | 朴素方法 | 前缀和方法 | 优化效果 |
|---|---|---|---|
| 预处理 | O(1) | O(n) | 需要额外预处理 |
| 单次区间查询 | O(n) | O(1) | 显著提升 |
| m次区间查询 | O(mn) | O(m+n) | 大幅优化 |
7.2 空间复杂度
| 维度 | 空间复杂度 | 说明 |
|---|---|---|
| 一维前缀和 | O(n) | 额外数组存储前缀和 |
| 二维前缀和 | O(mn) | 二维数组存储前缀和 |
| 差分数组 | O(n) | 与原数组相同大小 |
7.3 性能对比分析
import time
import random
def performance_comparison():
"""前缀和性能对比测试"""
# 生成测试数据
n = 100000
nums = [random.randint(-100, 100) for _ in range(n)]
queries = [(random.randint(0, n//2), random.randint(n//2, n-1))
for _ in range(1000)]
# 朴素方法
start_time = time.time()
for left, right in queries:
sum(nums[left:right+1])
naive_time = time.time() - start_time
# 前缀和方法
start_time = time.time()
prefix_sum = [0]
for num in nums:
prefix_sum.append(prefix_sum[-1] + num)
for left, right in queries:
prefix_sum[right + 1] - prefix_sum[left]
prefix_time = time.time() - start_time
print(f"朴素方法耗时: {naive_time:.4f}s")
print(f"前缀和方法耗时: {prefix_time:.4f}s")
print(f"性能提升: {naive_time / prefix_time:.2f}x")
# 运行性能测试
performance_comparison()
8. 经典应用场景
8.1 区间查询优化
场景:频繁查询数组区间和
def optimize_range_queries(nums, queries):
"""
优化多次区间查询
Args:
nums: 原数组
queries: 查询列表 [(left1, right1), (left2, right2), ...]
Returns:
查询结果列表
"""
# 构建前缀和
prefix_sum = [0]
for num in nums:
prefix_sum.append(prefix_sum[-1] + num)
# 处理查询
results = []
for left, right in queries:
result = prefix_sum[right + 1] - prefix_sum[left]
results.append(result)
return results
8.2 子数组问题
场景:寻找满足特定条件的子数组
def count_subarrays_with_sum(nums, target):
"""
统计和为target的子数组个数
"""
count = 0
prefix_sum = 0
sum_count = {0: 1}
for num in nums:
prefix_sum += num
if prefix_sum - target in sum_count:
count += sum_count[prefix_sum - target]
sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1
return count
def max_subarray_sum_in_range(nums, k):
"""
寻找长度不超过k的最大子数组和
"""
n = len(nums)
prefix_sum = [0]
for num in nums:
prefix_sum.append(prefix_sum[-1] + num)
max_sum = float('-inf')
for i in range(n):
for j in range(i, min(i + k, n)):
current_sum = prefix_sum[j + 1] - prefix_sum[i]
max_sum = max(max_sum, current_sum)
return max_sum
8.3 二维矩阵应用
场景:图像处理中的积分图
def integral_image(image):
"""
计算图像的积分图(用于快速计算矩形区域像素和)
"""
if not image or not image[0]:
return []
m, n = len(image), len(image[0])
integral = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
integral[i][j] = (
image[i-1][j-1] +
integral[i-1][j] +
integral[i][j-1] -
integral[i-1][j-1]
)
return integral
def calculate_region_average(integral, row1, col1, row2, col2):
"""
计算矩形区域的平均值
"""
region_sum = (
integral[row2 + 1][col2 + 1] -
integral[row1][col2 + 1] -
integral[row2 + 1][col1] +
integral[row1][col1]
)
area = (row2 - row1 + 1) * (col2 - col1 + 1)
return region_sum / area
8.4 数据分析应用
场景:时间序列数据的滑动窗口统计
def sliding_window_statistics(data, window_size):
"""
计算滑动窗口统计信息
"""
n = len(data)
if n < window_size:
return []
# 构建前缀和
prefix_sum = [0]
for value in data:
prefix_sum.append(prefix_sum[-1] + value)
# 计算滑动窗口和
window_sums = []
for i in range(n - window_size + 1):
window_sum = prefix_sum[i + window_size] - prefix_sum[i]
window_sums.append(window_sum)
# 计算统计信息
statistics = {
'sums': window_sums,
'averages': [s / window_size for s in window_sums],
'max_sum': max(window_sums),
'min_sum': min(window_sums)
}
return statistics
9. LeetCode题目解析
9.1 基础题目
题目1: LeetCode 303 - 区域和检索 - 数组不可变
class NumArray:
"""
LeetCode 303: Range Sum Query - Immutable
给定一个整数数组nums,求出数组从索引i到j(i ≤ j)范围内元素的总和,包含i,j两点。
"""
def __init__(self, nums: List[int]):
"""
初始化数据结构
时间复杂度: O(n)
空间复杂度: O(n)
"""
self.prefix_sum = [0]
for num in nums:
self.prefix_sum.append(self.prefix_sum[-1] + num)
def sumRange(self, left: int, right: int) -> int:
"""
计算区间和
时间复杂度: O(1)
空间复杂度: O(1)
"""
return self.prefix_sum[right + 1] - self.prefix_sum[left]
# 测试用例
nums = [-2, 0, 3, -5, 2, -1]
num_array = NumArray(nums)
print(num_array.sumRange(0, 2)) # 输出: 1
print(num_array.sumRange(2, 5)) # 输出: -1
print(num_array.sumRange(0, 5)) # 输出: -3
题目2: LeetCode 304 - 二维区域和检索 - 矩阵不可变
class NumMatrix:
"""
LeetCode 304: Range Sum Query 2D - Immutable
给定一个二维矩阵,计算其子矩形范围内元素的总和
"""
def __init__(self, matrix: List[List[int]]):
"""
初始化二维前缀和
时间复杂度: O(mn)
空间复杂度: O(mn)
"""
if not matrix or not matrix[0]:
return
m, n = len(matrix), len(matrix[0])
self.prefix_sum = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(1, m + 1):
for j in range(1, n + 1):
self.prefix_sum[i][j] = (
matrix[i-1][j-1] +
self.prefix_sum[i-1][j] +
self.prefix_sum[i][j-1] -
self.prefix_sum[i-1][j-1]
)
def sumRegion(self, row1: int, col1: int, row2: int, col2: int) -> int:
"""
计算矩形区域和
时间复杂度: O(1)
空间复杂度: O(1)
"""
return (
self.prefix_sum[row2 + 1][col2 + 1] -
self.prefix_sum[row1][col2 + 1] -
self.prefix_sum[row2 + 1][col1] +
self.prefix_sum[row1][col1]
)
# 测试用例
matrix = [
[3, 0, 1, 4, 2],
[5, 6, 3, 2, 1],
[1, 2, 0, 1, 5],
[4, 1, 0, 1, 7],
[1, 0, 3, 0, 5]
]
num_matrix = NumMatrix(matrix)
print(num_matrix.sumRegion(2, 1, 4, 3)) # 输出: 8
print(num_matrix.sumRegion(1, 1, 2, 2)) # 输出: 11
9.2 进阶题目
题目3: LeetCode 560 - 和为K的子数组
def subarraySum(nums: List[int], k: int) -> int:
"""
LeetCode 560: Subarray Sum Equals K
给定一个整数数组和一个整数k,找到该数组中和为k的连续子数组的个数
解题思路:
1. 使用前缀和 + 哈希表
2. 对于每个位置i,查找是否存在前缀和为 prefix_sum[i] - k
3. 如果存在,说明从某个位置到i的子数组和为k
时间复杂度: O(n)
空间复杂度: O(n)
"""
count = 0
prefix_sum = 0
sum_count = {0: 1} # 前缀和 -> 出现次数
for num in nums:
prefix_sum += num
# 查找是否存在前缀和为 prefix_sum - k
if prefix_sum - k in sum_count:
count += sum_count[prefix_sum - k]
# 更新当前前缀和的计数
sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1
return count
# 测试用例
print(subarraySum([1, 1, 1], 2)) # 输出: 2
print(subarraySum([1, 2, 3], 3)) # 输出: 2
print(subarraySum([1, -1, 0], 0)) # 输出: 3
题目4: LeetCode 724 - 寻找数组的中心索引
def pivotIndex(nums: List[int]) -> int:
"""
LeetCode 724: Find Pivot Index
给定一个整数类型的数组nums,请编写一个能够返回数组"中心索引"的方法
中心索引是数组的一个索引,其左侧所有元素相加的和等于右侧所有元素相加的和
解题思路:
1. 计算数组总和
2. 遍历数组,维护左侧和
3. 检查左侧和是否等于右侧和
时间复杂度: O(n)
空间复杂度: O(1)
"""
total_sum = sum(nums)
left_sum = 0
for i, num in enumerate(nums):
# 右侧和 = 总和 - 左侧和 - 当前元素
right_sum = total_sum - left_sum - num
if left_sum == right_sum:
return i
left_sum += num
return -1
# 测试用例
print(pivotIndex([1, 7, 3, 6, 5, 6])) # 输出: 3
print(pivotIndex([1, 2, 3])) # 输出: -1
print(pivotIndex([2, 1, -1])) # 输出: 0
9.3 困难题目
题目5: LeetCode 1074 - 元素和为目标值的子矩阵数量
def numSubmatrixSumTarget(matrix: List[List[int]], target: int) -> int:
"""
LeetCode 1074: Number of Submatrices That Sum to Target
给出矩阵matrix和目标值target,返回元素总和等于目标值的非空子矩阵的数量
解题思路:
1. 枚举上下边界
2. 将二维问题转化为一维问题
3. 使用前缀和 + 哈希表求解
时间复杂度: O(m²n)
空间复杂度: O(n)
"""
m, n = len(matrix), len(matrix[0])
count = 0
# 枚举上边界
for top in range(m):
# 压缩为一维数组
compressed = [0] * n
# 枚举下边界
for bottom in range(top, m):
# 更新压缩数组
for j in range(n):
compressed[j] += matrix[bottom][j]
# 在一维数组中寻找和为target的子数组
count += subarraySum(compressed, target)
return count
# 辅助函数:和为K的子数组个数
def subarraySum(nums: List[int], k: int) -> int:
count = 0
prefix_sum = 0
sum_count = {0: 1}
for num in nums:
prefix_sum += num
if prefix_sum - k in sum_count:
count += sum_count[prefix_sum - k]
sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1
return count
# 测试用例
matrix1 = [[0,1,0],[1,1,1],[0,1,0]]
print(numSubmatrixSumTarget(matrix1, 0)) # 输出: 4
matrix2 = [[1,-1],[-1,1]]
print(numSubmatrixSumTarget(matrix2, 0)) # 输出: 5
题目6: LeetCode 1248 - 统计「优美子数组」
def numberOfSubarrays(nums: List[int], k: int) -> int:
"""
LeetCode 1248: Count Number of Nice Subarrays
给定一个由整数组成的数组nums和一个整数k,返回「优美子数组」的数目
「优美子数组」定义为某个子数组中奇数的个数恰好为k
解题思路:
1. 将奇数看作1,偶数看作0
2. 问题转化为寻找和为k的子数组个数
3. 使用前缀和 + 哈希表
时间复杂度: O(n)
空间复杂度: O(n)
"""
count = 0
prefix_sum = 0
sum_count = {0: 1}
for num in nums:
# 奇数贡献1,偶数贡献0
prefix_sum += num % 2
# 查找前缀和为 prefix_sum - k 的个数
if prefix_sum - k in sum_count:
count += sum_count[prefix_sum - k]
sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1
return count
# 测试用例
print(numberOfSubarrays([1,1,2,1,1], 3)) # 输出: 2
print(numberOfSubarrays([2,4,6], 1)) # 输出: 0
print(numberOfSubarrays([2,2,2,1,2,2,1,2,2,2], 2)) # 输出: 16
10. 面试问题解析
10.1 常见面试问题
问题1:前缀和的基本原理是什么?
回答要点:
- 核心思想:空间换时间,通过预处理降低查询复杂度
- 数学原理:利用区间和的差值性质
- 适用场景:频繁的区间查询操作
详细解答:
def explain_prefix_sum():
"""
前缀和原理解释
"""
# 原数组
arr = [2, 1, 3, 4, 5]
# 构建前缀和数组
prefix_sum = [0] # 添加哨兵元素
for num in arr:
prefix_sum.append(prefix_sum[-1] + num)
print(f"原数组: {arr}")
print(f"前缀和数组: {prefix_sum}")
# 计算区间[1,3]的和
left, right = 1, 3
result = prefix_sum[right + 1] - prefix_sum[left]
print(f"区间[{left},{right}]的和: {result}")
print(f"验证: {sum(arr[left:right+1])}")
return result
问题2:如何处理负数数组的前缀和?
回答要点:
- 前缀和算法对负数完全适用
- 需要注意溢出问题
- 哈希表中可能出现负的前缀和
代码示例:
def handle_negative_numbers():
"""
处理包含负数的前缀和
"""
nums = [-2, 1, -3, 4, -1, 2, 1, -5, 4]
# 构建前缀和
prefix_sum = [0]
for num in nums:
prefix_sum.append(prefix_sum[-1] + num)
print(f"原数组: {nums}")
print(f"前缀和数组: {prefix_sum}")
# 寻找和为0的子数组
target = 0
sum_indices = {0: [-1]} # 前缀和 -> 索引列表
for i, ps in enumerate(prefix_sum[1:], 0):
if ps in sum_indices:
for start_idx in sum_indices[ps]:
print(f"找到和为{target}的子数组: [{start_idx+1}, {i}]")
if ps not in sum_indices:
sum_indices[ps] = []
sum_indices[ps].append(i)
问题3:二维前缀和的空间优化方法?
回答要点:
- 原地构建前缀和(如果允许修改原数组)
- 滚动数组优化
- 按需计算部分区域
优化实现:
def optimized_2d_prefix_sum(matrix):
"""
空间优化的二维前缀和
"""
if not matrix or not matrix[0]:
return matrix
m, n = len(matrix), len(matrix[0])
# 原地构建前缀和(修改原数组)
# 第一行
for j in range(1, n):
matrix[0][j] += matrix[0][j-1]
# 第一列
for i in range(1, m):
matrix[i][0] += matrix[i-1][0]
# 其他位置
for i in range(1, m):
for j in range(1, n):
matrix[i][j] += matrix[i-1][j] + matrix[i][j-1] - matrix[i-1][j-1]
return matrix
def query_with_boundary(matrix, row1, col1, row2, col2):
"""
带边界检查的查询
"""
def get_value(r, c):
if r < 0 or c < 0:
return 0
return matrix[r][c]
return (get_value(row2, col2) -
get_value(row1-1, col2) -
get_value(row2, col1-1) +
get_value(row1-1, col1-1))
10.2 进阶面试问题
问题4:如何在前缀和基础上支持动态更新?
回答要点:
- 单点更新:重新计算后续所有前缀和 O(n)
- 区间更新:使用差分数组 O(1)
- 更高级:线段树、树状数组
实现方案:
class DynamicPrefixSum:
"""
支持动态更新的前缀和
"""
def __init__(self, nums):
self.nums = nums[:]
self.prefix_sum = self._build_prefix_sum()
def _build_prefix_sum(self):
prefix_sum = [0]
for num in self.nums:
prefix_sum.append(prefix_sum[-1] + num)
return prefix_sum
def update(self, index, val):
"""
单点更新
时间复杂度: O(n)
"""
old_val = self.nums[index]
self.nums[index] = val
diff = val - old_val
# 更新后续所有前缀和
for i in range(index + 1, len(self.prefix_sum)):
self.prefix_sum[i] += diff
def range_sum(self, left, right):
"""
区间查询
时间复杂度: O(1)
"""
return self.prefix_sum[right + 1] - self.prefix_sum[left]
# 使用差分数组支持区间更新
class RangeUpdatePrefixSum:
"""
支持区间更新的前缀和
"""
def __init__(self, nums):
self.n = len(nums)
self.diff = [0] * self.n
# 构建差分数组
self.diff[0] = nums[0]
for i in range(1, self.n):
self.diff[i] = nums[i] - nums[i-1]
def range_update(self, left, right, val):
"""
区间更新
时间复杂度: O(1)
"""
self.diff[left] += val
if right + 1 < self.n:
self.diff[right + 1] -= val
def get_array(self):
"""
获取当前数组
时间复杂度: O(n)
"""
result = [0] * self.n
result[0] = self.diff[0]
for i in range(1, self.n):
result[i] = result[i-1] + self.diff[i]
return result
def range_sum(self, left, right):
"""
区间查询(需要先恢复数组)
"""
arr = self.get_array()
return sum(arr[left:right+1])
问题5:前缀和在分布式系统中的应用?
回答要点:
- 数据分片:每个分片维护局部前缀和
- 全局查询:合并多个分片的结果
- 一致性:处理数据更新的一致性问题
设计方案:
class DistributedPrefixSum:
"""
分布式前缀和系统设计
"""
def __init__(self, data, num_shards=4):
self.num_shards = num_shards
self.shards = self._partition_data(data)
self.shard_prefix_sums = []
self.global_offsets = [0] # 每个分片的全局偏移
# 构建每个分片的前缀和
for shard in self.shards:
shard_ps = self._build_prefix_sum(shard)
self.shard_prefix_sums.append(shard_ps)
# 计算全局偏移
if shard_ps:
self.global_offsets.append(
self.global_offsets[-1] + shard_ps[-1]
)
def _partition_data(self, data):
"""数据分片"""
n = len(data)
shard_size = (n + self.num_shards - 1) // self.num_shards
shards = []
for i in range(0, n, shard_size):
shards.append(data[i:i + shard_size])
return shards
def _build_prefix_sum(self, arr):
"""构建单个分片的前缀和"""
if not arr:
return []
prefix_sum = [arr[0]]
for i in range(1, len(arr)):
prefix_sum.append(prefix_sum[-1] + arr[i])
return prefix_sum
def range_sum(self, left, right):
"""
分布式区间查询
"""
# 确定涉及的分片
left_shard = left // len(self.shards[0]) if self.shards[0] else 0
right_shard = right // len(self.shards[0]) if self.shards[0] else 0
total_sum = 0
for shard_id in range(left_shard, right_shard + 1):
if shard_id >= len(self.shards):
break
shard = self.shards[shard_id]
if not shard:
continue
# 计算在当前分片中的索引范围
shard_start = shard_id * len(self.shards[0])
local_left = max(0, left - shard_start)
local_right = min(len(shard) - 1, right - shard_start)
if local_left <= local_right:
# 计算分片内的区间和
shard_sum = self.shard_prefix_sums[shard_id][local_right]
if local_left > 0:
shard_sum -= self.shard_prefix_sums[shard_id][local_left - 1]
total_sum += shard_sum
return total_sum
10.3 面试技巧总结
回答框架
- 理解问题:确认题目要求和约束条件
- 分析复杂度:说明朴素方法的时间复杂度
- 提出优化:解释前缀和的优化思路
- 编码实现:写出清晰的代码
- 测试验证:考虑边界情况和测试用例
- 扩展讨论:讨论相关变种和优化
常见陷阱
- 索引边界:注意前缀和数组的索引偏移
- 空数组处理:考虑输入为空的情况
- 整数溢出:大数相加可能溢出
- 负数处理:前缀和可能为负数
11. 性能优化技巧
11.1 空间优化
原地构建前缀和
def inplace_prefix_sum(nums):
"""
原地构建前缀和(修改原数组)
空间复杂度: O(1)
"""
for i in range(1, len(nums)):
nums[i] += nums[i-1]
return nums
def inplace_2d_prefix_sum(matrix):
"""
原地构建二维前缀和
"""
if not matrix or not matrix[0]:
return matrix
m, n = len(matrix), len(matrix[0])
# 处理第一行
for j in range(1, n):
matrix[0][j] += matrix[0][j-1]
# 处理第一列
for i in range(1, m):
matrix[i][0] += matrix[i-1][0]
# 处理其他位置
for i in range(1, m):
for j in range(1, n):
matrix[i][j] += (matrix[i-1][j] +
matrix[i][j-1] -
matrix[i-1][j-1])
return matrix
滚动数组优化
def rolling_array_2d_prefix_sum(matrix, queries):
"""
使用滚动数组优化二维前缀和
适用于查询较少的情况
"""
if not matrix or not matrix[0]:
return []
m, n = len(matrix), len(matrix[0])
results = []
for row1, col1, row2, col2 in queries:
# 为当前查询构建局部前缀和
local_sum = 0
for i in range(row1, row2 + 1):
row_sum = 0
for j in range(col1, col2 + 1):
row_sum += matrix[i][j]
local_sum += row_sum
results.append(local_sum)
return results
11.2 时间优化
并行计算前缀和
import concurrent.futures
import threading
class ParallelPrefixSum:
"""
并行计算前缀和
"""
def __init__(self, nums, num_threads=4):
self.nums = nums
self.num_threads = num_threads
self.n = len(nums)
def parallel_build(self):
"""
并行构建前缀和
"""
if self.n <= 1000: # 小数组直接计算
return self._sequential_build()
# 分块计算
chunk_size = (self.n + self.num_threads - 1) // self.num_threads
chunks = []
for i in range(0, self.n, chunk_size):
chunks.append(self.nums[i:i + chunk_size])
# 并行计算每块的前缀和
with concurrent.futures.ThreadPoolExecutor(max_workers=self.num_threads) as executor:
chunk_results = list(executor.map(self._build_chunk_prefix_sum, chunks))
# 合并结果
return self._merge_chunks(chunk_results)
def _sequential_build(self):
"""顺序构建前缀和"""
prefix_sum = [0]
for num in self.nums:
prefix_sum.append(prefix_sum[-1] + num)
return prefix_sum
def _build_chunk_prefix_sum(self, chunk):
"""构建单个块的前缀和"""
if not chunk:
return []
chunk_prefix = [chunk[0]]
for i in range(1, len(chunk)):
chunk_prefix.append(chunk_prefix[-1] + chunk[i])
return chunk_prefix
def _merge_chunks(self, chunk_results):
"""合并块结果"""
merged = [0]
global_offset = 0
for chunk_prefix in chunk_results:
for val in chunk_prefix:
merged.append(global_offset + val)
if chunk_prefix:
global_offset += chunk_prefix[-1]
return merged
缓存优化
from functools import lru_cache
class CachedPrefixSum:
"""
带缓存的前缀和查询
"""
def __init__(self, nums):
self.nums = nums
self.prefix_sum = self._build_prefix_sum()
def _build_prefix_sum(self):
prefix_sum = [0]
for num in self.nums:
prefix_sum.append(prefix_sum[-1] + num)
return prefix_sum
@lru_cache(maxsize=1000)
def range_sum(self, left, right):
"""
带缓存的区间查询
"""
return self.prefix_sum[right + 1] - self.prefix_sum[left]
@lru_cache(maxsize=500)
def subarray_count(self, target):
"""
缓存子数组计数结果
"""
count = 0
prefix_sum = 0
sum_count = {0: 1}
for num in self.nums:
prefix_sum += num
if prefix_sum - target in sum_count:
count += sum_count[prefix_sum - target]
sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1
return count
11.3 内存优化
压缩存储
class CompressedPrefixSum:
"""
压缩存储的前缀和
适用于稀疏数组
"""
def __init__(self, nums):
self.n = len(nums)
self.non_zero_indices = []
self.prefix_sums = []
current_sum = 0
for i, num in enumerate(nums):
if num != 0:
current_sum += num
self.non_zero_indices.append(i)
self.prefix_sums.append(current_sum)
def range_sum(self, left, right):
"""
计算区间和(压缩版本)
"""
if not self.non_zero_indices:
return 0
# 找到左边界和右边界在压缩数组中的位置
left_idx = self._binary_search_left(left)
right_idx = self._binary_search_right(right)
if left_idx > right_idx:
return 0
result = self.prefix_sums[right_idx]
if left_idx > 0:
result -= self.prefix_sums[left_idx - 1]
return result
def _binary_search_left(self, target):
"""二分查找左边界"""
left, right = 0, len(self.non_zero_indices) - 1
result = len(self.non_zero_indices)
while left <= right:
mid = (left + right) // 2
if self.non_zero_indices[mid] >= target:
result = mid
right = mid - 1
else:
left = mid + 1
return result
def _binary_search_right(self, target):
"""二分查找右边界"""
left, right = 0, len(self.non_zero_indices) - 1
result = -1
while left <= right:
mid = (left + right) // 2
if self.non_zero_indices[mid] <= target:
result = mid
left = mid + 1
else:
right = mid - 1
return result
懒加载优化
class LazyPrefixSum:
"""
懒加载前缀和
只在需要时计算部分前缀和
"""
def __init__(self, nums):
self.nums = nums
self.n = len(nums)
self.computed = [False] * (self.n + 1)
self.prefix_sum = [0] * (self.n + 1)
self.computed[0] = True
def _ensure_computed(self, index):
"""确保到指定索引的前缀和已计算"""
if self.computed[index]:
return
# 找到最近的已计算位置
start = index - 1
while start >= 0 and not self.computed[start]:
start -= 1
# 从已计算位置开始计算
for i in range(start + 1, index + 1):
self.prefix_sum[i] = self.prefix_sum[i-1] + self.nums[i-1]
self.computed[i] = True
def range_sum(self, left, right):
"""
懒加载区间查询
"""
self._ensure_computed(right + 1)
self._ensure_computed(left)
return self.prefix_sum[right + 1] - self.prefix_sum[left]
11.4 算法优化
分块前缀和
import math
class BlockPrefixSum:
"""
分块前缀和
平衡查询和更新的复杂度
"""
def __init__(self, nums):
self.nums = nums[:]
self.n = len(nums)
self.block_size = int(math.sqrt(self.n)) + 1
self.num_blocks = (self.n + self.block_size - 1) // self.block_size
# 构建块前缀和
self.block_sums = [0] * self.num_blocks
self._build_blocks()
def _build_blocks(self):
"""构建块前缀和"""
for i in range(self.num_blocks):
start = i * self.block_size
end = min(start + self.block_size, self.n)
block_sum = 0
for j in range(start, end):
block_sum += self.nums[j]
self.block_sums[i] = block_sum
def update(self, index, val):
"""
单点更新
时间复杂度: O(1)
"""
old_val = self.nums[index]
self.nums[index] = val
# 更新对应块的和
block_id = index // self.block_size
self.block_sums[block_id] += val - old_val
def range_sum(self, left, right):
"""
区间查询
时间复杂度: O(√n)
"""
left_block = left // self.block_size
right_block = right // self.block_size
if left_block == right_block:
# 在同一块内
return sum(self.nums[left:right+1])
result = 0
# 左边不完整的块
left_end = (left_block + 1) * self.block_size
result += sum(self.nums[left:left_end])
# 中间完整的块
for block_id in range(left_block + 1, right_block):
result += self.block_sums[block_id]
# 右边不完整的块
right_start = right_block * self.block_size
result += sum(self.nums[right_start:right+1])
return result
12. 最佳实践总结
12.1 设计原则
1. 选择合适的数据结构
def choose_prefix_sum_structure(requirements):
"""
根据需求选择合适的前缀和结构
Args:
requirements: 需求字典
- query_frequency: 查询频率
- update_frequency: 更新频率
- space_constraint: 空间限制
- data_sparsity: 数据稀疏度
Returns:
推荐的数据结构
"""
if requirements.get('update_frequency', 0) == 0:
# 静态数据,优先选择标准前缀和
if requirements.get('space_constraint', False):
return "InPlacePrefixSum"
else:
return "StandardPrefixSum"
elif requirements.get('update_frequency', 0) > requirements.get('query_frequency', 0):
# 更新频繁,选择差分数组
return "DifferenceArray"
elif requirements.get('data_sparsity', 0) > 0.8:
# 稀疏数据,选择压缩存储
return "CompressedPrefixSum"
else:
# 平衡查询和更新,选择分块结构
return "BlockPrefixSum"
2. 错误处理和边界检查
class RobustPrefixSum:
"""
健壮的前缀和实现
包含完整的错误处理和边界检查
"""
def __init__(self, nums):
if not isinstance(nums, (list, tuple)):
raise TypeError("Input must be a list or tuple")
if not nums:
self.prefix_sum = [0]
self.n = 0
return
# 检查数值类型
if not all(isinstance(x, (int, float)) for x in nums):
raise ValueError("All elements must be numbers")
self.n = len(nums)
self.prefix_sum = [0]
try:
for num in nums:
self.prefix_sum.append(self.prefix_sum[-1] + num)
except OverflowError:
raise ValueError("Numeric overflow detected")
def range_sum(self, left, right):
"""
安全的区间查询
"""
# 参数类型检查
if not isinstance(left, int) or not isinstance(right, int):
raise TypeError("Indices must be integers")
# 边界检查
if left < 0 or right >= self.n:
raise IndexError(f"Index out of range: [{left}, {right}], valid range: [0, {self.n-1}]")
if left > right:
raise ValueError(f"Invalid range: left ({left}) > right ({right})")
return self.prefix_sum[right + 1] - self.prefix_sum[left]
def __len__(self):
return self.n
def __repr__(self):
return f"RobustPrefixSum(length={self.n})"
12.2 性能优化指南
1. 内存使用优化
class MemoryOptimizedPrefixSum:
"""
内存优化的前缀和实现
"""
def __init__(self, nums, use_numpy=False):
self.n = len(nums)
if use_numpy and self._has_numpy():
import numpy as np
# 使用numpy可以减少内存开销
self.prefix_sum = np.cumsum([0] + nums, dtype=np.int64)
else:
# 标准实现
self.prefix_sum = [0]
for num in nums:
self.prefix_sum.append(self.prefix_sum[-1] + num)
def _has_numpy(self):
"""检查是否有numpy"""
try:
import numpy
return True
except ImportError:
return False
def range_sum(self, left, right):
return self.prefix_sum[right + 1] - self.prefix_sum[left]
def memory_usage(self):
"""估算内存使用量"""
import sys
return sys.getsizeof(self.prefix_sum)
2. 缓存策略
from functools import lru_cache
from collections import OrderedDict
class CacheOptimizedPrefixSum:
"""
缓存优化的前缀和
"""
def __init__(self, nums, cache_size=1000):
self.nums = nums
self.prefix_sum = self._build_prefix_sum()
self.cache_size = cache_size
self.query_cache = OrderedDict()
def _build_prefix_sum(self):
prefix_sum = [0]
for num in self.nums:
prefix_sum.append(prefix_sum[-1] + num)
return prefix_sum
def range_sum(self, left, right):
"""
带缓存的区间查询
"""
cache_key = (left, right)
if cache_key in self.query_cache:
# 缓存命中,移到末尾(LRU)
self.query_cache.move_to_end(cache_key)
return self.query_cache[cache_key]
# 计算结果
result = self.prefix_sum[right + 1] - self.prefix_sum[left]
# 添加到缓存
if len(self.query_cache) >= self.cache_size:
# 移除最久未使用的项
self.query_cache.popitem(last=False)
self.query_cache[cache_key] = result
return result
def cache_stats(self):
"""缓存统计信息"""
return {
'cache_size': len(self.query_cache),
'max_size': self.cache_size,
'usage_ratio': len(self.query_cache) / self.cache_size
}
12.3 测试和验证
1. 单元测试框架
import unittest
import random
class TestPrefixSum(unittest.TestCase):
"""
前缀和算法的完整测试套件
"""
def setUp(self):
"""测试准备"""
self.test_cases = [
[], # 空数组
[1], # 单元素
[1, 2, 3, 4, 5], # 正数
[-1, -2, -3], # 负数
[1, -1, 2, -2], # 混合
[0, 0, 0], # 零
]
def test_basic_functionality(self):
"""测试基本功能"""
for nums in self.test_cases:
if not nums:
continue
ps = PrefixSum(nums)
# 测试所有可能的区间
for i in range(len(nums)):
for j in range(i, len(nums)):
expected = sum(nums[i:j+1])
actual = ps.sum_range(i, j)
self.assertEqual(actual, expected,
f"Failed for nums={nums}, range=[{i},{j}]")
def test_edge_cases(self):
"""测试边界情况"""
nums = [1, 2, 3, 4, 5]
ps = PrefixSum(nums)
# 测试边界
self.assertEqual(ps.sum_range(0, 0), 1) # 单个元素
self.assertEqual(ps.sum_range(0, 4), 15) # 整个数组
self.assertEqual(ps.sum_range(4, 4), 5) # 最后一个元素
def test_performance(self):
"""性能测试"""
# 大数组测试
large_nums = [random.randint(-100, 100) for _ in range(10000)]
ps = PrefixSum(large_nums)
# 大量查询测试
import time
start_time = time.time()
for _ in range(1000):
left = random.randint(0, len(large_nums) // 2)
right = random.randint(len(large_nums) // 2, len(large_nums) - 1)
ps.sum_range(left, right)
elapsed = time.time() - start_time
self.assertLess(elapsed, 1.0, "Performance test failed")
def test_memory_usage(self):
"""内存使用测试"""
import sys
nums = list(range(10000))
ps = PrefixSum(nums)
memory_usage = sys.getsizeof(ps.prefix_sum)
expected_memory = len(nums) * 8 * 1.5 # 估算值
self.assertLess(memory_usage, expected_memory, "Memory usage too high")
if __name__ == '__main__':
unittest.main()
2. 基准测试
import time
import random
import matplotlib.pyplot as plt
class PrefixSumBenchmark:
"""
前缀和算法基准测试
"""
def __init__(self):
self.results = {}
def benchmark_construction(self, sizes):
"""测试构建时间"""
construction_times = []
for size in sizes:
nums = [random.randint(-1000, 1000) for _ in range(size)]
start_time = time.time()
PrefixSum(nums)
end_time = time.time()
construction_times.append(end_time - start_time)
self.results['construction'] = {
'sizes': sizes,
'times': construction_times
}
def benchmark_queries(self, array_size, num_queries_list):
"""测试查询时间"""
nums = [random.randint(-1000, 1000) for _ in range(array_size)]
ps = PrefixSum(nums)
query_times = []
for num_queries in num_queries_list:
queries = [(random.randint(0, array_size//2),
random.randint(array_size//2, array_size-1))
for _ in range(num_queries)]
start_time = time.time()
for left, right in queries:
ps.sum_range(left, right)
end_time = time.time()
query_times.append(end_time - start_time)
self.results['queries'] = {
'num_queries': num_queries_list,
'times': query_times
}
def plot_results(self):
"""绘制基准测试结果"""
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# 构建时间图
if 'construction' in self.results:
data = self.results['construction']
ax1.plot(data['sizes'], data['times'], 'b-o')
ax1.set_xlabel('Array Size')
ax1.set_ylabel('Construction Time (s)')
ax1.set_title('Prefix Sum Construction Time')
ax1.grid(True)
# 查询时间图
if 'queries' in self.results:
data = self.results['queries']
ax2.plot(data['num_queries'], data['times'], 'r-o')
ax2.set_xlabel('Number of Queries')
ax2.set_ylabel('Query Time (s)')
ax2.set_title('Prefix Sum Query Time')
ax2.grid(True)
plt.tight_layout()
plt.show()
# 运行基准测试
def run_benchmark():
benchmark = PrefixSumBenchmark()
# 测试构建时间
sizes = [1000, 5000, 10000, 50000, 100000]
benchmark.benchmark_construction(sizes)
# 测试查询时间
num_queries_list = [100, 500, 1000, 5000, 10000]
benchmark.benchmark_queries(10000, num_queries_list)
# 绘制结果
benchmark.plot_results()
12.4 实际应用建议
1. 选择标准
- 静态数据 + 频繁查询:使用标准前缀和
- 动态数据 + 区间更新:使用差分数组
- 稀疏数据:使用压缩存储
- 平衡查询更新:使用分块或树状数组
2. 优化策略
- 内存受限:考虑原地构建或压缩存储
- 查询热点:添加缓存机制
- 大数据量:考虑并行计算
- 实时性要求:使用懒加载或增量更新
3. 常见陷阱
- 整数溢出:使用适当的数据类型
- 索引错误:仔细处理边界条件
- 内存泄漏:及时清理缓存
- 并发安全:多线程环境下的同步
12.5 扩展学习
相关算法
- 线段树:支持更复杂的区间操作
- 树状数组:平衡查询和更新复杂度
- 稀疏表:处理区间最值查询
- 莫队算法:离线区间查询优化
进阶应用
- 计算几何:积分图在图像处理中的应用
- 数据库:列存储中的前缀和索引
- 机器学习:特征工程中的累积统计
- 分布式系统:分片数据的聚合查询
总结
前缀和算法是一种简单而强大的优化技术,通过预处理实现了从O(n)到O(1)的查询优化。掌握前缀和不仅能帮助解决大量算法问题,更重要的是培养了空间换时间的优化思维。
核心要点回顾
- 基本原理:利用累积和的差值计算区间和
- 时间复杂度:预处理O(n),查询O(1)
- 空间复杂度:额外O(n)存储空间
- 适用场景:频繁的区间查询操作
- 扩展应用:二维前缀和、差分数组、哈希表优化
学习建议
- 理解原理:深入理解前缀和的数学基础
- 熟练实现:能够快速实现一维和二维前缀和
- 灵活应用:结合哈希表解决复杂问题
- 性能优化:根据实际需求选择合适的优化策略
- 扩展学习:学习相关的高级数据结构
通过系统学习和大量练习,前缀和算法将成为你算法工具箱中的重要武器,为解决更复杂的问题奠定坚实基础。
文章标签
冬眠
博主专注于技术、阅读与思考。在这里记录学习、思考与生活。
系列:前缀和
第 1 篇,共 2 篇
