识别部分

由于底层逻辑是通过识别黑胶带轮廓来确定A4纸的位置, 而题目描述中提到有一条宽为5mm的黑色基准线, 当A4纸目标物紧贴地面(即紧贴基准线)时, 黑胶带的外轮廓会与基准线重合, 这时可能导致无法正确识别A4纸目标物。因此我们改为识别黑胶带的内轮廓来确定A4纸的位置。

所以先定义黑胶带内轮廓的数据

1
2
# Physical size (in millimetres) of the INNER contour of the black tape border,
# per the surrounding notes; despite the "A4" prefix this is not the full sheet.
# NOTE(review): presumably A4 (210 x 297 mm) minus a 20 mm tape border on each
# side -- confirm against the task specification.
A4_WIDTH_MM = 170
A4_HEIGHT_MM = 257

识别黑胶带内轮廓

首先我们需要对输入图像进行预处理:

  1. 转换为灰度图像
  2. 应用高斯模糊
  3. 使用Otsu阈值进行反向二值化(cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU), 结果保存在变量edged中

接着使用cv2.findContours(edged, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)来寻找所有轮廓, 并将其中面积大于5000的轮廓存放进valid_contours列表中, 以便后续按面积从大到小排序后, 采用第二大的轮廓作为Target的内轮廓

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def find_a4_contour(image, roi=(526, 222, 227, 276)):
    """Find the target's inner tape contour inside a ROI and rectify it.

    The ROI is converted to grayscale, blurred, and binarised with an inverted
    Otsu threshold. Contours whose area exceeds 5000 px are sorted by area and
    the second-largest one is taken as the target (the notes accompanying this
    code describe it as the inner edge of the black tape border). If that
    contour approximates to a quadrilateral whose bounding-box width/height
    ratio lies strictly between 0.35 and 0.75, it is perspective-corrected.

    Side effects: a blue ROI rectangle and a "W: .., H: .." label are drawn on
    ``image``; the accepted quadrilateral is drawn on the ROI slice of
    ``image``.

    Args:
        image: BGR image (it is passed through ``cv2.COLOR_BGR2GRAY``).
        roi: ``(x, y, w, h)`` of the search window in ``image`` pixels.
            Defaults to the previously hard-coded window.

    Returns:
        The rectified top-down view produced by ``get_topdown_view``, or
        ``None`` when fewer than two large contours are found, the target is
        not a quadrilateral, or its aspect ratio is out of range.
    """
    roi_x, roi_y, roi_w, roi_h = roi
    roi_image = image[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]

    # NOTE(review): the rectangle is drawn on `image` BEFORE the ROI is
    # thresholded, and `roi_image` is a slice of `image`; with thickness 2 some
    # border pixels may fall inside the ROI and show up as foreground --
    # confirm this is intended before reordering.
    cv2.rectangle(image, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h), (255, 0, 0), 2)

    gray = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    # Inverted Otsu threshold: dark regions (the tape) become foreground.
    binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Keep only large contours; this is an area filter, no shape test yet.
    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > 5000]
    if len(valid_contours) < 2:
        return None

    # Second-largest by area is used as the target contour.
    target = sorted(valid_contours, key=cv2.contourArea, reverse=True)[1]

    # Polygon approximation with a tolerance of 2% of the perimeter.
    epsilon = 0.02 * cv2.arcLength(target, True)
    approx = cv2.approxPolyDP(target, epsilon, True)
    if len(approx) != 4:
        return None

    _, _, w, h = cv2.boundingRect(approx)
    aspect_ratio = float(w) / h
    # Reject contours that are too wide or too narrow.
    if not 0.35 < aspect_ratio < 0.75:
        return None

    # Warp first, then annotate, so the overlay does not end up in `warped`.
    warped = get_topdown_view(roi_image, order_points(approx.reshape(4, 2)))
    cv2.putText(image, f"W: {warped.shape[1]}, H: {warped.shape[0]}", (roi_x, roi_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.drawContours(roi_image, [approx], -1, (0, 255, 0), 2)
    return warped

透视变换

首先需要对识别到的矩形轮廓进行角点排序, 以便后续的透视变换

1
2
3
4
5
6
7
8
9
10
11
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    Uses image coordinates (y grows downwards): the top-left corner has the
    smallest ``x + y`` and the bottom-right the largest; the top-right corner
    has the smallest ``y - x`` and the bottom-left the largest.

    Args:
        pts: Array-like of ``(x, y)`` points. Anything that reshapes to
            ``(N, 2)`` is accepted, e.g. a ``(4, 2)`` array, a list of pairs,
            or the ``(4, 1, 2)`` layout returned by ``cv2.approxPolyDP``.

    Returns:
        A ``(4, 2)`` float32 array in the order TL, TR, BR, BL.
    """
    pts = np.asarray(pts).reshape(-1, 2)
    s = pts.sum(axis=1)            # x + y per point
    diff = np.diff(pts, axis=1)    # y - x per point, shape (N, 1)

    rect = np.zeros((4, 2), dtype="float32")
    rect[0] = pts[np.argmin(s)]     # top-left
    rect[2] = pts[np.argmax(s)]     # bottom-right
    rect[1] = pts[np.argmin(diff)]  # top-right
    rect[3] = pts[np.argmax(diff)]  # bottom-left
    return rect

当A4纸水平旋转时, 从镜头看过去, A4纸的宽会随着旋转角度产生明显的偏差, 而A4纸的高只会因为两条竖边到镜头的距离不同而产生细微的变化。所以我们可以通过已知的黑胶带内轮廓宽高比(A4_WIDTH_MM / A4_HEIGHT_MM, 即170:257)和测算出的像素高(左右两条边长度的平均值), 推算出矫正后的像素宽应当为多少

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def get_topdown_view(image, pts):
    """Warp the quadrilateral ``pts`` in ``image`` to a fronto-parallel view.

    The output height is the mean pixel length of the left and right edges.
    The output width is NOT measured from the image; it is derived from that
    height and the fixed ratio ``A4_WIDTH_MM / A4_HEIGHT_MM``, so foreshortening
    of the horizontal edges does not affect the result.

    Args:
        image: Source image containing the quadrilateral.
        pts: Four ``(x, y)`` corners ordered top-left, top-right,
            bottom-right, bottom-left (the order ``order_points`` produces).

    Returns:
        The warped image of size ``(max_height, max_width)`` in rows/columns.
    """
    # cv2.getPerspectiveTransform expects float32 point arrays.
    pts = np.asarray(pts, dtype="float32")

    height_left = np.linalg.norm(pts[3] - pts[0])   # TL -> BL
    height_right = np.linalg.norm(pts[2] - pts[1])  # TR -> BR

    # Average the two vertical edges; derive the width from the known ratio.
    max_height = int((height_left + height_right) / 2)
    max_width = int(max_height * (A4_WIDTH_MM / A4_HEIGHT_MM))

    # Destination corners, in the same TL, TR, BR, BL order as `pts`.
    dst = np.array([
        [0, 0],
        [max_width, 0],
        [max_width, max_height],
        [0, max_height],
    ], dtype="float32")

    M = cv2.getPerspectiveTransform(pts, dst)
    return cv2.warpPerspective(image, M, (max_width, max_height))