some tweaks and algorithm breakdown

main
Antonio De Lucreziis 3 months ago
parent 6637ddba80
commit 5aa06a1af6

@ -0,0 +1,42 @@
# Algorithm Explanation
- Step 001: Load the image
<img src="debug-steps/001_Image.png" alt="step 1 - image" width="500">
- Step 002: Normalize the image
<img src="debug-steps/002_Normalized.png" alt="step 2 - normalized" width="500">
- Step 003: Erode the image
<img src="debug-steps/003_Eroded.png" alt="step 3 - eroded" width="500">
- Step 004: Threshold the image
<img src="debug-steps/004_Threshold.png" alt="step 4 - threshold" width="500">
- Step 005: Dilate the image
<img src="debug-steps/005_Dilated.png" alt="step 5 - dilated" width="500">
- Step 006: Crop the image
<img src="debug-steps/006_Cropped.png" alt="step 6 - cropped" width="500">
- Step 007: Show the maze with start and end points
<img src="debug-steps/007_Maze.png" alt="step 7 - maze" width="500">
- Step 008: Create the maze bitmap
<img src="debug-steps/008_Maze%20Bitmap.png" alt="step 8 - maze bitmap" width="500">
- Step 009: Find the maze path
<img src="debug-steps/009_Maze%20Path.png" alt="step 9 - maze path" width="500">
- Step 010: Show the solution
<img src="debug-steps/010_Solution.png" alt="step 10 - solution" width="500">

@ -53,13 +53,13 @@ def solve_maze(image, debug=False):
# cv2.fillPoly(image, [np.int32(r.corners)], (0, 0, 0))
# erode the image
eroded = cv2.erode(normalized_image, np.ones((7, 7), np.uint8))
eroded = cv2.erode(normalized_image, np.ones((9, 9), np.uint8))
# eroded = cv2.GaussianBlur(eroded, (15, 15), 0)
debug_image("Eroded", eroded)
# binarize the image
_, thresh = cv2.threshold(eroded, 150, 255, cv2.THRESH_BINARY)
_, thresh = cv2.threshold(eroded, 200, 255, cv2.THRESH_BINARY)
thresh = cv2.bitwise_not(thresh)
debug_image("Threshold", thresh)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 MiB

After

Width:  |  Height:  |  Size: 18 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.8 MiB

After

Width:  |  Height:  |  Size: 2.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

After

Width:  |  Height:  |  Size: 82 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 112 KiB

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 MiB

After

Width:  |  Height:  |  Size: 18 MiB

@ -0,0 +1,37 @@
import cv2
import numpy as np
import utils
import cv_maze
# Live tag preview: read frames from camera index 0, outline every detected
# tag in green and show both the captured and the annotated frame.
camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# try/finally guarantees the capture device is released even when frame
# processing raises (e.g. inside cvtColor or the tag extraction).
try:
    while utils.wait_frame():
        ret, frame = camera.read()
        if not ret:
            # the camera did not deliver a frame; stop the preview
            break

        utils.display_image("Camera", frame)

        # convert the image to grayscale
        image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # extract the tags
        tag_registry = utils.extract_tags_dict_single(image_gray)

        # draw borders around the tags
        for r in tag_registry.values():
            # exactly four corners are expected; unpacking fails otherwise
            pt_a, pt_b, pt_c, pt_d = (np.int32(pt) for pt in r.corners)
            edges = ((pt_a, pt_b), (pt_b, pt_c), (pt_c, pt_d), (pt_d, pt_a))
            for start, end in edges:
                cv2.line(frame, start, end, (0, 255, 0), 5)

        utils.display_image("Tags", frame)
finally:
    camera.release()

@ -4,13 +4,22 @@ import time
import numpy as np
import dt_apriltags
from dt_apriltags import Detector, Detection
from threading import Timer
WINDOW_LABELS = set()
def move_window(label: str, x: int, y: int) -> None:
    """
    Move the window with the given label to the given position

    Thin wrapper that forwards its arguments unchanged to ``cv2.moveWindow``.

    :param label: name of the window to move
    :param x: x value handed to ``cv2.moveWindow``
    :param y: y value handed to ``cv2.moveWindow``
    """
    cv2.moveWindow(label, x, y)
def display_image(label, image, default_width: int = 800, default_height: int = 600, interpolation: int = cv2.INTER_LINEAR):
"""
Display an image fitted in a window with the given label and size, optionally setting the interpolation method
@ -22,11 +31,15 @@ def display_image(label, image, default_width: int = 800, default_height: int =
new_width = default_height * width // height
image = cv2.resize(image, (new_width, default_height), interpolation=interpolation)
WINDOW_LABELS.add(label)
cv2.namedWindow(label, cv2.WINDOW_NORMAL)
cv2.resizeWindow(label, default_width, default_height)
cv2.imshow(label, image)
cv2.imwrite(f"debug-steps/{len(WINDOW_LABELS):03}_{label}.png", image)
if label not in WINDOW_LABELS:
cv2.imwrite(f"debug-steps/{len(WINDOW_LABELS)+1:03}_{label}.png", image)
Timer(5.0, move_window, args=(label, 0, 0)).start()
WINDOW_LABELS.add(label)
def wait_frame():

Loading…
Cancel
Save