some tweaks and algorithm breakdown

7 months ago · 5aa06a1af6
parent 6637ddba80
commit 5aa06a1af6
13 changed files with 97 additions and 5 deletions
--- a/ALGORITHM.md
+++ b/ALGORITHM.md
@ -0,0 +1,42 @@
+# Algorithm Explanation
+
+- Step 001: Load the image
+
+    <img src="debug-steps/001_Image.png" alt="step 1 - image" width="500">
+
+- Step 002: Normalize the image
+
+    <img src="debug-steps/002_Normalized.png" alt="step 2 - normalized" width="500">
+
+- Step 003: Erode the image
+
+    <img src="debug-steps/003_Eroded.png" alt="step 3 - eroded" width="500">
+
+- Step 004: Threshold the image
+
+    <img src="debug-steps/004_Threshold.png" alt="step 4 - threshold" width="500">
+
+- Step 005: Dilate the image
+
+    <img src="debug-steps/005_Dilated.png" alt="step 5 - dilated" width="500">
+
+- Step 006: Crop the image
+
+    <img src="debug-steps/006_Cropped.png" alt="step 6 - cropped" width="500">
+
+- Step 007: Show the maze with start and end points
+
+    <img src="debug-steps/007_Maze.png" alt="step 7 - maze" width="500">
+
+- Step 008: Create the maze bitmap
+
+    <img src="debug-steps/008_Maze%20Bitmap.png" alt="step 8 - maze bitmap" width="500">
+
+- Step 009: Find the maze path
+
+    <img src="debug-steps/009_Maze%20Path.png" alt="step 9 - maze path" width="500">
+
+- Step 010: Show the solution
+
+    <img src="debug-steps/010_Solution.png" alt="step 10 - solution" width="500">
+
--- a/cv_maze.py
+++ b/cv_maze.py
@ -53,13 +53,13 @@ def solve_maze(image, debug=False):
        # cv2.fillPoly(image, [np.int32(r.corners)], (0, 0, 0))

    # erode the image
-    eroded = cv2.erode(normalized_image, np.ones((7, 7), np.uint8))
+    eroded = cv2.erode(normalized_image, np.ones((9, 9), np.uint8))
    # eroded = cv2.GaussianBlur(eroded, (15, 15), 0)

    debug_image("Eroded", eroded)

    # binarize the image
-    _, thresh = cv2.threshold(eroded, 150, 255, cv2.THRESH_BINARY)
+    _, thresh = cv2.threshold(eroded, 200, 255, cv2.THRESH_BINARY)
    thresh = cv2.bitwise_not(thresh)

    debug_image("Threshold", thresh)
--- a/debug-steps/001_Image.png
+++ b/debug-steps/001_Image.png
--- a/debug-steps/003_Eroded.png
+++ b/debug-steps/003_Eroded.png
--- a/debug-steps/004_Threshold.png
+++ b/debug-steps/004_Threshold.png
--- a/debug-steps/005_Dilated.png
+++ b/debug-steps/005_Dilated.png
--- a/debug-steps/006_Cropped.png
+++ b/debug-steps/006_Cropped.png
--- a/debug-steps/007_Maze.png
+++ b/debug-steps/007_Maze.png
--- a/debug-steps/008_Maze
+++ b/debug-steps/008_Maze
--- a/debug-steps/009_Maze
+++ b/debug-steps/009_Maze
--- a/debug-steps/010_Solution.png
+++ b/debug-steps/010_Solution.png
--- a/main_camera_test.py
+++ b/main_camera_test.py
@ -0,0 +1,37 @@
+import cv2
+
+import numpy as np
+import utils
+import cv_maze
+
+camera = cv2.VideoCapture(0)
+camera.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
+camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
+
+while utils.wait_frame():
+    ret, frame = camera.read()
+
+    if not ret:
+        break
+
+    utils.display_image("Camera", frame)
+
+    # convert the image to grayscale
+    image_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # extract the tags
+    tag_registry = utils.extract_tags_dict_single(image_gray)
+
+    # draw borders around the tags
+    for r in tag_registry.values():
+        (ptA, ptB, ptC, ptD) = r.corners
+
+        cv2.line(frame, np.int32(ptA), np.int32(ptB), (0, 255, 0), 5)
+        cv2.line(frame, np.int32(ptB), np.int32(ptC), (0, 255, 0), 5)
+        cv2.line(frame, np.int32(ptC), np.int32(ptD), (0, 255, 0), 5)
+        cv2.line(frame, np.int32(ptD), np.int32(ptA), (0, 255, 0), 5)
+
+    utils.display_image("Tags", frame)
+
+
+camera.release()
--- a/utils.py
+++ b/utils.py
@ -4,13 +4,22 @@ import time

 import numpy as np

-import dt_apriltags
 from dt_apriltags import Detector, Detection


+from threading import Timer
+
 WINDOW_LABELS = set()


+def move_window(label, x, y):
+    """
+    Move the window with the given label to the given position (x, y),
+    by passing the three arguments unchanged to cv2.moveWindow
+    """
+    cv2.moveWindow(label, x, y)
+
+
 def display_image(label, image, default_width: int = 800, default_height: int = 600, interpolation: int = cv2.INTER_LINEAR):
    """
    Display an image fitted in a window with the given label and size, optionally setting the interpolation method
@ -22,11 +31,15 @@ def display_image(label, image, default_width: int = 800, default_height: int =
        new_width = default_height * width // height
        image = cv2.resize(image, (new_width, default_height), interpolation=interpolation)

-    WINDOW_LABELS.add(label)
    cv2.namedWindow(label, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(label, default_width, default_height)
    cv2.imshow(label, image)
-    cv2.imwrite(f"debug-steps/{len(WINDOW_LABELS):03}_{label}.png", image)
+
+    if label not in WINDOW_LABELS:
+        cv2.imwrite(f"debug-steps/{len(WINDOW_LABELS)+1:03}_{label}.png", image)
+        Timer(5.0, move_window, args=(label, 0, 0)).start()
+
+    WINDOW_LABELS.add(label)


 def wait_frame():