Spaces:
Runtime error
Runtime error
Update panorama_stitch_memsafe.py
Browse files- panorama_stitch_memsafe.py +697 -78
panorama_stitch_memsafe.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
PANORAMIC IMAGE STITCHING
|
| 3 |
|
| 4 |
Permitted library usage
|
| 5 |
- OpenCV: Image I/O, SIFT detection, keypoint matching
|
|
@@ -12,6 +12,24 @@ Manual implementations (NO high-level APIs):
|
|
| 12 |
- Perspective warping (forward + inverse mapping)
|
| 13 |
- Multi-band blending (Laplacian pyramid) & feathered blending
|
| 14 |
- Intensity normalization and final cleanup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
"""
|
| 17 |
|
|
@@ -21,7 +39,6 @@ import matplotlib.pyplot as plt
|
|
| 21 |
import os
|
| 22 |
import sys
|
| 23 |
import warnings
|
| 24 |
-
import gc
|
| 25 |
warnings.filterwarnings('ignore')
|
| 26 |
|
| 27 |
|
|
@@ -30,6 +47,12 @@ warnings.filterwarnings('ignore')
|
|
| 30 |
|
| 31 |
|
| 32 |
def load_images(image_paths):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
images_bgr = []
|
| 34 |
images_gray = []
|
| 35 |
for p in image_paths:
|
|
@@ -42,7 +65,8 @@ def load_images(image_paths):
|
|
| 42 |
return images_bgr, images_gray
|
| 43 |
|
| 44 |
|
| 45 |
-
def display_images(images_bgr, title="Input Images"
|
|
|
|
| 46 |
n = len(images_bgr)
|
| 47 |
fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
|
| 48 |
if n == 1:
|
|
@@ -53,27 +77,42 @@ def display_images(images_bgr, title="Input Images", output_dir="output"):
|
|
| 53 |
axes[i].axis('off')
|
| 54 |
fig.suptitle(title, fontsize=14)
|
| 55 |
plt.tight_layout()
|
| 56 |
-
plt.savefig(
|
| 57 |
plt.close()
|
| 58 |
|
| 59 |
|
| 60 |
|
| 61 |
-
# STEP 2: SIFT FEATURE EXTRACTION
|
| 62 |
|
| 63 |
|
| 64 |
def extract_sift_features(images_gray):
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
keypoints_list = []
|
| 67 |
descriptors_list = []
|
|
|
|
| 68 |
for idx, gray in enumerate(images_gray):
|
| 69 |
kp, des = sift.detectAndCompute(gray, None)
|
| 70 |
keypoints_list.append(kp)
|
| 71 |
descriptors_list.append(des)
|
| 72 |
print(f" Image {idx}: detected {len(kp)} SIFT keypoints")
|
|
|
|
| 73 |
return keypoints_list, descriptors_list
|
| 74 |
|
| 75 |
|
| 76 |
-
def visualize_keypoints(images_bgr, keypoints_list
|
|
|
|
| 77 |
n = len(images_bgr)
|
| 78 |
fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
|
| 79 |
if n == 1:
|
|
@@ -88,32 +127,55 @@ def visualize_keypoints(images_bgr, keypoints_list, output_dir="output"):
|
|
| 88 |
axes[i].axis('off')
|
| 89 |
fig.suptitle("SIFT Keypoints", fontsize=14)
|
| 90 |
plt.tight_layout()
|
| 91 |
-
plt.savefig(
|
| 92 |
plt.close()
|
| 93 |
|
| 94 |
|
| 95 |
|
| 96 |
-
# STEP 3: FEATURE MATCHING
|
| 97 |
|
| 98 |
|
| 99 |
def match_features(des1, des2, ratio_thresh=0.75):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
bf = cv2.BFMatcher(cv2.NORM_L2)
|
| 101 |
raw_matches = bf.knnMatch(des1, des2, k=2)
|
|
|
|
| 102 |
good_matches = []
|
| 103 |
for m, n in raw_matches:
|
| 104 |
if m.distance < ratio_thresh * n.distance:
|
| 105 |
good_matches.append(m)
|
|
|
|
| 106 |
print(f" Ratio test: {len(raw_matches)} raw -> {len(good_matches)} good matches")
|
| 107 |
return good_matches
|
| 108 |
|
| 109 |
|
| 110 |
def get_matched_points(kp1, kp2, matches):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
pts1 = np.float64([kp1[m.queryIdx].pt for m in matches])
|
| 112 |
pts2 = np.float64([kp2[m.trainIdx].pt for m in matches])
|
| 113 |
return pts1, pts2
|
| 114 |
|
| 115 |
|
| 116 |
-
def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fname="matches.png"
|
|
|
|
| 117 |
vis = cv2.drawMatches(
|
| 118 |
img1, kp1, img2, kp2, matches[:80], None,
|
| 119 |
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
|
|
@@ -122,110 +184,226 @@ def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fn
|
|
| 122 |
plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
|
| 123 |
plt.title(title)
|
| 124 |
plt.axis('off')
|
| 125 |
-
plt.savefig(
|
| 126 |
plt.close()
|
| 127 |
|
| 128 |
|
| 129 |
|
| 130 |
-
# STEP 4: HOMOGRAPHY ESTIMATION - DLT
|
| 131 |
|
| 132 |
|
| 133 |
def normalize_points(pts):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
centroid = np.mean(pts, axis=0)
|
| 135 |
shifted = pts - centroid
|
| 136 |
avg_dist = np.mean(np.sqrt(np.sum(shifted ** 2, axis=1)))
|
| 137 |
if avg_dist < 1e-10:
|
| 138 |
avg_dist = 1e-10
|
| 139 |
scale = np.sqrt(2.0) / avg_dist
|
|
|
|
| 140 |
T = np.array([
|
| 141 |
[scale, 0, -scale * centroid[0]],
|
| 142 |
[0, scale, -scale * centroid[1]],
|
| 143 |
[0, 0, 1 ]
|
| 144 |
])
|
| 145 |
-
|
| 146 |
-
|
|
|
|
| 147 |
pts_norm = pts_norm_h[:, :2] / pts_norm_h[:, 2:3]
|
|
|
|
| 148 |
return pts_norm, T
|
| 149 |
|
| 150 |
|
| 151 |
def compute_homography_dlt(src_pts, dst_pts):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
assert len(src_pts) >= 4, "Need at least 4 correspondences for homography"
|
|
|
|
|
|
|
| 153 |
src_norm, T_src = normalize_points(src_pts)
|
| 154 |
dst_norm, T_dst = normalize_points(dst_pts)
|
|
|
|
| 155 |
N = len(src_norm)
|
| 156 |
A = np.zeros((2 * N, 9))
|
|
|
|
| 157 |
for i in range(N):
|
| 158 |
x, y = src_norm[i]
|
| 159 |
xp, yp = dst_norm[i]
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
_, S, Vt = np.linalg.svd(A)
|
| 163 |
h = Vt[-1]
|
| 164 |
H_norm = h.reshape(3, 3)
|
|
|
|
|
|
|
|
|
|
| 165 |
H = np.linalg.inv(T_dst) @ H_norm @ T_src
|
|
|
|
|
|
|
| 166 |
if abs(H[2, 2]) > 1e-10:
|
| 167 |
H /= H[2, 2]
|
|
|
|
| 168 |
return H
|
| 169 |
|
| 170 |
|
| 171 |
|
| 172 |
-
# STEP 5: RANSAC
|
| 173 |
|
| 174 |
|
| 175 |
def compute_homography_ransac(src_pts, dst_pts,
|
| 176 |
n_iterations=10000,
|
| 177 |
reprojection_thresh=3.0,
|
| 178 |
min_inliers_ratio=0.1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
N = len(src_pts)
|
| 180 |
if N < 4:
|
| 181 |
raise ValueError(f"Need >=4 matches for homography, got {N}")
|
|
|
|
| 182 |
best_inlier_count = 0
|
| 183 |
best_inlier_mask = None
|
| 184 |
best_H = None
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
|
| 187 |
for iteration in range(n_iterations):
|
|
|
|
| 188 |
indices = np.random.choice(N, 4, replace=False)
|
| 189 |
s = src_pts[indices]
|
| 190 |
d = dst_pts[indices]
|
|
|
|
|
|
|
|
|
|
| 191 |
try:
|
| 192 |
H_candidate = compute_homography_dlt(s, d)
|
| 193 |
except np.linalg.LinAlgError:
|
| 194 |
continue
|
|
|
|
| 195 |
if H_candidate is None or np.any(np.isnan(H_candidate)):
|
| 196 |
continue
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
| 198 |
w = projected_h[:, 2:3]
|
| 199 |
w[np.abs(w) < 1e-10] = 1e-10
|
| 200 |
projected = projected_h[:, :2] / w
|
|
|
|
|
|
|
| 201 |
errors = np.sqrt(np.sum((projected - dst_pts) ** 2, axis=1))
|
| 202 |
inlier_mask = errors < reprojection_thresh
|
| 203 |
inlier_count = np.sum(inlier_mask)
|
|
|
|
|
|
|
| 204 |
if inlier_count > best_inlier_count:
|
| 205 |
best_inlier_count = inlier_count
|
| 206 |
best_inlier_mask = inlier_mask.copy()
|
| 207 |
best_H = H_candidate.copy()
|
|
|
|
|
|
|
| 208 |
if inlier_count > 0.9 * N:
|
| 209 |
break
|
| 210 |
|
|
|
|
| 211 |
if best_inlier_count < min_inliers_ratio * N:
|
| 212 |
raise RuntimeError(
|
| 213 |
f"RANSAC failed: only {best_inlier_count}/{N} inliers "
|
| 214 |
f"({100*best_inlier_count/N:.1f}%), threshold was {min_inliers_ratio*100:.0f}%"
|
| 215 |
)
|
|
|
|
|
|
|
| 216 |
best_H = compute_homography_dlt(
|
| 217 |
src_pts[best_inlier_mask],
|
| 218 |
dst_pts[best_inlier_mask]
|
| 219 |
)
|
|
|
|
| 220 |
print(f" RANSAC: {best_inlier_count}/{N} inliers "
|
| 221 |
f"({100*best_inlier_count/N:.1f}%) over {min(iteration+1, n_iterations)} iterations")
|
| 222 |
-
return best_H, best_inlier_mask
|
| 223 |
|
|
|
|
| 224 |
|
| 225 |
-
# STEP 6: IMAGE WARPING
|
| 226 |
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
def apply_homography_to_point(H, pt):
|
|
|
|
| 229 |
p = np.array([pt[0], pt[1], 1.0])
|
| 230 |
q = H @ p
|
| 231 |
if abs(q[2]) < 1e-10:
|
|
@@ -234,76 +412,189 @@ def apply_homography_to_point(H, pt):
|
|
| 234 |
|
| 235 |
|
| 236 |
def compute_warped_bounds(H, h, w):
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
warped_corners = []
|
| 239 |
for c in corners:
|
| 240 |
wx, wy = apply_homography_to_point(H, c)
|
| 241 |
warped_corners.append([wx, wy])
|
| 242 |
warped_corners = np.array(warped_corners)
|
|
|
|
| 243 |
x_min = np.floor(warped_corners[:, 0].min()).astype(int)
|
| 244 |
x_max = np.ceil(warped_corners[:, 0].max()).astype(int)
|
| 245 |
y_min = np.floor(warped_corners[:, 1].min()).astype(int)
|
| 246 |
y_max = np.ceil(warped_corners[:, 1].max()).astype(int)
|
|
|
|
| 247 |
return x_min, y_min, x_max, y_max
|
| 248 |
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def warp_image(img, H, output_size, offset):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
out_h, out_w = output_size
|
| 252 |
H_inv = np.linalg.inv(H)
|
| 253 |
off_x, off_y = offset
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
try:
|
| 255 |
warped = np.zeros((out_h, out_w, 3), dtype=np.float64)
|
| 256 |
mask = np.zeros((out_h, out_w), dtype=np.float64)
|
| 257 |
except MemoryError:
|
| 258 |
print("\n[!] Critical: System RAM cannot hold the final image.")
|
| 259 |
-
print(" Switching to disk-based memory mapping.")
|
| 260 |
warped = np.memmap('temp_warped.dat', dtype='float64', mode='w+', shape=(out_h, out_w, 3))
|
| 261 |
mask = np.memmap('temp_mask.dat', dtype='float64', mode='w+', shape=(out_h, out_w))
|
| 262 |
|
| 263 |
h_src, w_src = img.shape[:2]
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
print(f" -> Processing {out_h} rows in chunks of {CHUNK_SIZE}...")
|
| 266 |
|
| 267 |
for y_start in range(0, out_h, CHUNK_SIZE):
|
| 268 |
y_end = min(y_start + CHUNK_SIZE, out_h)
|
| 269 |
chunk_h = y_end - y_start
|
|
|
|
|
|
|
|
|
|
| 270 |
ys, xs = np.mgrid[y_start:y_end, 0:out_w]
|
| 271 |
dest_x = xs + off_x
|
| 272 |
dest_y = ys + off_y
|
|
|
|
|
|
|
| 273 |
ones = np.ones(chunk_h * out_w, dtype=np.float64)
|
| 274 |
flat_x = dest_x.reshape(-1)
|
| 275 |
flat_y = dest_y.reshape(-1)
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
src_coords = H_inv @ dest_coords
|
|
|
|
|
|
|
| 278 |
w_vals = src_coords[2, :]
|
| 279 |
w_vals[np.abs(w_vals) < 1e-10] = 1e-10
|
| 280 |
-
src_x =
|
| 281 |
-
src_y =
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
if not np.any(valid):
|
| 284 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
x0 = np.floor(src_x).astype(int)
|
| 286 |
y0 = np.floor(src_y).astype(int)
|
| 287 |
x1 = x0 + 1
|
| 288 |
y1 = y0 + 1
|
|
|
|
| 289 |
dx = src_x - x0
|
| 290 |
dy = src_y - y0
|
|
|
|
|
|
|
| 291 |
x0 = np.clip(x0, 0, w_src - 2)
|
| 292 |
x1 = np.clip(x1, 0, w_src - 1)
|
| 293 |
y0 = np.clip(y0, 0, h_src - 2)
|
| 294 |
y1 = np.clip(y1, 0, h_src - 1)
|
|
|
|
|
|
|
| 295 |
Ia = img[y0, x0].astype(np.float64)
|
| 296 |
Ib = img[y0, x1].astype(np.float64)
|
| 297 |
Ic = img[y1, x0].astype(np.float64)
|
| 298 |
Id = img[y1, x1].astype(np.float64)
|
|
|
|
|
|
|
| 299 |
wa = ((1 - dx) * (1 - dy))[:, :, None]
|
| 300 |
wb = (dx * (1 - dy))[:, :, None]
|
| 301 |
wc = ((1 - dx) * dy)[:, :, None]
|
| 302 |
wd = (dx * dy)[:, :, None]
|
|
|
|
|
|
|
|
|
|
| 303 |
chunk_warped = wa * Ia + wb * Ib + wc * Ic + wd * Id
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
valid_3ch = valid[:, :, None]
|
|
|
|
|
|
|
|
|
|
| 305 |
current_slice_img = warped[y_start:y_end, :]
|
| 306 |
current_slice_mask = mask[y_start:y_end, :]
|
|
|
|
| 307 |
np.copyto(current_slice_img, chunk_warped, where=valid_3ch)
|
| 308 |
np.copyto(current_slice_mask, 1.0, where=valid)
|
| 309 |
|
|
@@ -311,10 +602,15 @@ def warp_image(img, H, output_size, offset):
|
|
| 311 |
|
| 312 |
|
| 313 |
|
| 314 |
-
# STEP 7: BLENDING
|
| 315 |
|
| 316 |
|
| 317 |
def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
ax = np.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
|
| 319 |
xx, yy = np.meshgrid(ax, ax)
|
| 320 |
kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
|
|
@@ -323,10 +619,12 @@ def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
|
|
| 323 |
|
| 324 |
|
| 325 |
def build_gaussian_pyramid(img, levels):
|
|
|
|
| 326 |
pyramid = [img.astype(np.float64)]
|
| 327 |
current = img.astype(np.float64)
|
| 328 |
for _ in range(levels - 1):
|
| 329 |
blurred = gaussian_blur_manual(current, kernel_size=5, sigma=1.0)
|
|
|
|
| 330 |
downsampled = blurred[::2, ::2]
|
| 331 |
pyramid.append(downsampled)
|
| 332 |
current = downsampled
|
|
@@ -334,102 +632,204 @@ def build_gaussian_pyramid(img, levels):
|
|
| 334 |
|
| 335 |
|
| 336 |
def upsample(img, target_shape):
|
|
|
|
| 337 |
h, w = target_shape[:2]
|
|
|
|
| 338 |
up_h = np.repeat(img, 2, axis=0)[:h]
|
| 339 |
up_w = np.repeat(up_h, 2, axis=1)[:, :w]
|
|
|
|
| 340 |
result = gaussian_blur_manual(up_w, kernel_size=5, sigma=1.0)
|
| 341 |
return result
|
| 342 |
|
| 343 |
|
| 344 |
def build_laplacian_pyramid(img, levels):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
gauss_pyr = build_gaussian_pyramid(img, levels)
|
| 346 |
lap_pyr = []
|
| 347 |
for i in range(levels - 1):
|
| 348 |
upsampled = upsample(gauss_pyr[i + 1], gauss_pyr[i].shape)
|
| 349 |
lap = gauss_pyr[i] - upsampled
|
| 350 |
lap_pyr.append(lap)
|
| 351 |
-
lap_pyr.append(gauss_pyr[-1])
|
| 352 |
return lap_pyr
|
| 353 |
|
| 354 |
|
| 355 |
def multi_band_blend(img1, img2, mask1, mask2, levels=6):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
h, w = img1.shape[:2]
|
|
|
|
|
|
|
| 357 |
if img1.ndim == 2:
|
| 358 |
img1 = img1[:, :, None]
|
| 359 |
if img2.ndim == 2:
|
| 360 |
img2 = img2[:, :, None]
|
|
|
|
|
|
|
|
|
|
| 361 |
overlap = (mask1 > 0.5) & (mask2 > 0.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
weight1 = np.zeros((h, w), dtype=np.float64)
|
| 363 |
weight2 = np.zeros((h, w), dtype=np.float64)
|
|
|
|
| 364 |
if np.any(mask1 > 0.5):
|
|
|
|
| 365 |
dist1 = cv2.distanceTransform((mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
|
| 366 |
weight1 = dist1.astype(np.float64)
|
| 367 |
if np.any(mask2 > 0.5):
|
| 368 |
dist2 = cv2.distanceTransform((mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
|
| 369 |
weight2 = dist2.astype(np.float64)
|
|
|
|
|
|
|
| 370 |
total = weight1 + weight2
|
| 371 |
total[total < 1e-10] = 1e-10
|
| 372 |
-
alpha = weight1 / total
|
|
|
|
|
|
|
| 373 |
alpha_3 = np.stack([alpha] * 3, axis=-1)
|
|
|
|
|
|
|
| 374 |
lap1 = build_laplacian_pyramid(img1, levels)
|
| 375 |
lap2 = build_laplacian_pyramid(img2, levels)
|
|
|
|
|
|
|
| 376 |
mask_pyr = build_gaussian_pyramid(alpha_3, levels)
|
|
|
|
|
|
|
| 377 |
blended_pyr = []
|
| 378 |
for l in range(levels):
|
| 379 |
blended_level = mask_pyr[l] * lap1[l] + (1.0 - mask_pyr[l]) * lap2[l]
|
| 380 |
blended_pyr.append(blended_level)
|
|
|
|
|
|
|
| 381 |
result = blended_pyr[-1]
|
| 382 |
for l in range(levels - 2, -1, -1):
|
| 383 |
upsampled = upsample(result, blended_pyr[l].shape)
|
| 384 |
result = upsampled + blended_pyr[l]
|
|
|
|
| 385 |
return result
|
| 386 |
|
| 387 |
|
| 388 |
def feathered_blend(img1, img2, mask1, mask2):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
h, w = img1.shape[:2]
|
| 390 |
weight1 = np.zeros((h, w), dtype=np.float64)
|
| 391 |
weight2 = np.zeros((h, w), dtype=np.float64)
|
|
|
|
| 392 |
if np.any(mask1 > 0.5):
|
| 393 |
-
weight1 = cv2.distanceTransform(
|
|
|
|
|
|
|
| 394 |
if np.any(mask2 > 0.5):
|
| 395 |
-
weight2 = cv2.distanceTransform(
|
|
|
|
|
|
|
|
|
|
| 396 |
total = weight1 + weight2
|
| 397 |
total[total < 1e-10] = 1e-10
|
|
|
|
| 398 |
alpha1 = (weight1 / total)[:, :, None]
|
| 399 |
alpha2 = (weight2 / total)[:, :, None]
|
|
|
|
| 400 |
blended = alpha1 * img1 + alpha2 * img2
|
| 401 |
return blended
|
| 402 |
|
| 403 |
|
| 404 |
|
| 405 |
-
# STEP 8: EXPOSURE COMPENSATION
|
| 406 |
|
| 407 |
|
| 408 |
def compensate_exposure(img_target, img_source, mask_target, mask_source):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
overlap = (mask_target > 0.5) & (mask_source > 0.5)
|
|
|
|
| 410 |
if np.sum(overlap) < 100:
|
|
|
|
| 411 |
return img_source.copy()
|
|
|
|
| 412 |
gains = []
|
| 413 |
for c in range(3):
|
| 414 |
mean_target = np.mean(img_target[:, :, c][overlap])
|
| 415 |
mean_source = np.mean(img_source[:, :, c][overlap])
|
| 416 |
if mean_source > 1.0:
|
| 417 |
gain = mean_target / mean_source
|
|
|
|
| 418 |
gain = np.clip(gain, 0.5, 2.0)
|
| 419 |
else:
|
| 420 |
gain = 1.0
|
| 421 |
gains.append(gain)
|
|
|
|
| 422 |
compensated = img_source.copy()
|
| 423 |
for c in range(3):
|
| 424 |
compensated[:, :, c] = compensated[:, :, c] * gains[c]
|
|
|
|
| 425 |
print(f" Exposure gains: R={gains[2]:.3f}, G={gains[1]:.3f}, B={gains[0]:.3f}")
|
| 426 |
return compensated
|
| 427 |
|
| 428 |
|
| 429 |
-
# STEP 9: NAIVE STITCH
|
| 430 |
|
| 431 |
|
| 432 |
def naive_stitch(warped_images, masks):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
canvas = np.zeros_like(warped_images[0])
|
| 434 |
for img, mask in zip(warped_images, masks):
|
| 435 |
region = mask > 0.5
|
|
@@ -442,81 +842,145 @@ def naive_stitch(warped_images, masks):
|
|
| 442 |
|
| 443 |
|
| 444 |
|
| 445 |
-
# STEP 10: FINAL CLEANUP
|
| 446 |
|
| 447 |
|
| 448 |
def crop_to_content(panorama, combined_mask):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
if combined_mask.dtype == np.float64 or combined_mask.dtype == np.float32:
|
|
|
|
| 450 |
valid_mask = (combined_mask > 0.01)
|
| 451 |
else:
|
| 452 |
valid_mask = (combined_mask > 1)
|
|
|
|
|
|
|
|
|
|
| 453 |
rows = np.any(valid_mask, axis=1)
|
| 454 |
cols = np.any(valid_mask, axis=0)
|
|
|
|
|
|
|
| 455 |
if not np.any(rows) or not np.any(cols):
|
| 456 |
return panorama
|
|
|
|
| 457 |
y_min, y_max = np.where(rows)[0][[0, -1]]
|
| 458 |
x_min, x_max = np.where(cols)[0][[0, -1]]
|
|
|
|
|
|
|
| 459 |
safe_crop = panorama[y_min:y_max+1, x_min:x_max+1]
|
| 460 |
safe_mask = valid_mask[y_min:y_max+1, x_min:x_max+1]
|
|
|
|
|
|
|
| 461 |
total_content = np.count_nonzero(safe_mask)
|
|
|
|
|
|
|
| 462 |
top, bottom = 0, safe_mask.shape[0] - 1
|
| 463 |
left, right = 0, safe_mask.shape[1] - 1
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
max_iters = min(safe_mask.shape[0], safe_mask.shape[1])
|
|
|
|
| 466 |
for _ in range(max_iters):
|
|
|
|
| 467 |
h = bottom - top
|
| 468 |
w = right - left
|
| 469 |
if h <= 0 or w <= 0: break
|
|
|
|
|
|
|
| 470 |
roi = safe_mask[top:bottom+1, left:right+1]
|
|
|
|
|
|
|
| 471 |
if np.count_nonzero(roi) < min_content_threshold:
|
| 472 |
print(" [Crop] Stopping to preserve image content.")
|
| 473 |
break
|
|
|
|
|
|
|
| 474 |
row_top = roi[0, :]
|
| 475 |
row_btm = roi[-1, :]
|
| 476 |
col_lft = roi[:, 0]
|
| 477 |
col_rgt = roi[:, -1]
|
| 478 |
-
|
|
|
|
|
|
|
| 479 |
bad_b = np.sum(~row_btm)
|
| 480 |
bad_l = np.sum(~col_lft)
|
| 481 |
bad_r = np.sum(~col_rgt)
|
|
|
|
|
|
|
| 482 |
if (bad_t + bad_b + bad_l + bad_r) == 0:
|
| 483 |
print(f" [Crop] Found Clean Rect: {w}x{h} px")
|
| 484 |
return safe_crop[top:bottom+1, left:right+1]
|
|
|
|
|
|
|
| 485 |
max_bad = max(bad_t, bad_b, bad_l, bad_r)
|
|
|
|
| 486 |
if max_bad == bad_t: top += 1
|
| 487 |
elif max_bad == bad_b: bottom -= 1
|
| 488 |
elif max_bad == bad_l: left += 1
|
| 489 |
elif max_bad == bad_r: right -= 1
|
|
|
|
|
|
|
| 490 |
return safe_crop[top:bottom+1, left:right+1]
|
| 491 |
|
|
|
|
|
|
|
| 492 |
|
| 493 |
def memory_safe_linear_blend(img1, img2, mask1, mask2):
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
h, w = img1.shape[:2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
try:
|
|
|
|
| 497 |
print(" Calculating weight map 1...")
|
| 498 |
if np.any(mask1 > 0):
|
|
|
|
| 499 |
m1_bin = (mask1 > 127).astype(np.uint8)
|
| 500 |
w_ram = cv2.distanceTransform(m1_bin, cv2.DIST_L2, 5)
|
| 501 |
else:
|
| 502 |
w_ram = np.zeros((h, w), dtype=np.float32)
|
|
|
|
|
|
|
| 503 |
fp1 = np.memmap('temp_w1.dat', dtype='float32', mode='w+', shape=(h, w))
|
| 504 |
fp1[:] = w_ram[:]
|
| 505 |
-
del w_ram, m1_bin
|
| 506 |
-
gc.collect()
|
| 507 |
|
|
|
|
| 508 |
print(" Calculating weight map 2...")
|
| 509 |
if np.any(mask2 > 0):
|
| 510 |
m2_bin = (mask2 > 127).astype(np.uint8)
|
| 511 |
w_ram = cv2.distanceTransform(m2_bin, cv2.DIST_L2, 5)
|
| 512 |
else:
|
| 513 |
w_ram = np.zeros((h, w), dtype=np.float32)
|
|
|
|
|
|
|
| 514 |
fp2 = np.memmap('temp_w2.dat', dtype='float32', mode='w+', shape=(h, w))
|
| 515 |
fp2[:] = w_ram[:]
|
| 516 |
del w_ram, m2_bin
|
| 517 |
gc.collect()
|
| 518 |
-
|
|
|
|
| 519 |
print(" Blending chunks...")
|
|
|
|
| 520 |
try:
|
| 521 |
result = np.zeros(img1.shape, dtype=np.uint8)
|
| 522 |
except MemoryError:
|
|
@@ -525,102 +989,185 @@ def memory_safe_linear_blend(img1, img2, mask1, mask2):
|
|
| 525 |
CHUNK = 2000
|
| 526 |
for y in range(0, h, CHUNK):
|
| 527 |
ye = min(y + CHUNK, h)
|
|
|
|
|
|
|
| 528 |
w1_c = fp1[y:ye]
|
| 529 |
w2_c = fp2[y:ye]
|
|
|
|
| 530 |
total = w1_c + w2_c
|
| 531 |
total[total < 1e-5] = 1e-5
|
| 532 |
-
alpha = (w1_c / total)[:, :, None]
|
|
|
|
|
|
|
| 533 |
chunk1 = img1[y:ye].astype(np.float32)
|
| 534 |
chunk2 = img2[y:ye].astype(np.float32)
|
|
|
|
|
|
|
| 535 |
blended = chunk1 * alpha + chunk2 * (1.0 - alpha)
|
| 536 |
result[y:ye] = np.clip(blended, 0, 255).astype(np.uint8)
|
| 537 |
|
|
|
|
| 538 |
del fp1, fp2
|
| 539 |
try:
|
| 540 |
os.remove('temp_w1.dat')
|
| 541 |
os.remove('temp_w2.dat')
|
| 542 |
except: pass
|
|
|
|
| 543 |
return result
|
|
|
|
| 544 |
except Exception as e:
|
| 545 |
print(f" [!] Error in disk blending: {e}")
|
| 546 |
-
return img1
|
| 547 |
-
|
| 548 |
|
| 549 |
def smart_blend_manager(img1, img2, mask1, mask2, levels=6):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
h, w = img1.shape[:2]
|
| 551 |
pixels = h * w
|
| 552 |
-
|
|
|
|
|
|
|
|
|
|
| 553 |
est_mem_gb = est_mem_bytes / (1024**3)
|
|
|
|
| 554 |
print(f" [Memory Check] Image size: {w}x{h}")
|
| 555 |
print(f" [Memory Check] Est. Multi-Band RAM needed: ~{est_mem_gb:.2f} GB")
|
| 556 |
-
|
| 557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
print(f" [!] Image exceeds safe limit ({SAFE_LIMIT_GB} GB). Forcing Disk-Based Blending.")
|
| 559 |
return memory_safe_linear_blend(img1, img2, mask1, mask2)
|
|
|
|
|
|
|
| 560 |
try:
|
| 561 |
-
return multi_band_blend(img1.astype(np.float64), img2.astype(np.float64),
|
| 562 |
mask1, mask2, levels)
|
| 563 |
-
except (MemoryError, cv2.error):
|
|
|
|
| 564 |
print("\n [!] RAM Full or OpenCV Error! Switching to Disk-Based Blending...")
|
|
|
|
| 565 |
gc.collect()
|
| 566 |
return memory_safe_linear_blend(img1, img2, mask1, mask2)
|
| 567 |
|
| 568 |
|
| 569 |
|
| 570 |
-
# MAIN PIPELINE
|
| 571 |
-
|
| 572 |
-
|
| 573 |
def stitch_panorama(image_paths, output_dir="output"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
os.makedirs(output_dir, exist_ok=True)
|
| 575 |
|
| 576 |
print("=" * 70)
|
| 577 |
-
print("PANORAMIC IMAGE STITCHING")
|
| 578 |
print("=" * 70)
|
| 579 |
|
|
|
|
| 580 |
# 1. Load images
|
|
|
|
| 581 |
print("\n[1/8] Loading images...")
|
| 582 |
images_bgr, images_gray = load_images(image_paths)
|
| 583 |
n_images = len(images_bgr)
|
| 584 |
-
display_images(images_bgr
|
|
|
|
|
|
|
| 585 |
center_idx = n_images // 2
|
| 586 |
print(f" Reference (center) image: index {center_idx}")
|
| 587 |
|
|
|
|
| 588 |
# 2. Extract SIFT features
|
|
|
|
| 589 |
print("\n[2/8] Extracting SIFT features...")
|
| 590 |
keypoints, descriptors = extract_sift_features(images_gray)
|
| 591 |
-
visualize_keypoints(images_bgr, keypoints
|
| 592 |
|
| 593 |
-
#
|
|
|
|
|
|
|
| 594 |
print("\n[3/8] Matching features & computing homographies...")
|
|
|
|
|
|
|
|
|
|
| 595 |
homographies = [None] * n_images
|
| 596 |
homographies[center_idx] = np.eye(3)
|
| 597 |
|
|
|
|
|
|
|
| 598 |
for i in range(center_idx - 1, -1, -1):
|
| 599 |
print(f"\n Pair: Image {i} <-> Image {i+1}")
|
|
|
|
|
|
|
| 600 |
matches = match_features(descriptors[i], descriptors[i + 1])
|
| 601 |
pts_i, pts_ip1 = get_matched_points(keypoints[i], keypoints[i + 1], matches)
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
H_i_to_ip1, inlier_mask = compute_homography_ransac(pts_i, pts_ip1)
|
|
|
|
|
|
|
|
|
|
| 606 |
homographies[i] = homographies[i + 1] @ H_i_to_ip1
|
| 607 |
print(f" Homography (Image {i} -> center) computed successfully")
|
| 608 |
|
|
|
|
| 609 |
for i in range(center_idx + 1, n_images):
|
| 610 |
print(f"\n Pair: Image {i} <-> Image {i-1}")
|
|
|
|
|
|
|
| 611 |
matches = match_features(descriptors[i], descriptors[i - 1])
|
| 612 |
pts_i, pts_im1 = get_matched_points(keypoints[i], keypoints[i - 1], matches)
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
H_i_to_im1, inlier_mask = compute_homography_ransac(pts_i, pts_im1)
|
|
|
|
|
|
|
| 617 |
homographies[i] = homographies[i - 1] @ H_i_to_im1
|
| 618 |
print(f" Homography (Image {i} -> center) computed successfully")
|
| 619 |
|
| 620 |
-
#
|
|
|
|
|
|
|
| 621 |
print("\n[4/8] Computing output canvas bounds...")
|
|
|
|
| 622 |
all_x_min, all_y_min = 0, 0
|
| 623 |
all_x_max, all_y_max = 0, 0
|
|
|
|
| 624 |
for i in range(n_images):
|
| 625 |
h, w = images_bgr[i].shape[:2]
|
| 626 |
xmin, ymin, xmax, ymax = compute_warped_bounds(homographies[i], h, w)
|
|
@@ -628,9 +1175,10 @@ def stitch_panorama(image_paths, output_dir="output"):
|
|
| 628 |
all_y_min = min(all_y_min, ymin)
|
| 629 |
all_x_max = max(all_x_max, xmax)
|
| 630 |
all_y_max = max(all_y_max, ymax)
|
|
|
|
| 631 |
out_w = all_x_max - all_x_min + 1
|
| 632 |
out_h = all_y_max - all_y_min + 1
|
| 633 |
-
offset = (all_x_min, all_y_min)
|
| 634 |
print(f" Canvas size: {out_w} x {out_h} px, offset: {offset}")
|
| 635 |
|
| 636 |
print("\n[5/8] Warping images into common coordinate frame...")
|
|
@@ -638,38 +1186,74 @@ def stitch_panorama(image_paths, output_dir="output"):
|
|
| 638 |
masks = []
|
| 639 |
for i in range(n_images):
|
| 640 |
print(f" Warping image {i}...")
|
| 641 |
-
warped, mask = warp_image(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
warped_u8 = np.clip(warped, 0, 255).astype(np.uint8)
|
| 643 |
warped_images.append(warped_u8)
|
|
|
|
|
|
|
| 644 |
mask_u8 = (mask > 0.5).astype(np.uint8) * 255
|
| 645 |
masks.append(mask_u8)
|
|
|
|
|
|
|
| 646 |
del warped, mask
|
| 647 |
-
gc.collect()
|
| 648 |
|
|
|
|
| 649 |
# 7. Exposure compensation
|
|
|
|
|
|
|
|
|
|
| 650 |
print("\n[6/8] Compensating exposure differences...")
|
| 651 |
for i in range(n_images):
|
| 652 |
if i == center_idx:
|
| 653 |
continue
|
|
|
|
|
|
|
| 654 |
target_f = warped_images[center_idx].astype(np.float64)
|
| 655 |
source_f = warped_images[i].astype(np.float64)
|
|
|
|
|
|
|
| 656 |
mask_t = masks[center_idx] / 255.0
|
| 657 |
mask_s = masks[i] / 255.0
|
| 658 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
warped_images[i] = np.clip(compensated, 0, 255).astype(np.uint8)
|
|
|
|
|
|
|
| 660 |
del target_f, source_f, compensated
|
| 661 |
gc.collect()
|
| 662 |
|
| 663 |
-
#
|
| 664 |
-
|
|
|
|
|
|
|
| 665 |
naive = naive_stitch(warped_images, masks)
|
| 666 |
naive_clipped = np.clip(naive, 0, 255).astype(np.uint8)
|
| 667 |
cv2.imwrite(os.path.join(output_dir, "04_naive_stitch.png"), naive_clipped)
|
| 668 |
|
| 669 |
-
#
|
| 670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
blended = warped_images[center_idx].copy()
|
| 672 |
blended_mask = masks[center_idx].copy()
|
|
|
|
|
|
|
| 673 |
blend_order = []
|
| 674 |
for d in range(1, n_images):
|
| 675 |
if center_idx - d >= 0:
|
|
@@ -679,23 +1263,38 @@ def stitch_panorama(image_paths, output_dir="output"):
|
|
| 679 |
|
| 680 |
for idx in blend_order:
|
| 681 |
print(f" Blending image {idx} into panorama...")
|
|
|
|
|
|
|
| 682 |
if blended.dtype != np.uint8:
|
| 683 |
blended = np.clip(blended, 0, 255).astype(np.uint8)
|
|
|
|
| 684 |
curr_img = np.clip(warped_images[idx], 0, 255).astype(np.uint8)
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
blended_mask = np.maximum(blended_mask, masks[idx])
|
| 687 |
|
| 688 |
blended_clipped = np.clip(blended, 0, 255).astype(np.uint8)
|
| 689 |
|
| 690 |
-
#
|
|
|
|
|
|
|
| 691 |
print("\n Cropping to clean rectangle...")
|
| 692 |
final_panorama = crop_to_content(blended_clipped, blended_mask)
|
| 693 |
naive_cropped = crop_to_content(naive_clipped, blended_mask)
|
| 694 |
|
|
|
|
| 695 |
cv2.imwrite(os.path.join(output_dir, "05_blended_panorama.png"), final_panorama)
|
| 696 |
cv2.imwrite(os.path.join(output_dir, "04_naive_stitch_cropped.png"), naive_cropped)
|
| 697 |
|
| 698 |
-
#
|
|
|
|
|
|
|
| 699 |
print("\n Generating comparison figure...")
|
| 700 |
fig, axes = plt.subplots(2, 1, figsize=(18, 10))
|
| 701 |
axes[0].imshow(cv2.cvtColor(naive_cropped, cv2.COLOR_BGR2RGB))
|
|
@@ -709,15 +1308,35 @@ def stitch_panorama(image_paths, output_dir="output"):
|
|
| 709 |
plt.close()
|
| 710 |
|
| 711 |
print("\n" + "=" * 70)
|
| 712 |
-
print("Done
|
| 713 |
print("=" * 70)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
|
| 715 |
return final_panorama
|
| 716 |
|
| 717 |
|
|
|
|
|
|
|
| 718 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 719 |
if len(sys.argv) < 4:
|
| 720 |
-
print("Usage: python
|
|
|
|
| 721 |
sys.exit(1)
|
|
|
|
| 722 |
image_paths = sys.argv[1:]
|
| 723 |
-
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
PANORAMIC IMAGE STITCHING - FROM SCRATCH IMPLEMENTATION
|
| 3 |
|
| 4 |
Permitted library usage
|
| 5 |
- OpenCV: Image I/O, SIFT detection, keypoint matching
|
|
|
|
| 12 |
- Perspective warping (forward + inverse mapping)
|
| 13 |
- Multi-band blending (Laplacian pyramid) & feathered blending
|
| 14 |
- Intensity normalization and final cleanup
|
| 15 |
+
By: Priyadip Sau(M25CSA023)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
"""
|
| 20 |
+
Usage Instructions:
|
| 21 |
+
- Make sure the following files are in the same directory:
|
| 22 |
+
- panorama_stitch_memsafe.py
|
| 23 |
+
- left.jpg
|
| 24 |
+
- center.jpg
|
| 25 |
+
- right.jpg
|
| 26 |
+
|
| 27 |
+
- Run the following command from that directory:
|
| 28 |
+
- "python panorama_stitch_memsafe.py left.jpg center.jpg right.jpg"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
The stitched panorama will be saved in the current directory inside the output/ folder as: final_panorama.png
|
| 32 |
+
|
| 33 |
|
| 34 |
"""
|
| 35 |
|
|
|
|
| 39 |
import os
|
| 40 |
import sys
|
| 41 |
import warnings
|
|
|
|
| 42 |
warnings.filterwarnings('ignore')
|
| 43 |
|
| 44 |
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
def load_images(image_paths):
|
| 50 |
+
"""
|
| 51 |
+
Load a sequence of images from disk. We expect them ordered
|
| 52 |
+
left-to-right (or the user can specify ordering). Each image
|
| 53 |
+
is read in BGR (OpenCV default) and also converted to RGB
|
| 54 |
+
for visualization, plus a grayscale copy for feature detection.
|
| 55 |
+
"""
|
| 56 |
images_bgr = []
|
| 57 |
images_gray = []
|
| 58 |
for p in image_paths:
|
|
|
|
| 65 |
return images_bgr, images_gray
|
| 66 |
|
| 67 |
|
| 68 |
+
def display_images(images_bgr, title="Input Images"):
|
| 69 |
+
"""Quick side-by-side display of the loaded images."""
|
| 70 |
n = len(images_bgr)
|
| 71 |
fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
|
| 72 |
if n == 1:
|
|
|
|
| 77 |
axes[i].axis('off')
|
| 78 |
fig.suptitle(title, fontsize=14)
|
| 79 |
plt.tight_layout()
|
| 80 |
+
plt.savefig("01_input_images.png", dpi=150, bbox_inches='tight')
|
| 81 |
plt.close()
|
| 82 |
|
| 83 |
|
| 84 |
|
| 85 |
+
# STEP 2: SIFT FEATURE EXTRACTION (OpenCV built-in permitted)
|
| 86 |
|
| 87 |
|
| 88 |
def extract_sift_features(images_gray):
|
| 89 |
+
"""
|
| 90 |
+
Detect SIFT keypoints and compute descriptors for each grayscale image.
|
| 91 |
+
|
| 92 |
+
SIFT (Scale-Invariant Feature Transform) identifies interest points that
|
| 93 |
+
are invariant to scale and rotation. Each keypoint receives a 128-dim
|
| 94 |
+
descriptor encoding the local gradient histogram around it.
|
| 95 |
+
|
| 96 |
+
Returns:
|
| 97 |
+
keypoints_list: list of keypoint arrays per image
|
| 98 |
+
descriptors_list: list of descriptor matrices (N x 128) per image
|
| 99 |
+
"""
|
| 100 |
+
sift = cv2.SIFT_create(nfeatures=10000) # generous upper bound
|
| 101 |
+
|
| 102 |
keypoints_list = []
|
| 103 |
descriptors_list = []
|
| 104 |
+
|
| 105 |
for idx, gray in enumerate(images_gray):
|
| 106 |
kp, des = sift.detectAndCompute(gray, None)
|
| 107 |
keypoints_list.append(kp)
|
| 108 |
descriptors_list.append(des)
|
| 109 |
print(f" Image {idx}: detected {len(kp)} SIFT keypoints")
|
| 110 |
+
|
| 111 |
return keypoints_list, descriptors_list
|
| 112 |
|
| 113 |
|
| 114 |
+
def visualize_keypoints(images_bgr, keypoints_list):
|
| 115 |
+
"""Draw detected keypoints on each image """
|
| 116 |
n = len(images_bgr)
|
| 117 |
fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
|
| 118 |
if n == 1:
|
|
|
|
| 127 |
axes[i].axis('off')
|
| 128 |
fig.suptitle("SIFT Keypoints", fontsize=14)
|
| 129 |
plt.tight_layout()
|
| 130 |
+
plt.savefig("02_sift_keypoints.png", dpi=150, bbox_inches='tight')
|
| 131 |
plt.close()
|
| 132 |
|
| 133 |
|
| 134 |
|
| 135 |
+
# STEP 3: FEATURE MATCHING (OpenCV BFMatcher permitted)
|
| 136 |
|
| 137 |
|
| 138 |
def match_features(des1, des2, ratio_thresh=0.75):
|
| 139 |
+
"""
|
| 140 |
+
Match descriptors between two images using Brute-Force with
|
| 141 |
+
Lowe's ratio test.
|
| 142 |
+
|
| 143 |
+
For each descriptor in image 1, we find its two nearest neighbors
|
| 144 |
+
in image 2. If the distance to the closest match is significantly
|
| 145 |
+
smaller than to the second closest (ratio < threshold), we accept
|
| 146 |
+
the match. This filters out ambiguous correspondences.
|
| 147 |
+
|
| 148 |
+
Args:
|
| 149 |
+
des1, des2: descriptor matrices (N1x128, N2x128)
|
| 150 |
+
ratio_thresh: Lowe's ratio test threshold
|
| 151 |
+
|
| 152 |
+
Returns:
|
| 153 |
+
good_matches: list of cv2.DMatch objects passing the ratio test
|
| 154 |
+
"""
|
| 155 |
bf = cv2.BFMatcher(cv2.NORM_L2)
|
| 156 |
raw_matches = bf.knnMatch(des1, des2, k=2)
|
| 157 |
+
|
| 158 |
good_matches = []
|
| 159 |
for m, n in raw_matches:
|
| 160 |
if m.distance < ratio_thresh * n.distance:
|
| 161 |
good_matches.append(m)
|
| 162 |
+
|
| 163 |
print(f" Ratio test: {len(raw_matches)} raw -> {len(good_matches)} good matches")
|
| 164 |
return good_matches
|
| 165 |
|
| 166 |
|
| 167 |
def get_matched_points(kp1, kp2, matches):
|
| 168 |
+
"""
|
| 169 |
+
Extract the (x, y) coordinates of matched keypoints into
|
| 170 |
+
two corresponding Nx2 arrays.
|
| 171 |
+
"""
|
| 172 |
pts1 = np.float64([kp1[m.queryIdx].pt for m in matches])
|
| 173 |
pts2 = np.float64([kp2[m.trainIdx].pt for m in matches])
|
| 174 |
return pts1, pts2
|
| 175 |
|
| 176 |
|
| 177 |
+
def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fname="matches.png"):
|
| 178 |
+
"""Draw matches between two images side by side."""
|
| 179 |
vis = cv2.drawMatches(
|
| 180 |
img1, kp1, img2, kp2, matches[:80], None,
|
| 181 |
flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
|
|
|
|
| 184 |
plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
|
| 185 |
plt.title(title)
|
| 186 |
plt.axis('off')
|
| 187 |
+
plt.savefig(fname, dpi=150, bbox_inches='tight')
|
| 188 |
plt.close()
|
| 189 |
|
| 190 |
|
| 191 |
|
| 192 |
+
# STEP 4: HOMOGRAPHY ESTIMATION - DIRECT LINEAR TRANSFORM (DLT) - MANUAL IMPLEMENTATION
|
| 193 |
|
| 194 |
|
| 195 |
def normalize_points(pts):
    """Condition 2D points for the DLT (Hartley normalization).

    Translates the point cloud so its centroid sits at the origin, then
    scales it so the mean distance to the origin equals sqrt(2). This
    keeps the DLT system matrix well-conditioned: raw pixel coordinates
    are O(1000) while the homogeneous 1s are O(1).

    Args:
        pts: Nx2 array of 2D points

    Returns:
        (normalized Nx2 points, 3x3 conditioning matrix T) such that
        the normalized points equal T applied to the homogeneous input.
    """
    mean_xy = np.mean(pts, axis=0)
    centered = pts - mean_xy
    mean_radius = np.mean(np.sqrt(np.sum(centered ** 2, axis=1)))
    # Guard against a degenerate cloud collapsed onto a single point.
    if mean_radius < 1e-10:
        mean_radius = 1e-10
    s = np.sqrt(2.0) / mean_radius

    # Similarity transform: translate by -centroid, then scale by s.
    T = np.array([
        [s, 0, -s * mean_xy[0]],
        [0, s, -s * mean_xy[1]],
        [0, 0, 1],
    ])

    homog = np.column_stack([pts, np.ones(len(pts))])  # Nx3
    mapped = (T @ homog.T).T                           # Nx3
    normalized = mapped[:, :2] / mapped[:, 2:3]

    return normalized, T
|
| 230 |
|
| 231 |
|
| 232 |
def compute_homography_dlt(src_pts, dst_pts):
    """
    Compute the 3x3 homography H that maps src_pts -> dst_pts using the
    Direct Linear Transform (DLT) algorithm.

    Each correspondence (x, y) <-> (x', y') satisfies the projective
    relation [x', y', 1]^T ~ H [x, y, 1]^T. Eliminating the unknown
    scale yields two linear equations per correspondence in the nine
    entries of H. Stacking N >= 4 correspondences gives A h = 0; the
    solution h is the right singular vector of A associated with the
    smallest singular value (the null space of A).

    Both point sets are Hartley-normalized before solving and the
    result is de-normalized afterwards for numerical stability.

    Args:
        src_pts: Nx2 source points
        dst_pts: Nx2 destination points (N >= 4)

    Returns:
        H: 3x3 homography matrix (scaled so H[2, 2] == 1 when possible)

    Raises:
        ValueError: if fewer than 4 correspondences are supplied.
    """
    # Explicit validation instead of `assert`, which is stripped when
    # Python runs with the -O flag.
    if len(src_pts) < 4:
        raise ValueError("Need at least 4 correspondences for homography")

    # --- Hartley normalization ---
    src_norm, T_src = normalize_points(src_pts)
    dst_norm, T_dst = normalize_points(dst_pts)

    N = len(src_norm)
    A = np.zeros((2 * N, 9))

    for i in range(N):
        x, y = src_norm[i]
        xp, yp = dst_norm[i]

        # Row 2i:   -x -y -1  0  0  0  xp*x  xp*y  xp
        A[2 * i] = [
            -x, -y, -1,
            0, 0, 0,
            xp * x, xp * y, xp
        ]
        # Row 2i+1:  0  0  0 -x -y -1  yp*x  yp*y  yp
        A[2 * i + 1] = [
            0, 0, 0,
            -x, -y, -1,
            yp * x, yp * y, yp
        ]

    # Solve via SVD: h is the right singular vector for the smallest
    # singular value, i.e. the last row of Vt. (Singular values are
    # not needed, so they are not bound to a name.)
    _, _, Vt = np.linalg.svd(A)
    H_norm = Vt[-1].reshape(3, 3)

    # --- De-normalize ---
    # If T_dst @ dst ~ H_norm @ T_src @ src, then
    # dst ~ T_dst^{-1} @ H_norm @ T_src @ src.
    H = np.linalg.inv(T_dst) @ H_norm @ T_src

    # Fix the projective scale ambiguity with the convention H[2,2] = 1.
    if abs(H[2, 2]) > 1e-10:
        H /= H[2, 2]

    return H
|
| 299 |
|
| 300 |
|
| 301 |
|
| 302 |
+
# STEP 5: RANSAC - ROBUST ESTIMATION OF HOMOGRAPHY
|
| 303 |
|
| 304 |
|
| 305 |
def compute_homography_ransac(src_pts, dst_pts,
                              n_iterations=10000,
                              reprojection_thresh=3.0,
                              min_inliers_ratio=0.1):
    """
    Robust homography estimation: RANSAC wrapped around the DLT solver.

    Each trial draws a minimal sample of 4 correspondences, fits a
    candidate homography with the DLT, and scores it by counting how
    many correspondences it reprojects to within `reprojection_thresh`
    pixels. The candidate with the largest consensus set wins and is
    finally re-fit on all of its inliers. This tolerates the wrong
    matches that descriptor matching inevitably produces.

    Args:
        src_pts, dst_pts: Nx2 matched point arrays
        n_iterations: number of RANSAC trials
        reprojection_thresh: max pixel error to count as an inlier
        min_inliers_ratio: minimum fraction of inliers to accept

    Returns:
        (best_H, best_inlier_mask): refined 3x3 homography and a
        boolean array flagging the inlier correspondences.

    Raises:
        ValueError: if fewer than 4 matches are given.
        RuntimeError: if the best model explains too few matches.
    """
    N = len(src_pts)
    if N < 4:
        raise ValueError(f"Need >=4 matches for homography, got {N}")

    best_inlier_count = 0
    best_inlier_mask = None
    best_H = None

    # Homogeneous source coordinates, computed once and reused for the
    # reprojection test of every candidate model.
    src_h = np.column_stack([src_pts, np.ones(N)])  # Nx3

    for iteration in range(n_iterations):
        # Minimal sample: 4 distinct correspondences.
        pick = np.random.choice(N, 4, replace=False)

        try:
            candidate = compute_homography_dlt(src_pts[pick], dst_pts[pick])
        except np.linalg.LinAlgError:
            continue  # degenerate (e.g. collinear) sample
        if candidate is None or np.any(np.isnan(candidate)):
            continue

        # Reproject every source point through the candidate model.
        proj = (candidate @ src_h.T).T  # Nx3
        denom = proj[:, 2:3]
        denom[np.abs(denom) < 1e-10] = 1e-10  # guard the perspective divide
        reproj = proj[:, :2] / denom

        # Consensus set: points landing within the pixel threshold.
        residuals = np.sqrt(np.sum((reproj - dst_pts) ** 2, axis=1))
        support = residuals < reprojection_thresh
        support_size = np.sum(support)

        if support_size > best_inlier_count:
            best_inlier_count = support_size
            best_inlier_mask = support.copy()
            best_H = candidate.copy()

            # Early exit once the model explains nearly everything.
            if support_size > 0.9 * N:
                break

    if best_inlier_count < min_inliers_ratio * N:
        raise RuntimeError(
            f"RANSAC failed: only {best_inlier_count}/{N} inliers "
            f"({100*best_inlier_count/N:.1f}%), threshold was {min_inliers_ratio*100:.0f}%"
        )

    # Final polish: refit on the full consensus set of the winner.
    best_H = compute_homography_dlt(
        src_pts[best_inlier_mask],
        dst_pts[best_inlier_mask]
    )

    print(f" RANSAC: {best_inlier_count}/{N} inliers "
          f"({100*best_inlier_count/N:.1f}%) over {min(iteration+1, n_iterations)} iterations")

    return best_H, best_inlier_mask
|
| 399 |
|
|
|
|
| 400 |
|
| 401 |
+
# =====================================================================================
|
| 402 |
+
# STEP 6: IMAGE WARPING - inverse mapping with bilinear interp - MANUAL IMPLEMENTATION
|
| 403 |
+
# =====================================================================================
|
| 404 |
|
| 405 |
def apply_homography_to_point(H, pt):
|
| 406 |
+
"""Apply homography H to a single 2D point, returning the mapped 2D point."""
|
| 407 |
p = np.array([pt[0], pt[1], 1.0])
|
| 408 |
q = H @ p
|
| 409 |
if abs(q[2]) < 1e-10:
|
|
|
|
| 412 |
|
| 413 |
|
| 414 |
def compute_warped_bounds(H, h, w):
    """
    Bounding box of an (h x w) image after warping by homography H.

    Only the four corners need to be mapped: the warped image extremes
    occur at the warped corner positions.

    Returns:
        (x_min, y_min, x_max, y_max) integer bounds in the destination
        coordinate frame (floor of the minima, ceil of the maxima).
    """
    source_corners = np.array([
        [0, 0],
        [w - 1, 0],
        [w - 1, h - 1],
        [0, h - 1]
    ], dtype=np.float64)

    mapped = np.array(
        [apply_homography_to_point(H, corner) for corner in source_corners],
        dtype=np.float64,
    )

    x_min = np.floor(mapped[:, 0].min()).astype(int)
    x_max = np.ceil(mapped[:, 0].max()).astype(int)
    y_min = np.floor(mapped[:, 1].min()).astype(int)
    y_max = np.ceil(mapped[:, 1].max()).astype(int)

    return x_min, y_min, x_max, y_max
|
| 440 |
|
| 441 |
|
| 442 |
+
def bilinear_interpolate(img, x, y):
    """
    Bilinear interpolation at sub-pixel location (x, y) on an image.

    Computes a weighted average of the four pixels surrounding (x, y),
    with weights proportional to the area of the opposite rectangle in
    the unit cell. Compared to rounding to the nearest pixel this
    avoids aliasing artifacts.

    Args:
        img: HxWx3 (or HxW) image, uint8 or float
        x, y: float coordinates (x = column, y = row)

    Returns:
        Interpolated pixel value (3-element array for color images),
        or zeros if (x, y) falls outside the image.
    """
    h, w = img.shape[:2]
    channels = img.shape[2] if img.ndim == 3 else 1

    x0 = int(np.floor(x))
    y0 = int(np.floor(y))

    # Out of bounds only when the anchor pixel itself lies outside the
    # image. (The previous check `x1 >= w or y1 >= h` also rejected the
    # valid last row/column, returning zeros for x == w-1 / y == h-1.)
    if x0 < 0 or y0 < 0 or x0 > w - 1 or y0 > h - 1:
        return np.zeros(channels, dtype=np.float64)

    # Clamp the right/bottom neighbors so the exact border is still
    # interpolated (weights collapse onto the border pixel there).
    x1 = min(x0 + 1, w - 1)
    y1 = min(y0 + 1, h - 1)

    # Fractional parts
    dx = x - x0
    dy = y - y0

    # Four-neighbor weighted average
    val = (
        (1 - dx) * (1 - dy) * img[y0, x0].astype(np.float64) +
        dx * (1 - dy) * img[y0, x1].astype(np.float64) +
        (1 - dx) * dy * img[y1, x0].astype(np.float64) +
        dx * dy * img[y1, x1].astype(np.float64)
    )
    return val
|
| 482 |
+
|
| 483 |
+
|
| 484 |
def warp_image(img, H, output_size, offset):
|
| 485 |
+
"""
|
| 486 |
+
Memory-Safe Warping: Processes the image in chunks (scanlines)
|
| 487 |
+
to avoid crashing RAM on large panoramas.
|
| 488 |
+
"""
|
| 489 |
out_h, out_w = output_size
|
| 490 |
H_inv = np.linalg.inv(H)
|
| 491 |
off_x, off_y = offset
|
| 492 |
+
|
| 493 |
+
# 1. Allocate the final destination arrays
|
| 494 |
+
# If this line specifically fails, you simply don't have enough RAM
|
| 495 |
+
# to hold the result, and we'd need to use 'np.memmap' (disk-based array).
|
| 496 |
try:
|
| 497 |
warped = np.zeros((out_h, out_w, 3), dtype=np.float64)
|
| 498 |
mask = np.zeros((out_h, out_w), dtype=np.float64)
|
| 499 |
except MemoryError:
|
| 500 |
print("\n[!] Critical: System RAM cannot hold the final image.")
|
| 501 |
+
print(" Switching to disk-based memory mapping (slower, but works).")
|
| 502 |
warped = np.memmap('temp_warped.dat', dtype='float64', mode='w+', shape=(out_h, out_w, 3))
|
| 503 |
mask = np.memmap('temp_mask.dat', dtype='float64', mode='w+', shape=(out_h, out_w))
|
| 504 |
|
| 505 |
h_src, w_src = img.shape[:2]
|
| 506 |
+
|
| 507 |
+
# 2. Process in Chunks (e.g., 500 rows at a time)
|
| 508 |
+
# This ensures intermediate math arrays never explode RAM.
|
| 509 |
+
CHUNK_SIZE = 500
|
| 510 |
+
|
| 511 |
print(f" -> Processing {out_h} rows in chunks of {CHUNK_SIZE}...")
|
| 512 |
|
| 513 |
for y_start in range(0, out_h, CHUNK_SIZE):
|
| 514 |
y_end = min(y_start + CHUNK_SIZE, out_h)
|
| 515 |
chunk_h = y_end - y_start
|
| 516 |
+
|
| 517 |
+
# --- Generate Grid for THIS CHUNK ONLY ---
|
| 518 |
+
# Create grid of output pixel coordinates for the current strip
|
| 519 |
ys, xs = np.mgrid[y_start:y_end, 0:out_w]
|
| 520 |
dest_x = xs + off_x
|
| 521 |
dest_y = ys + off_y
|
| 522 |
+
|
| 523 |
+
# Flatten for matrix multiplication
|
| 524 |
ones = np.ones(chunk_h * out_w, dtype=np.float64)
|
| 525 |
flat_x = dest_x.reshape(-1)
|
| 526 |
flat_y = dest_y.reshape(-1)
|
| 527 |
+
|
| 528 |
+
# Stack: (N, 3) -> [x, y, 1]
|
| 529 |
+
dest_coords = np.vstack([flat_x, flat_y, ones]) # 3 x N
|
| 530 |
+
|
| 531 |
+
# --- Apply Inverse Homography ---
|
| 532 |
+
# src = H_inv @ dest
|
| 533 |
src_coords = H_inv @ dest_coords
|
| 534 |
+
|
| 535 |
+
# Perspective divide
|
| 536 |
w_vals = src_coords[2, :]
|
| 537 |
w_vals[np.abs(w_vals) < 1e-10] = 1e-10
|
| 538 |
+
src_x = src_coords[0, :] / w_vals
|
| 539 |
+
src_y = src_coords[1, :] / w_vals
|
| 540 |
+
|
| 541 |
+
# Reshape back to chunk shape
|
| 542 |
+
src_x = src_x.reshape(chunk_h, out_w)
|
| 543 |
+
src_y = src_y.reshape(chunk_h, out_w)
|
| 544 |
+
|
| 545 |
+
# --- Vectorized Bilinear Interpolation (Chunk Scope) ---
|
| 546 |
+
|
| 547 |
+
# 1. Determine valid pixels
|
| 548 |
+
valid = (src_x >= 0) & (src_x < w_src - 1) & \
|
| 549 |
+
(src_y >= 0) & (src_y < h_src - 1)
|
| 550 |
+
|
| 551 |
+
# If no valid pixels in this chunk, skip math
|
| 552 |
if not np.any(valid):
|
| 553 |
continue
|
| 554 |
+
|
| 555 |
+
# 2. Get integer and fractional parts
|
| 556 |
+
# We only compute for 'valid' pixels to save more speed/memory,
|
| 557 |
+
# but masking arrays is cleaner in numpy.
|
| 558 |
x0 = np.floor(src_x).astype(int)
|
| 559 |
y0 = np.floor(src_y).astype(int)
|
| 560 |
x1 = x0 + 1
|
| 561 |
y1 = y0 + 1
|
| 562 |
+
|
| 563 |
dx = src_x - x0
|
| 564 |
dy = src_y - y0
|
| 565 |
+
|
| 566 |
+
# Clamp to bounds to prevent index errors on edges
|
| 567 |
x0 = np.clip(x0, 0, w_src - 2)
|
| 568 |
x1 = np.clip(x1, 0, w_src - 1)
|
| 569 |
y0 = np.clip(y0, 0, h_src - 2)
|
| 570 |
y1 = np.clip(y1, 0, h_src - 1)
|
| 571 |
+
|
| 572 |
+
# 3. Fetch neighbors
|
| 573 |
Ia = img[y0, x0].astype(np.float64)
|
| 574 |
Ib = img[y0, x1].astype(np.float64)
|
| 575 |
Ic = img[y1, x0].astype(np.float64)
|
| 576 |
Id = img[y1, x1].astype(np.float64)
|
| 577 |
+
|
| 578 |
+
# 4. Compute weights (re-expand dimensions for broadcasting)
|
| 579 |
wa = ((1 - dx) * (1 - dy))[:, :, None]
|
| 580 |
wb = (dx * (1 - dy))[:, :, None]
|
| 581 |
wc = ((1 - dx) * dy)[:, :, None]
|
| 582 |
wd = (dx * dy)[:, :, None]
|
| 583 |
+
|
| 584 |
+
# 5. Interpolate
|
| 585 |
+
# This line was crashing before. Now it's 1/100th the size.
|
| 586 |
chunk_warped = wa * Ia + wb * Ib + wc * Ic + wd * Id
|
| 587 |
+
|
| 588 |
+
# 6. Assign to final array
|
| 589 |
+
# We apply the validity mask here
|
| 590 |
+
# Expand valid to 3 channels for image, keep 1 channel for mask
|
| 591 |
valid_3ch = valid[:, :, None]
|
| 592 |
+
|
| 593 |
+
# Copy strictly the valid region into the main array
|
| 594 |
+
# (Using 'np.where' is safer than boolean indexing for assignment)
|
| 595 |
current_slice_img = warped[y_start:y_end, :]
|
| 596 |
current_slice_mask = mask[y_start:y_end, :]
|
| 597 |
+
|
| 598 |
np.copyto(current_slice_img, chunk_warped, where=valid_3ch)
|
| 599 |
np.copyto(current_slice_mask, 1.0, where=valid)
|
| 600 |
|
|
|
|
| 602 |
|
| 603 |
|
| 604 |
|
| 605 |
+
# STEP 7: BLENDING - MULTI-BAND LAPLACIAN PYRAMID BLENDING & FEATHERING
|
| 606 |
|
| 607 |
|
| 608 |
def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
|
| 609 |
+
"""
|
| 610 |
+
Apply Gaussian blur. We build the kernel manually, but use
|
| 611 |
+
cv2.filter2D for the actual convolution (which is a basic
|
| 612 |
+
matrix operation, not a high-level stitching API).
|
| 613 |
+
"""
|
| 614 |
ax = np.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
|
| 615 |
xx, yy = np.meshgrid(ax, ax)
|
| 616 |
kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
|
|
|
|
| 619 |
|
| 620 |
|
| 621 |
def build_gaussian_pyramid(img, levels):
|
| 622 |
+
"""Construct a Gaussian pyramid by repeated blur + downsample."""
|
| 623 |
pyramid = [img.astype(np.float64)]
|
| 624 |
current = img.astype(np.float64)
|
| 625 |
for _ in range(levels - 1):
|
| 626 |
blurred = gaussian_blur_manual(current, kernel_size=5, sigma=1.0)
|
| 627 |
+
# Downsample by factor of 2
|
| 628 |
downsampled = blurred[::2, ::2]
|
| 629 |
pyramid.append(downsampled)
|
| 630 |
current = downsampled
|
|
|
|
| 632 |
|
| 633 |
|
| 634 |
def upsample(img, target_shape):
    """Enlarge `img` to `target_shape` via 2x pixel duplication plus smoothing."""
    rows, cols = target_shape[:2]
    # Duplicate each row, then each column, cropping to the exact target
    # size (needed when a target dimension is odd).
    doubled = np.repeat(np.repeat(img, 2, axis=0)[:rows], 2, axis=1)[:, :cols]
    # A Gaussian pass hides the blocky duplication artifacts.
    return gaussian_blur_manual(doubled, kernel_size=5, sigma=1.0)
|
| 643 |
|
| 644 |
|
| 645 |
def build_laplacian_pyramid(img, levels):
    """
    Decompose `img` into a Laplacian pyramid.

    Level i holds band-pass detail: the i-th Gaussian level minus its
    upsampled coarser neighbor. The last entry is the coarsest Gaussian
    level itself, so the pyramid reconstructs back to the original.

    This frequency-band split is what makes multi-band blending work -
    each band can be blended with a transition width matched to its
    frequency content, hiding seams without destroying local contrast.
    """
    gaussians = build_gaussian_pyramid(img, levels)
    bands = [
        gaussians[i] - upsample(gaussians[i + 1], gaussians[i].shape)
        for i in range(levels - 1)
    ]
    bands.append(gaussians[-1])  # low-frequency residual (coarsest level)
    return bands
|
| 665 |
|
| 666 |
|
| 667 |
def multi_band_blend(img1, img2, mask1, mask2, levels=6):
    """
    Multi-band blending using Laplacian pyramids (Burt & Adelson, 1983).

    Decompose both images into Laplacian pyramids, build a Gaussian
    pyramid of the blending weights, and combine corresponding levels.
    High-frequency bands blend with a sharp transition (preserving
    detail); low-frequency bands blend with a wide, smooth transition
    (avoiding visible seams).

    The per-pixel weight comes from a distance transform of each mask:
    pixels deep inside an image's valid region dominate, pixels near
    its boundary fade out smoothly.

    Args:
        img1, img2: float64 images on the same canvas
        mask1, mask2: float64 masks (0/1) indicating valid pixels
        levels: number of pyramid levels

    Returns:
        blended: the multi-band blended result (float64)
    """
    h, w = img1.shape[:2]

    # Ensure a channel axis exists so the math below is uniform.
    if img1.ndim == 2:
        img1 = img1[:, :, None]
    if img2.ndim == 2:
        img2 = img2[:, :, None]

    # (The previous version also computed `overlap`/`only1`/`only2`
    # region masks here; they were never used, so they are removed.)

    # Distance-to-boundary weights for each image's valid region.
    weight1 = np.zeros((h, w), dtype=np.float64)
    weight2 = np.zeros((h, w), dtype=np.float64)

    if np.any(mask1 > 0.5):
        dist1 = cv2.distanceTransform((mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
        weight1 = dist1.astype(np.float64)
    if np.any(mask2 > 0.5):
        dist2 = cv2.distanceTransform((mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
        weight2 = dist2.astype(np.float64)

    # Normalize into a smooth alpha in [0, 1]: 1 => img1, 0 => img2.
    total = weight1 + weight2
    total[total < 1e-10] = 1e-10
    alpha = weight1 / total

    # Expand alpha to 3 channels.
    alpha_3 = np.stack([alpha] * 3, axis=-1)

    # Frequency decomposition of both images and of the weight map.
    lap1 = build_laplacian_pyramid(img1, levels)
    lap2 = build_laplacian_pyramid(img2, levels)
    mask_pyr = build_gaussian_pyramid(alpha_3, levels)

    # Blend band by band.
    blended_pyr = []
    for lvl in range(levels):
        blended_pyr.append(
            mask_pyr[lvl] * lap1[lvl] + (1.0 - mask_pyr[lvl]) * lap2[lvl]
        )

    # Collapse the blended pyramid back into a single image.
    result = blended_pyr[-1]
    for lvl in range(levels - 2, -1, -1):
        result = upsample(result, blended_pyr[lvl].shape) + blended_pyr[lvl]

    return result
|
| 742 |
|
| 743 |
|
| 744 |
def feathered_blend(img1, img2, mask1, mask2):
    """
    Distance-weighted ("feathered") blending baseline.

    Each pixel in the overlap zone becomes a weighted average of the
    two images, weighted by how far the pixel lies from the boundary of
    each image's valid region. Serves as the simple baseline for the
    "naive vs final" comparison.
    """
    h, w = img1.shape[:2]

    def _boundary_distance(mask):
        # Per-pixel distance to the edge of the valid region (zeros when
        # the mask is empty).
        if not np.any(mask > 0.5):
            return np.zeros((h, w), dtype=np.float64)
        return cv2.distanceTransform(
            (mask > 0.5).astype(np.uint8), cv2.DIST_L2, 5
        ).astype(np.float64)

    weight1 = _boundary_distance(mask1)
    weight2 = _boundary_distance(mask2)

    total = weight1 + weight2
    total[total < 1e-10] = 1e-10

    alpha1 = (weight1 / total)[:, :, None]
    alpha2 = (weight2 / total)[:, :, None]

    return alpha1 * img1 + alpha2 * img2
|
| 774 |
|
| 775 |
|
| 776 |
|
| 777 |
+
# STEP 8: INTENSITY / EXPOSURE COMPENSATION
|
| 778 |
|
| 779 |
|
| 780 |
def compensate_exposure(img_target, img_source, mask_target, mask_source):
    """
    Gain-based exposure compensation.

    Measures the per-channel mean intensity of both images inside their
    overlap region and multiplies the source image by the target/source
    ratio, clamped to [0.5, 2.0]. This evens out differences in camera
    auto-exposure between the original photographs.

    Args:
        img_target: reference image (float64)
        img_source: image to adjust
        mask_target: valid-pixel mask for target
        mask_source: valid-pixel mask for source

    Returns:
        Exposure-corrected copy of the source image (the input array is
        never modified in place). If the overlap is smaller than 100
        pixels the source is returned unchanged.
    """
    shared = (mask_target > 0.5) & (mask_source > 0.5)

    # Too little overlap -> any gain estimate would just be noise.
    if np.sum(shared) < 100:
        return img_source.copy()

    gains = []
    for ch in range(3):
        ref_mean = np.mean(img_target[:, :, ch][shared])
        src_mean = np.mean(img_source[:, :, ch][shared])
        if src_mean > 1.0:
            # Clamp so a bad estimate cannot wildly distort the image.
            gains.append(np.clip(ref_mean / src_mean, 0.5, 2.0))
        else:
            # Channel is essentially black; leave it untouched.
            gains.append(1.0)

    adjusted = img_source.copy()
    for ch in range(3):
        adjusted[:, :, ch] = adjusted[:, :, ch] * gains[ch]

    print(f" Exposure gains: R={gains[2]:.3f}, G={gains[1]:.3f}, B={gains[0]:.3f}")
    return adjusted
|
| 822 |
|
| 823 |
|
| 824 |
+
# STEP 9: NAIVE STITCH
|
| 825 |
|
| 826 |
|
| 827 |
def naive_stitch(warped_images, masks):
|
| 828 |
+
"""
|
| 829 |
+
A naive stitch simply overlays images in order, with no blending.
|
| 830 |
+
Later images overwrite earlier ones. This produces visible seams
|
| 831 |
+
at exposure boundaries and ghosting at misalignments.
|
| 832 |
+
"""
|
| 833 |
canvas = np.zeros_like(warped_images[0])
|
| 834 |
for img, mask in zip(warped_images, masks):
|
| 835 |
region = mask > 0.5
|
|
|
|
| 842 |
|
| 843 |
|
| 844 |
|
| 845 |
+
# STEP 10: FINAL CLEANUP - CROP & RECTANGLE
|
| 846 |
|
| 847 |
|
| 848 |
def crop_to_content(panorama, combined_mask):
    """
    Trim the panorama down to its largest clean rectangle, NumPy-only.

    Three stages:
      1. Threshold the mask into a boolean validity map (manual, no cv2).
      2. Tight bounding-box crop around all valid pixels.
      3. Greedy shrink: repeatedly peel one pixel off whichever border
         carries the most invalid pixels, until all four borders are
         clean — while refusing to discard more than ~80% of the real
         content (guards against degenerate results on slanted inputs).
    """
    # Stage 1: boolean validity map. Float masks tolerate tiny noise.
    if combined_mask.dtype == np.float64 or combined_mask.dtype == np.float32:
        valid_mask = combined_mask > 0.01
    else:
        valid_mask = combined_mask > 1

    # Stage 2: project onto each axis to locate content extents.
    row_has_content = np.any(valid_mask, axis=1)
    col_has_content = np.any(valid_mask, axis=0)

    # Completely empty mask: nothing sensible to crop.
    if not np.any(row_has_content) or not np.any(col_has_content):
        return panorama

    y_lo, y_hi = np.where(row_has_content)[0][[0, -1]]
    x_lo, x_hi = np.where(col_has_content)[0][[0, -1]]

    safe_crop = panorama[y_lo:y_hi + 1, x_lo:x_hi + 1]
    safe_mask = valid_mask[y_lo:y_hi + 1, x_lo:x_hi + 1]

    total_content = np.count_nonzero(safe_mask)

    # Stage 3: greedy shrink state.
    top, bottom = 0, safe_mask.shape[0] - 1
    left, right = 0, safe_mask.shape[1] - 1

    # Never throw away more than 80% of the actual content pixels.
    min_content_threshold = total_content * 0.20

    # Bounded loop so pathological masks cannot spin forever.
    for _ in range(min(safe_mask.shape[0], safe_mask.shape[1])):
        h = bottom - top
        w = right - left
        if h <= 0 or w <= 0:
            break

        roi = safe_mask[top:bottom + 1, left:right + 1]

        # Stop before cutting into the main image content.
        if np.count_nonzero(roi) < min_content_threshold:
            print(" [Crop] Stopping to preserve image content.")
            break

        # Invalid-pixel counts along each border of the current ROI.
        bad_t = np.sum(~roi[0, :])
        bad_b = np.sum(~roi[-1, :])
        bad_l = np.sum(~roi[:, 0])
        bad_r = np.sum(~roi[:, -1])

        # All four borders clean: this is our rectangle.
        if bad_t + bad_b + bad_l + bad_r == 0:
            print(f" [Crop] Found Clean Rect: {w}x{h} px")
            return safe_crop[top:bottom + 1, left:right + 1]

        # Peel one pixel off the dirtiest border.
        worst = max(bad_t, bad_b, bad_l, bad_r)
        if worst == bad_t:
            top += 1
        elif worst == bad_b:
            bottom -= 1
        elif worst == bad_l:
            left += 1
        else:
            right -= 1

    # Fell out of the loop: return the best rectangle found so far.
    return safe_crop[top:bottom + 1, left:right + 1]
|
| 935 |
|
| 936 |
+
# MAIN
|
| 937 |
+
|
| 938 |
|
| 939 |
def memory_safe_linear_blend(img1, img2, mask1, mask2):
    """
    Disk-Based Linear Blending: Uses hard drive (memmap) to store weights.
    This guarantees it will run, even if RAM is completely full.

    Per-pixel weights are each pixel's distance to its mask border
    (distance transform), so the blend feathers smoothly across overlaps.

    Args:
        img1, img2: uint8 images on the shared canvas
        mask1, mask2: uint8 validity masks (0 or 255)

    Returns:
        uint8 blended image; falls back to img1 on unexpected errors.
    """
    print(" -> RAM Full. Switching to Disk-Based Blending (Slow but Robust)...")
    h, w = img1.shape[:2]

    def _weight_map(mask):
        # Distance-to-border weight map; all-zero when the mask is empty.
        # (Fixes original bug: the binary temp was only bound in one
        # branch, so the later `del` raised NameError on empty masks.)
        if np.any(mask > 0):
            binary = (mask > 127).astype(np.uint8)
            return cv2.distanceTransform(binary, cv2.DIST_L2, 5)
        return np.zeros((h, w), dtype=np.float32)

    import gc
    fp1 = fp2 = None
    try:
        # Compute one weight map at a time and spill it to disk so two
        # full-resolution float maps never coexist in RAM.
        print(" Calculating weight map 1...")
        fp1 = np.memmap('temp_w1.dat', dtype='float32', mode='w+', shape=(h, w))
        fp1[:] = _weight_map(mask1)
        gc.collect()

        print(" Calculating weight map 2...")
        fp2 = np.memmap('temp_w2.dat', dtype='float32', mode='w+', shape=(h, w))
        fp2[:] = _weight_map(mask2)
        gc.collect()

        print(" Blending chunks...")
        # Output array: try RAM first, fall back to a disk-backed memmap.
        # NOTE(review): the MemoryError branch was lost in extraction and
        # is reconstructed to mirror the weight-map spill strategy.
        try:
            result = np.zeros(img1.shape, dtype=np.uint8)
        except MemoryError:
            result = np.memmap('temp_out.dat', dtype='uint8', mode='w+',
                               shape=img1.shape)

        # Blend in horizontal strips so only a small slice of the
        # float weights/images is resident at any moment.
        CHUNK = 2000
        for y in range(0, h, CHUNK):
            ye = min(y + CHUNK, h)

            w1_c = fp1[y:ye]
            w2_c = fp2[y:ye]

            total = w1_c + w2_c
            total[total < 1e-5] = 1e-5  # avoid divide-by-zero outside masks
            alpha = (w1_c / total)[:, :, None]  # expand to 3 channels

            chunk1 = img1[y:ye].astype(np.float32)
            chunk2 = img2[y:ye].astype(np.float32)

            blended = chunk1 * alpha + chunk2 * (1.0 - alpha)
            result[y:ye] = np.clip(blended, 0, 255).astype(np.uint8)

        return result
    except Exception as e:
        # Last-resort best-effort: never crash the pipeline here.
        print(f" [!] Error in disk blending: {e}")
        return img1
    finally:
        # Release memmap handles before deleting their backing files;
        # cleanup now also runs on the error path (original leaked them).
        del fp1, fp2
        gc.collect()
        for tmp in ('temp_w1.dat', 'temp_w2.dat'):
            try:
                os.remove(tmp)
            except OSError:
                pass
|
|
|
|
| 1021 |
|
| 1022 |
def smart_blend_manager(img1, img2, mask1, mask2, levels=6):
    """
    Smart Blending Manager:
    1. Checks if the image is too big for RAM.
    2. If too big, goes DIRECTLY to Disk-Based Linear Blending (Safe).
    3. If small, tries High-Quality Multi-Band Blending.

    Args:
        img1, img2: uint8 images on the shared canvas
        mask1, mask2: uint8 validity masks (0/255)
        levels: pyramid depth for multi-band blending

    Returns:
        Blended image (uint8 from the disk path; multi-band output otherwise).
    """
    # 1. Estimate memory requirement for multi-band blending:
    # (pixels * 3 channels * 8 bytes float64) * 2 images * 2x pyramid
    # overhead — a deliberately conservative estimate.
    h, w = img1.shape[:2]
    pixels = h * w
    est_mem_bytes = (pixels * 3 * 8) * 4
    est_mem_gb = est_mem_bytes / (1024**3)

    print(f" [Memory Check] Image size: {w}x{h}")
    print(f" [Memory Check] Est. Multi-Band RAM needed: ~{est_mem_gb:.2f} GB")

    # THRESHOLD: above this, skip multi-band entirely to avoid a hard
    # crash inside OpenCV's C++ allocator.
    SAFE_LIMIT_GB = 2.0

    if est_mem_gb > SAFE_LIMIT_GB:
        print(f" [!] Image exceeds safe limit ({SAFE_LIMIT_GB} GB). Forcing Disk-Based Blending.")
        return memory_safe_linear_blend(img1, img2, mask1, mask2)

    # 2. Try the high-quality blend (small images only).
    try:
        return multi_band_blend(img1.astype(np.float64), img2.astype(np.float64),
                                mask1, mask2, levels)
    except (MemoryError, cv2.error):
        # FIX: the original also caught np.core._exceptions._ArrayMemoryError,
        # a private name removed in NumPy 2.x (np.core is gone). It subclasses
        # MemoryError, so catching MemoryError covers it on every version.
        print("\n [!] RAM Full or OpenCV Error! Switching to Disk-Based Blending...")
        import gc
        gc.collect()
        return memory_safe_linear_blend(img1, img2, mask1, mask2)
|
| 1062 |
|
| 1063 |
|
| 1064 |
|
|
|
|
|
|
|
|
|
|
| 1065 |
def stitch_panorama(image_paths, output_dir="output"):
    """
    Full panoramic stitching pipeline.

    Strategy: Use the CENTER image as the reference frame (identity
    homography). Compute homographies that map the left image -> center
    and right image -> center. For more than 3 images, chain
    homographies outward from center.

    This center-projection approach minimizes the total amount of
    perspective distortion in the final panorama, since the center
    image undergoes no warping at all.

    Args:
        image_paths: image file paths ordered left to right
        output_dir: directory receiving all intermediate/final PNGs

    Returns:
        final_panorama: cropped, blended panorama (uint8 BGR)
    """
    import gc

    os.makedirs(output_dir, exist_ok=True)

    print("=" * 70)
    print("PANORAMIC IMAGE STITCHING ")
    print("=" * 70)

    # ------------------------------------------------------------------
    # 1. Load images
    # ------------------------------------------------------------------
    print("\n[1/8] Loading images...")
    images_bgr, images_gray = load_images(image_paths)
    n_images = len(images_bgr)
    display_images(images_bgr)

    # The center image is our reference (identity homography).
    center_idx = n_images // 2
    print(f" Reference (center) image: index {center_idx}")

    # ------------------------------------------------------------------
    # 2. Extract SIFT features
    # ------------------------------------------------------------------
    print("\n[2/8] Extracting SIFT features...")
    keypoints, descriptors = extract_sift_features(images_gray)
    visualize_keypoints(images_bgr, keypoints)

    # ------------------------------------------------------------------
    # 3-5. For each adjacent pair, match features & compute homography
    # ------------------------------------------------------------------
    print("\n[3/8] Matching features & computing homographies...")

    # Homographies mapping each image -> center reference frame;
    # the center itself maps by the identity.
    homographies = [None] * n_images
    homographies[center_idx] = np.eye(3)

    # Left side: chain center-1, center-2, ... outward to index 0.
    for i in range(center_idx - 1, -1, -1):
        print(f"\n Pair: Image {i} <-> Image {i+1}")

        matches = match_features(descriptors[i], descriptors[i + 1])
        pts_i, pts_ip1 = get_matched_points(keypoints[i], keypoints[i + 1], matches)

        visualize_matches(
            images_bgr[i], keypoints[i],
            images_bgr[i + 1], keypoints[i + 1],
            matches,
            title=f"Matches: Image {i} <-> Image {i+1}",
            fname=f"03_matches_{i}_{i+1}.png"
        )

        # Image i -> Image i+1, then chain: H[i] = H[i+1] @ H_i_to_ip1.
        H_i_to_ip1, inlier_mask = compute_homography_ransac(pts_i, pts_ip1)
        homographies[i] = homographies[i + 1] @ H_i_to_ip1
        print(f" Homography (Image {i} -> center) computed successfully")

    # Right side: chain center+1, center+2, ... outward to the last index.
    for i in range(center_idx + 1, n_images):
        print(f"\n Pair: Image {i} <-> Image {i-1}")

        matches = match_features(descriptors[i], descriptors[i - 1])
        pts_i, pts_im1 = get_matched_points(keypoints[i], keypoints[i - 1], matches)

        visualize_matches(
            images_bgr[i], keypoints[i],
            images_bgr[i - 1], keypoints[i - 1],
            matches,
            title=f"Matches: Image {i} <-> Image {i-1}",
            fname=f"03_matches_{i}_{i-1}.png"
        )

        # Image i -> Image i-1, then chain: H[i] = H[i-1] @ H_i_to_im1.
        H_i_to_im1, inlier_mask = compute_homography_ransac(pts_i, pts_im1)
        homographies[i] = homographies[i - 1] @ H_i_to_im1
        print(f" Homography (Image {i} -> center) computed successfully")

    # ------------------------------------------------------------------
    # 6. Compute canvas size and warp all images
    # ------------------------------------------------------------------
    print("\n[4/8] Computing output canvas bounds...")

    all_x_min, all_y_min = 0, 0
    all_x_max, all_y_max = 0, 0

    for i in range(n_images):
        h, w = images_bgr[i].shape[:2]
        xmin, ymin, xmax, ymax = compute_warped_bounds(homographies[i], h, w)
        # (min-x line reconstructed: lost in extraction, symmetric to below)
        all_x_min = min(all_x_min, xmin)
        all_y_min = min(all_y_min, ymin)
        all_x_max = max(all_x_max, xmax)
        all_y_max = max(all_y_max, ymax)

    out_w = all_x_max - all_x_min + 1
    out_h = all_y_max - all_y_min + 1
    offset = (all_x_min, all_y_min)  # translation offset
    print(f" Canvas size: {out_w} x {out_h} px, offset: {offset}")

    print("\n[5/8] Warping images into common coordinate frame...")
    warped_images = []
    masks = []
    for i in range(n_images):
        print(f" Warping image {i}...")
        warped, mask = warp_image(
            images_bgr[i], homographies[i],
            output_size=(out_h, out_w),
            offset=offset
        )

        # Store as uint8 (1 byte/px) instead of float64 (8 bytes/px):
        # cuts peak RAM by ~85% on large canvases.
        warped_u8 = np.clip(warped, 0, 255).astype(np.uint8)
        warped_images.append(warped_u8)

        # Masks stored as uint8 0/255 for the same reason.
        mask_u8 = (mask > 0.5).astype(np.uint8) * 255
        masks.append(mask_u8)

        # Force-release the heavy float arrays immediately.
        del warped, mask
        gc.collect()

    # ------------------------------------------------------------------
    # 7. Exposure compensation
    # ------------------------------------------------------------------
    print("\n[6/8] Compensating exposure differences...")
    for i in range(n_images):
        if i == center_idx:
            continue

        # Float64 only temporarily, for the gain math.
        target_f = warped_images[center_idx].astype(np.float64)
        source_f = warped_images[i].astype(np.float64)

        # Masks are uint8 0-255; normalize to 0-1 for the function.
        mask_t = masks[center_idx] / 255.0
        mask_s = masks[i] / 255.0

        compensated = compensate_exposure(
            target_f, source_f,
            mask_t, mask_s
        )

        # Back to uint8 immediately to keep RAM low.
        warped_images[i] = np.clip(compensated, 0, 255).astype(np.uint8)

        del target_f, source_f, compensated
        gc.collect()

    # ------------------------------------------------------------------
    # 8a. Naive stitch (for report comparison)
    # ------------------------------------------------------------------
    print("\n[7/8] Creating naive stitch (direct overlay, no blending)...")
    naive = naive_stitch(warped_images, masks)
    naive_clipped = np.clip(naive, 0, 255).astype(np.uint8)
    cv2.imwrite(os.path.join(output_dir, "04_naive_stitch.png"), naive_clipped)

    # ------------------------------------------------------------------
    # 8b. Multi-band blending (final panorama)
    # ------------------------------------------------------------------
    print("\n[8/8] Multi-band Laplacian blending for seamless result...")

    # Iteratively blend: start with center, blend outward.
    blended = warped_images[center_idx].copy()
    blended_mask = masks[center_idx].copy()

    # Order of blending: alternate left and right from center.
    # (append lines reconstructed: lost in extraction)
    blend_order = []
    for d in range(1, n_images):
        if center_idx - d >= 0:
            blend_order.append(center_idx - d)
        if center_idx + d < n_images:
            blend_order.append(center_idx + d)

    for idx in blend_order:
        print(f" Blending image {idx} into panorama...")

        # Ensure we are passing valid types to the blender.
        if blended.dtype != np.uint8:
            blended = np.clip(blended, 0, 255).astype(np.uint8)

        curr_img = np.clip(warped_images[idx], 0, 255).astype(np.uint8)

        # Use the MANAGER (RAM check + fallback) instead of calling
        # multi_band_blend directly.
        blended = smart_blend_manager(
            blended, curr_img,
            blended_mask, masks[idx],
            levels=6
        )
        # Grow the combined validity mask.
        blended_mask = np.maximum(blended_mask, masks[idx])

    blended_clipped = np.clip(blended, 0, 255).astype(np.uint8)

    # ------------------------------------------------------------------
    # 9. Final cleanup - crop to clean rectangle
    # ------------------------------------------------------------------
    print("\n Cropping to clean rectangle...")
    final_panorama = crop_to_content(blended_clipped, blended_mask)
    naive_cropped = crop_to_content(naive_clipped, blended_mask)

    # Save results
    cv2.imwrite(os.path.join(output_dir, "05_blended_panorama.png"), final_panorama)
    cv2.imwrite(os.path.join(output_dir, "04_naive_stitch_cropped.png"), naive_cropped)

    # ------------------------------------------------------------------
    # 10. Comparison figure for the report
    # ------------------------------------------------------------------
    print("\n Generating comparison figure...")
    fig, axes = plt.subplots(2, 1, figsize=(18, 10))
    axes[0].imshow(cv2.cvtColor(naive_cropped, cv2.COLOR_BGR2RGB))
    # NOTE(review): the figure body below was lost in extraction and is
    # reconstructed (titles are best-effort; output filename is confirmed
    # by the summary print list).
    axes[0].set_title("Naive Stitch (no blending)")
    axes[0].axis('off')
    axes[1].imshow(cv2.cvtColor(final_panorama, cv2.COLOR_BGR2RGB))
    axes[1].set_title("Multi-band Blended Panorama")
    axes[1].axis('off')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "06_comparison.png"), dpi=150)
    plt.close()

    print("\n" + "=" * 70)
    print("Done All are saved to:", output_dir)
    print("=" * 70)
    # (mojibake fix: the separator below rendered as a stray Greek beta)
    print(" 01_input_images.png -> original images side by side")
    print(" 02_sift_keypoints.png -> detected SIFT keypoints")
    print(" 03_matches_*.png -> feature matches per pair")
    print(" 04_naive_stitch.png -> naive direct-overlay stitch")
    print(" 05_blended_panorama.png -> final multi-band blended panorama")
    print(" 06_comparison.png -> side-by-side naive vs final")

    return final_panorama
|
| 1321 |
|
| 1322 |
|
| 1323 |
+
|
| 1324 |
+
|
| 1325 |
if __name__ == "__main__":
    # USAGE:
    #   python panorama_stitcher.py img_left.jpg img_center.jpg img_right.jpg
    #
    # Supply the images left-to-right in the order they were captured;
    # the middle image becomes the reference frame. Three or more
    # overlapping images are accepted.
    image_paths = sys.argv[1:]

    if len(image_paths) < 3:
        print("Usage: python panorama_stitcher.py <left.jpg> <center.jpg> <right.jpg> [more...]")
        print("Supply at least 3 overlapping images, ordered left to right.")
        sys.exit(1)

    print(f"Stitching {len(image_paths)} images...")
    stitch_panorama(image_paths, output_dir="output")