priyadip committed on
Commit
33a8eda
·
verified ·
1 Parent(s): e6423fb

Update panorama_stitch_memsafe.py

Browse files
Files changed (1) hide show
  1. panorama_stitch_memsafe.py +697 -78
panorama_stitch_memsafe.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- PANORAMIC IMAGE STITCHING β€” FROM SCRATCH IMPLEMENTATION
3
 
4
  Permitted library usage
5
  - OpenCV: Image I/O, SIFT detection, keypoint matching
@@ -12,6 +12,24 @@ Manual implementations (NO high-level APIs):
12
  - Perspective warping (forward + inverse mapping)
13
  - Multi-band blending (Laplacian pyramid) & feathered blending
14
  - Intensity normalization and final cleanup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  """
17
 
@@ -21,7 +39,6 @@ import matplotlib.pyplot as plt
21
  import os
22
  import sys
23
  import warnings
24
- import gc
25
  warnings.filterwarnings('ignore')
26
 
27
 
@@ -30,6 +47,12 @@ warnings.filterwarnings('ignore')
30
 
31
 
32
  def load_images(image_paths):
 
 
 
 
 
 
33
  images_bgr = []
34
  images_gray = []
35
  for p in image_paths:
@@ -42,7 +65,8 @@ def load_images(image_paths):
42
  return images_bgr, images_gray
43
 
44
 
45
- def display_images(images_bgr, title="Input Images", output_dir="output"):
 
46
  n = len(images_bgr)
47
  fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
48
  if n == 1:
@@ -53,27 +77,42 @@ def display_images(images_bgr, title="Input Images", output_dir="output"):
53
  axes[i].axis('off')
54
  fig.suptitle(title, fontsize=14)
55
  plt.tight_layout()
56
- plt.savefig(os.path.join(output_dir, "01_input_images.png"), dpi=150, bbox_inches='tight')
57
  plt.close()
58
 
59
 
60
 
61
- # STEP 2: SIFT FEATURE EXTRACTION
62
 
63
 
64
  def extract_sift_features(images_gray):
65
- sift = cv2.SIFT_create(nfeatures=10000)
 
 
 
 
 
 
 
 
 
 
 
 
66
  keypoints_list = []
67
  descriptors_list = []
 
68
  for idx, gray in enumerate(images_gray):
69
  kp, des = sift.detectAndCompute(gray, None)
70
  keypoints_list.append(kp)
71
  descriptors_list.append(des)
72
  print(f" Image {idx}: detected {len(kp)} SIFT keypoints")
 
73
  return keypoints_list, descriptors_list
74
 
75
 
76
- def visualize_keypoints(images_bgr, keypoints_list, output_dir="output"):
 
77
  n = len(images_bgr)
78
  fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
79
  if n == 1:
@@ -88,32 +127,55 @@ def visualize_keypoints(images_bgr, keypoints_list, output_dir="output"):
88
  axes[i].axis('off')
89
  fig.suptitle("SIFT Keypoints", fontsize=14)
90
  plt.tight_layout()
91
- plt.savefig(os.path.join(output_dir, "02_sift_keypoints.png"), dpi=150, bbox_inches='tight')
92
  plt.close()
93
 
94
 
95
 
96
- # STEP 3: FEATURE MATCHING
97
 
98
 
99
def match_features(des1, des2, ratio_thresh=0.75):
    """Match two SIFT descriptor sets and keep only confident matches.

    Runs brute-force L2 kNN matching (k=2), then applies Lowe's ratio
    test: a match is kept only when its best distance is clearly smaller
    than the second-best distance.

    Parameters
    ----------
    des1, des2 : descriptor arrays for the two images.
    ratio_thresh : float, ratio-test threshold (default 0.75).

    Returns
    -------
    list of cv2.DMatch that passed the ratio test.
    """
    matcher = cv2.BFMatcher(cv2.NORM_L2)
    raw_matches = matcher.knnMatch(des1, des2, k=2)

    # Lowe's ratio test: keep the best match only when it is
    # unambiguously better than the runner-up.
    good_matches = [
        best
        for best, runner_up in raw_matches
        if best.distance < ratio_thresh * runner_up.distance
    ]

    print(f" Ratio test: {len(raw_matches)} raw -> {len(good_matches)} good matches")
    return good_matches
108
 
109
 
110
def get_matched_points(kp1, kp2, matches):
    """Collect corresponding (x, y) coordinates for a list of matches.

    For every match, looks up the query keypoint in kp1 and the train
    keypoint in kp2 and returns their pixel coordinates as two parallel
    float64 arrays of shape (N, 2).
    """
    src_coords = []
    dst_coords = []
    for m in matches:
        src_coords.append(kp1[m.queryIdx].pt)
        dst_coords.append(kp2[m.trainIdx].pt)
    return np.float64(src_coords), np.float64(dst_coords)
114
 
115
 
116
- def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fname="matches.png", output_dir="output"):
 
117
  vis = cv2.drawMatches(
118
  img1, kp1, img2, kp2, matches[:80], None,
119
  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
@@ -122,110 +184,226 @@ def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fn
122
  plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
123
  plt.title(title)
124
  plt.axis('off')
125
- plt.savefig(os.path.join(output_dir, fname), dpi=150, bbox_inches='tight')
126
  plt.close()
127
 
128
 
129
 
130
- # STEP 4: HOMOGRAPHY ESTIMATION - DLT
131
 
132
 
133
  def normalize_points(pts):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  centroid = np.mean(pts, axis=0)
135
  shifted = pts - centroid
136
  avg_dist = np.mean(np.sqrt(np.sum(shifted ** 2, axis=1)))
137
  if avg_dist < 1e-10:
138
  avg_dist = 1e-10
139
  scale = np.sqrt(2.0) / avg_dist
 
140
  T = np.array([
141
  [scale, 0, -scale * centroid[0]],
142
  [0, scale, -scale * centroid[1]],
143
  [0, 0, 1 ]
144
  ])
145
- pts_h = np.column_stack([pts, np.ones(len(pts))])
146
- pts_norm_h = (T @ pts_h.T).T
 
147
  pts_norm = pts_norm_h[:, :2] / pts_norm_h[:, 2:3]
 
148
  return pts_norm, T
149
 
150
 
151
  def compute_homography_dlt(src_pts, dst_pts):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  assert len(src_pts) >= 4, "Need at least 4 correspondences for homography"
 
 
153
  src_norm, T_src = normalize_points(src_pts)
154
  dst_norm, T_dst = normalize_points(dst_pts)
 
155
  N = len(src_norm)
156
  A = np.zeros((2 * N, 9))
 
157
  for i in range(N):
158
  x, y = src_norm[i]
159
  xp, yp = dst_norm[i]
160
- A[2 * i] = [-x, -y, -1, 0, 0, 0, xp * x, xp * y, xp]
161
- A[2 * i + 1] = [0, 0, 0, -x, -y, -1, yp * x, yp * y, yp]
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  _, S, Vt = np.linalg.svd(A)
163
  h = Vt[-1]
164
  H_norm = h.reshape(3, 3)
 
 
 
165
  H = np.linalg.inv(T_dst) @ H_norm @ T_src
 
 
166
  if abs(H[2, 2]) > 1e-10:
167
  H /= H[2, 2]
 
168
  return H
169
 
170
 
171
 
172
- # STEP 5: RANSAC
173
 
174
 
175
  def compute_homography_ransac(src_pts, dst_pts,
176
  n_iterations=10000,
177
  reprojection_thresh=3.0,
178
  min_inliers_ratio=0.1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  N = len(src_pts)
180
  if N < 4:
181
  raise ValueError(f"Need >=4 matches for homography, got {N}")
 
182
  best_inlier_count = 0
183
  best_inlier_mask = None
184
  best_H = None
185
- src_h = np.column_stack([src_pts, np.ones(N)])
 
 
186
 
187
  for iteration in range(n_iterations):
 
188
  indices = np.random.choice(N, 4, replace=False)
189
  s = src_pts[indices]
190
  d = dst_pts[indices]
 
 
 
191
  try:
192
  H_candidate = compute_homography_dlt(s, d)
193
  except np.linalg.LinAlgError:
194
  continue
 
195
  if H_candidate is None or np.any(np.isnan(H_candidate)):
196
  continue
197
- projected_h = (H_candidate @ src_h.T).T
 
 
 
198
  w = projected_h[:, 2:3]
199
  w[np.abs(w) < 1e-10] = 1e-10
200
  projected = projected_h[:, :2] / w
 
 
201
  errors = np.sqrt(np.sum((projected - dst_pts) ** 2, axis=1))
202
  inlier_mask = errors < reprojection_thresh
203
  inlier_count = np.sum(inlier_mask)
 
 
204
  if inlier_count > best_inlier_count:
205
  best_inlier_count = inlier_count
206
  best_inlier_mask = inlier_mask.copy()
207
  best_H = H_candidate.copy()
 
 
208
  if inlier_count > 0.9 * N:
209
  break
210
 
 
211
  if best_inlier_count < min_inliers_ratio * N:
212
  raise RuntimeError(
213
  f"RANSAC failed: only {best_inlier_count}/{N} inliers "
214
  f"({100*best_inlier_count/N:.1f}%), threshold was {min_inliers_ratio*100:.0f}%"
215
  )
 
 
216
  best_H = compute_homography_dlt(
217
  src_pts[best_inlier_mask],
218
  dst_pts[best_inlier_mask]
219
  )
 
220
  print(f" RANSAC: {best_inlier_count}/{N} inliers "
221
  f"({100*best_inlier_count/N:.1f}%) over {min(iteration+1, n_iterations)} iterations")
222
- return best_H, best_inlier_mask
223
 
 
224
 
225
- # STEP 6: IMAGE WARPING
226
 
 
 
 
227
 
228
  def apply_homography_to_point(H, pt):
 
229
  p = np.array([pt[0], pt[1], 1.0])
230
  q = H @ p
231
  if abs(q[2]) < 1e-10:
@@ -234,76 +412,189 @@ def apply_homography_to_point(H, pt):
234
 
235
 
236
  def compute_warped_bounds(H, h, w):
237
- corners = np.array([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]], dtype=np.float64)
 
 
 
 
 
 
 
 
 
 
 
 
238
  warped_corners = []
239
  for c in corners:
240
  wx, wy = apply_homography_to_point(H, c)
241
  warped_corners.append([wx, wy])
242
  warped_corners = np.array(warped_corners)
 
243
  x_min = np.floor(warped_corners[:, 0].min()).astype(int)
244
  x_max = np.ceil(warped_corners[:, 0].max()).astype(int)
245
  y_min = np.floor(warped_corners[:, 1].min()).astype(int)
246
  y_max = np.ceil(warped_corners[:, 1].max()).astype(int)
 
247
  return x_min, y_min, x_max, y_max
248
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def warp_image(img, H, output_size, offset):
 
 
 
 
251
  out_h, out_w = output_size
252
  H_inv = np.linalg.inv(H)
253
  off_x, off_y = offset
 
 
 
 
254
  try:
255
  warped = np.zeros((out_h, out_w, 3), dtype=np.float64)
256
  mask = np.zeros((out_h, out_w), dtype=np.float64)
257
  except MemoryError:
258
  print("\n[!] Critical: System RAM cannot hold the final image.")
259
- print(" Switching to disk-based memory mapping.")
260
  warped = np.memmap('temp_warped.dat', dtype='float64', mode='w+', shape=(out_h, out_w, 3))
261
  mask = np.memmap('temp_mask.dat', dtype='float64', mode='w+', shape=(out_h, out_w))
262
 
263
  h_src, w_src = img.shape[:2]
264
- CHUNK_SIZE = 500
 
 
 
 
265
  print(f" -> Processing {out_h} rows in chunks of {CHUNK_SIZE}...")
266
 
267
  for y_start in range(0, out_h, CHUNK_SIZE):
268
  y_end = min(y_start + CHUNK_SIZE, out_h)
269
  chunk_h = y_end - y_start
 
 
 
270
  ys, xs = np.mgrid[y_start:y_end, 0:out_w]
271
  dest_x = xs + off_x
272
  dest_y = ys + off_y
 
 
273
  ones = np.ones(chunk_h * out_w, dtype=np.float64)
274
  flat_x = dest_x.reshape(-1)
275
  flat_y = dest_y.reshape(-1)
276
- dest_coords = np.vstack([flat_x, flat_y, ones])
 
 
 
 
 
277
  src_coords = H_inv @ dest_coords
 
 
278
  w_vals = src_coords[2, :]
279
  w_vals[np.abs(w_vals) < 1e-10] = 1e-10
280
- src_x = (src_coords[0, :] / w_vals).reshape(chunk_h, out_w)
281
- src_y = (src_coords[1, :] / w_vals).reshape(chunk_h, out_w)
282
- valid = (src_x >= 0) & (src_x < w_src - 1) & (src_y >= 0) & (src_y < h_src - 1)
 
 
 
 
 
 
 
 
 
 
 
283
  if not np.any(valid):
284
  continue
 
 
 
 
285
  x0 = np.floor(src_x).astype(int)
286
  y0 = np.floor(src_y).astype(int)
287
  x1 = x0 + 1
288
  y1 = y0 + 1
 
289
  dx = src_x - x0
290
  dy = src_y - y0
 
 
291
  x0 = np.clip(x0, 0, w_src - 2)
292
  x1 = np.clip(x1, 0, w_src - 1)
293
  y0 = np.clip(y0, 0, h_src - 2)
294
  y1 = np.clip(y1, 0, h_src - 1)
 
 
295
  Ia = img[y0, x0].astype(np.float64)
296
  Ib = img[y0, x1].astype(np.float64)
297
  Ic = img[y1, x0].astype(np.float64)
298
  Id = img[y1, x1].astype(np.float64)
 
 
299
  wa = ((1 - dx) * (1 - dy))[:, :, None]
300
  wb = (dx * (1 - dy))[:, :, None]
301
  wc = ((1 - dx) * dy)[:, :, None]
302
  wd = (dx * dy)[:, :, None]
 
 
 
303
  chunk_warped = wa * Ia + wb * Ib + wc * Ic + wd * Id
 
 
 
 
304
  valid_3ch = valid[:, :, None]
 
 
 
305
  current_slice_img = warped[y_start:y_end, :]
306
  current_slice_mask = mask[y_start:y_end, :]
 
307
  np.copyto(current_slice_img, chunk_warped, where=valid_3ch)
308
  np.copyto(current_slice_mask, 1.0, where=valid)
309
 
@@ -311,10 +602,15 @@ def warp_image(img, H, output_size, offset):
311
 
312
 
313
 
314
- # STEP 7: BLENDING
315
 
316
 
317
  def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
 
 
 
 
 
318
  ax = np.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
319
  xx, yy = np.meshgrid(ax, ax)
320
  kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
@@ -323,10 +619,12 @@ def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
323
 
324
 
325
  def build_gaussian_pyramid(img, levels):
 
326
  pyramid = [img.astype(np.float64)]
327
  current = img.astype(np.float64)
328
  for _ in range(levels - 1):
329
  blurred = gaussian_blur_manual(current, kernel_size=5, sigma=1.0)
 
330
  downsampled = blurred[::2, ::2]
331
  pyramid.append(downsampled)
332
  current = downsampled
@@ -334,102 +632,204 @@ def build_gaussian_pyramid(img, levels):
334
 
335
 
336
def upsample(img, target_shape):
    """Upsample an image by 2x to match target_shape, then smooth it.

    Each pixel is duplicated along both axes (nearest-neighbour 2x),
    the result is cropped to the exact target height/width (handles
    odd sizes), and a manual Gaussian blur removes the blockiness.
    """
    rows, cols = target_shape[0], target_shape[1]
    # Duplicate rows, crop, then duplicate columns and crop again.
    doubled_rows = np.repeat(img, 2, axis=0)[:rows]
    doubled = np.repeat(doubled_rows, 2, axis=1)[:, :cols]
    return gaussian_blur_manual(doubled, kernel_size=5, sigma=1.0)
342
 
343
 
344
  def build_laplacian_pyramid(img, levels):
 
 
 
 
 
 
 
 
 
 
 
345
  gauss_pyr = build_gaussian_pyramid(img, levels)
346
  lap_pyr = []
347
  for i in range(levels - 1):
348
  upsampled = upsample(gauss_pyr[i + 1], gauss_pyr[i].shape)
349
  lap = gauss_pyr[i] - upsampled
350
  lap_pyr.append(lap)
351
- lap_pyr.append(gauss_pyr[-1])
352
  return lap_pyr
353
 
354
 
355
  def multi_band_blend(img1, img2, mask1, mask2, levels=6):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  h, w = img1.shape[:2]
 
 
357
  if img1.ndim == 2:
358
  img1 = img1[:, :, None]
359
  if img2.ndim == 2:
360
  img2 = img2[:, :, None]
 
 
 
361
  overlap = (mask1 > 0.5) & (mask2 > 0.5)
 
 
 
 
 
362
  weight1 = np.zeros((h, w), dtype=np.float64)
363
  weight2 = np.zeros((h, w), dtype=np.float64)
 
364
  if np.any(mask1 > 0.5):
 
365
  dist1 = cv2.distanceTransform((mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
366
  weight1 = dist1.astype(np.float64)
367
  if np.any(mask2 > 0.5):
368
  dist2 = cv2.distanceTransform((mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
369
  weight2 = dist2.astype(np.float64)
 
 
370
  total = weight1 + weight2
371
  total[total < 1e-10] = 1e-10
372
- alpha = weight1 / total
 
 
373
  alpha_3 = np.stack([alpha] * 3, axis=-1)
 
 
374
  lap1 = build_laplacian_pyramid(img1, levels)
375
  lap2 = build_laplacian_pyramid(img2, levels)
 
 
376
  mask_pyr = build_gaussian_pyramid(alpha_3, levels)
 
 
377
  blended_pyr = []
378
  for l in range(levels):
379
  blended_level = mask_pyr[l] * lap1[l] + (1.0 - mask_pyr[l]) * lap2[l]
380
  blended_pyr.append(blended_level)
 
 
381
  result = blended_pyr[-1]
382
  for l in range(levels - 2, -1, -1):
383
  upsampled = upsample(result, blended_pyr[l].shape)
384
  result = upsampled + blended_pyr[l]
 
385
  return result
386
 
387
 
388
  def feathered_blend(img1, img2, mask1, mask2):
 
 
 
 
 
 
 
 
389
  h, w = img1.shape[:2]
390
  weight1 = np.zeros((h, w), dtype=np.float64)
391
  weight2 = np.zeros((h, w), dtype=np.float64)
 
392
  if np.any(mask1 > 0.5):
393
- weight1 = cv2.distanceTransform((mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5).astype(np.float64)
 
 
394
  if np.any(mask2 > 0.5):
395
- weight2 = cv2.distanceTransform((mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5).astype(np.float64)
 
 
 
396
  total = weight1 + weight2
397
  total[total < 1e-10] = 1e-10
 
398
  alpha1 = (weight1 / total)[:, :, None]
399
  alpha2 = (weight2 / total)[:, :, None]
 
400
  blended = alpha1 * img1 + alpha2 * img2
401
  return blended
402
 
403
 
404
 
405
- # STEP 8: EXPOSURE COMPENSATION
406
 
407
 
408
def compensate_exposure(img_target, img_source, mask_target, mask_source):
    """Match img_source's brightness to img_target in their overlap.

    Computes, per BGR channel, the ratio of mean intensities inside the
    region where both masks are set, clamps that gain to [0.5, 2.0], and
    scales a copy of img_source by it. If the overlap is tiny (< 100
    pixels) the statistics are unreliable, so img_source is returned
    unchanged (as a copy).

    Parameters
    ----------
    img_target, img_source : float images, 3 channels (BGR order).
    mask_target, mask_source : validity masks; > 0.5 means valid.

    Returns
    -------
    Gain-adjusted copy of img_source (float, unclipped).
    """
    shared = (mask_target > 0.5) & (mask_source > 0.5)
    if np.sum(shared) < 100:
        # Too few overlapping pixels for a meaningful estimate.
        return img_source.copy()

    gains = []
    for ch in range(3):
        tgt_mean = np.mean(img_target[:, :, ch][shared])
        src_mean = np.mean(img_source[:, :, ch][shared])
        if src_mean > 1.0:
            # Clamp so one bad estimate cannot blow out a channel.
            gains.append(np.clip(tgt_mean / src_mean, 0.5, 2.0))
        else:
            # Near-black source channel: leave it alone.
            gains.append(1.0)

    adjusted = img_source.copy()
    for ch in range(3):
        adjusted[:, :, ch] = adjusted[:, :, ch] * gains[ch]

    print(f" Exposure gains: R={gains[2]:.3f}, G={gains[1]:.3f}, B={gains[0]:.3f}")
    return adjusted
427
 
428
 
429
- # STEP 9: NAIVE STITCH
430
 
431
 
432
  def naive_stitch(warped_images, masks):
 
 
 
 
 
433
  canvas = np.zeros_like(warped_images[0])
434
  for img, mask in zip(warped_images, masks):
435
  region = mask > 0.5
@@ -442,81 +842,145 @@ def naive_stitch(warped_images, masks):
442
 
443
 
444
 
445
- # STEP 10: FINAL CLEANUP
446
 
447
 
448
  def crop_to_content(panorama, combined_mask):
 
 
 
 
 
 
 
 
 
449
  if combined_mask.dtype == np.float64 or combined_mask.dtype == np.float32:
 
450
  valid_mask = (combined_mask > 0.01)
451
  else:
452
  valid_mask = (combined_mask > 1)
 
 
 
453
  rows = np.any(valid_mask, axis=1)
454
  cols = np.any(valid_mask, axis=0)
 
 
455
  if not np.any(rows) or not np.any(cols):
456
  return panorama
 
457
  y_min, y_max = np.where(rows)[0][[0, -1]]
458
  x_min, x_max = np.where(cols)[0][[0, -1]]
 
 
459
  safe_crop = panorama[y_min:y_max+1, x_min:x_max+1]
460
  safe_mask = valid_mask[y_min:y_max+1, x_min:x_max+1]
 
 
461
  total_content = np.count_nonzero(safe_mask)
 
 
462
  top, bottom = 0, safe_mask.shape[0] - 1
463
  left, right = 0, safe_mask.shape[1] - 1
464
- min_content_threshold = total_content * 0.20
 
 
 
 
 
465
  max_iters = min(safe_mask.shape[0], safe_mask.shape[1])
 
466
  for _ in range(max_iters):
 
467
  h = bottom - top
468
  w = right - left
469
  if h <= 0 or w <= 0: break
 
 
470
  roi = safe_mask[top:bottom+1, left:right+1]
 
 
471
  if np.count_nonzero(roi) < min_content_threshold:
472
  print(" [Crop] Stopping to preserve image content.")
473
  break
 
 
474
  row_top = roi[0, :]
475
  row_btm = roi[-1, :]
476
  col_lft = roi[:, 0]
477
  col_rgt = roi[:, -1]
478
- bad_t = np.sum(~row_top)
 
 
479
  bad_b = np.sum(~row_btm)
480
  bad_l = np.sum(~col_lft)
481
  bad_r = np.sum(~col_rgt)
 
 
482
  if (bad_t + bad_b + bad_l + bad_r) == 0:
483
  print(f" [Crop] Found Clean Rect: {w}x{h} px")
484
  return safe_crop[top:bottom+1, left:right+1]
 
 
485
  max_bad = max(bad_t, bad_b, bad_l, bad_r)
 
486
  if max_bad == bad_t: top += 1
487
  elif max_bad == bad_b: bottom -= 1
488
  elif max_bad == bad_l: left += 1
489
  elif max_bad == bad_r: right -= 1
 
 
490
  return safe_crop[top:bottom+1, left:right+1]
491
 
 
 
492
 
493
  def memory_safe_linear_blend(img1, img2, mask1, mask2):
494
- print(" -> RAM Full. Switching to Disk-Based Blending...")
 
 
 
 
495
  h, w = img1.shape[:2]
 
 
 
 
 
496
  try:
 
497
  print(" Calculating weight map 1...")
498
  if np.any(mask1 > 0):
 
499
  m1_bin = (mask1 > 127).astype(np.uint8)
500
  w_ram = cv2.distanceTransform(m1_bin, cv2.DIST_L2, 5)
501
  else:
502
  w_ram = np.zeros((h, w), dtype=np.float32)
 
 
503
  fp1 = np.memmap('temp_w1.dat', dtype='float32', mode='w+', shape=(h, w))
504
  fp1[:] = w_ram[:]
505
- del w_ram, m1_bin
506
- gc.collect()
507
 
 
508
  print(" Calculating weight map 2...")
509
  if np.any(mask2 > 0):
510
  m2_bin = (mask2 > 127).astype(np.uint8)
511
  w_ram = cv2.distanceTransform(m2_bin, cv2.DIST_L2, 5)
512
  else:
513
  w_ram = np.zeros((h, w), dtype=np.float32)
 
 
514
  fp2 = np.memmap('temp_w2.dat', dtype='float32', mode='w+', shape=(h, w))
515
  fp2[:] = w_ram[:]
516
  del w_ram, m2_bin
517
  gc.collect()
518
-
 
519
  print(" Blending chunks...")
 
520
  try:
521
  result = np.zeros(img1.shape, dtype=np.uint8)
522
  except MemoryError:
@@ -525,102 +989,185 @@ def memory_safe_linear_blend(img1, img2, mask1, mask2):
525
  CHUNK = 2000
526
  for y in range(0, h, CHUNK):
527
  ye = min(y + CHUNK, h)
 
 
528
  w1_c = fp1[y:ye]
529
  w2_c = fp2[y:ye]
 
530
  total = w1_c + w2_c
531
  total[total < 1e-5] = 1e-5
532
- alpha = (w1_c / total)[:, :, None]
 
 
533
  chunk1 = img1[y:ye].astype(np.float32)
534
  chunk2 = img2[y:ye].astype(np.float32)
 
 
535
  blended = chunk1 * alpha + chunk2 * (1.0 - alpha)
536
  result[y:ye] = np.clip(blended, 0, 255).astype(np.uint8)
537
 
 
538
  del fp1, fp2
539
  try:
540
  os.remove('temp_w1.dat')
541
  os.remove('temp_w2.dat')
542
  except: pass
 
543
  return result
 
544
  except Exception as e:
545
  print(f" [!] Error in disk blending: {e}")
546
- return img1
547
-
548
 
549
  def smart_blend_manager(img1, img2, mask1, mask2, levels=6):
 
 
 
 
 
 
 
 
 
 
550
  h, w = img1.shape[:2]
551
  pixels = h * w
552
- est_mem_bytes = (pixels * 3 * 8) * 4
 
 
 
553
  est_mem_gb = est_mem_bytes / (1024**3)
 
554
  print(f" [Memory Check] Image size: {w}x{h}")
555
  print(f" [Memory Check] Est. Multi-Band RAM needed: ~{est_mem_gb:.2f} GB")
556
- SAFE_LIMIT_GB = 2.0
557
- if est_mem_gb > SAFE_LIMIT_GB:
 
 
 
 
558
  print(f" [!] Image exceeds safe limit ({SAFE_LIMIT_GB} GB). Forcing Disk-Based Blending.")
559
  return memory_safe_linear_blend(img1, img2, mask1, mask2)
 
 
560
  try:
561
- return multi_band_blend(img1.astype(np.float64), img2.astype(np.float64),
562
  mask1, mask2, levels)
563
- except (MemoryError, cv2.error):
 
564
  print("\n [!] RAM Full or OpenCV Error! Switching to Disk-Based Blending...")
 
565
  gc.collect()
566
  return memory_safe_linear_blend(img1, img2, mask1, mask2)
567
 
568
 
569
 
570
- # MAIN PIPELINE
571
-
572
-
573
  def stitch_panorama(image_paths, output_dir="output"):
 
 
 
 
 
 
 
 
 
 
 
 
574
  os.makedirs(output_dir, exist_ok=True)
575
 
576
  print("=" * 70)
577
- print("PANORAMIC IMAGE STITCHING")
578
  print("=" * 70)
579
 
 
580
  # 1. Load images
 
581
  print("\n[1/8] Loading images...")
582
  images_bgr, images_gray = load_images(image_paths)
583
  n_images = len(images_bgr)
584
- display_images(images_bgr, output_dir=output_dir)
 
 
585
  center_idx = n_images // 2
586
  print(f" Reference (center) image: index {center_idx}")
587
 
 
588
  # 2. Extract SIFT features
 
589
  print("\n[2/8] Extracting SIFT features...")
590
  keypoints, descriptors = extract_sift_features(images_gray)
591
- visualize_keypoints(images_bgr, keypoints, output_dir=output_dir)
592
 
593
- # 3-5. Match features & compute homographies
 
 
594
  print("\n[3/8] Matching features & computing homographies...")
 
 
 
595
  homographies = [None] * n_images
596
  homographies[center_idx] = np.eye(3)
597
 
 
 
598
  for i in range(center_idx - 1, -1, -1):
599
  print(f"\n Pair: Image {i} <-> Image {i+1}")
 
 
600
  matches = match_features(descriptors[i], descriptors[i + 1])
601
  pts_i, pts_ip1 = get_matched_points(keypoints[i], keypoints[i + 1], matches)
602
- visualize_matches(images_bgr[i], keypoints[i], images_bgr[i + 1], keypoints[i + 1],
603
- matches, title=f"Matches: Image {i} <-> Image {i+1}",
604
- fname=f"03_matches_{i}_{i+1}.png", output_dir=output_dir)
 
 
 
 
 
 
 
 
605
  H_i_to_ip1, inlier_mask = compute_homography_ransac(pts_i, pts_ip1)
 
 
 
606
  homographies[i] = homographies[i + 1] @ H_i_to_ip1
607
  print(f" Homography (Image {i} -> center) computed successfully")
608
 
 
609
  for i in range(center_idx + 1, n_images):
610
  print(f"\n Pair: Image {i} <-> Image {i-1}")
 
 
611
  matches = match_features(descriptors[i], descriptors[i - 1])
612
  pts_i, pts_im1 = get_matched_points(keypoints[i], keypoints[i - 1], matches)
613
- visualize_matches(images_bgr[i], keypoints[i], images_bgr[i - 1], keypoints[i - 1],
614
- matches, title=f"Matches: Image {i} <-> Image {i-1}",
615
- fname=f"03_matches_{i}_{i-1}.png", output_dir=output_dir)
 
 
 
 
 
 
 
 
616
  H_i_to_im1, inlier_mask = compute_homography_ransac(pts_i, pts_im1)
 
 
617
  homographies[i] = homographies[i - 1] @ H_i_to_im1
618
  print(f" Homography (Image {i} -> center) computed successfully")
619
 
620
- # 6. Compute canvas size and warp
 
 
621
  print("\n[4/8] Computing output canvas bounds...")
 
622
  all_x_min, all_y_min = 0, 0
623
  all_x_max, all_y_max = 0, 0
 
624
  for i in range(n_images):
625
  h, w = images_bgr[i].shape[:2]
626
  xmin, ymin, xmax, ymax = compute_warped_bounds(homographies[i], h, w)
@@ -628,9 +1175,10 @@ def stitch_panorama(image_paths, output_dir="output"):
628
  all_y_min = min(all_y_min, ymin)
629
  all_x_max = max(all_x_max, xmax)
630
  all_y_max = max(all_y_max, ymax)
 
631
  out_w = all_x_max - all_x_min + 1
632
  out_h = all_y_max - all_y_min + 1
633
- offset = (all_x_min, all_y_min)
634
  print(f" Canvas size: {out_w} x {out_h} px, offset: {offset}")
635
 
636
  print("\n[5/8] Warping images into common coordinate frame...")
@@ -638,38 +1186,74 @@ def stitch_panorama(image_paths, output_dir="output"):
638
  masks = []
639
  for i in range(n_images):
640
  print(f" Warping image {i}...")
641
- warped, mask = warp_image(images_bgr[i], homographies[i], output_size=(out_h, out_w), offset=offset)
 
 
 
 
 
 
 
 
642
  warped_u8 = np.clip(warped, 0, 255).astype(np.uint8)
643
  warped_images.append(warped_u8)
 
 
644
  mask_u8 = (mask > 0.5).astype(np.uint8) * 255
645
  masks.append(mask_u8)
 
 
646
  del warped, mask
647
- gc.collect()
648
 
 
649
  # 7. Exposure compensation
 
 
 
650
  print("\n[6/8] Compensating exposure differences...")
651
  for i in range(n_images):
652
  if i == center_idx:
653
  continue
 
 
654
  target_f = warped_images[center_idx].astype(np.float64)
655
  source_f = warped_images[i].astype(np.float64)
 
 
656
  mask_t = masks[center_idx] / 255.0
657
  mask_s = masks[i] / 255.0
658
- compensated = compensate_exposure(target_f, source_f, mask_t, mask_s)
 
 
 
 
 
 
659
  warped_images[i] = np.clip(compensated, 0, 255).astype(np.uint8)
 
 
660
  del target_f, source_f, compensated
661
  gc.collect()
662
 
663
- # 8a. Naive stitch
664
- print("\n[7/8] Creating naive stitch...")
 
 
665
  naive = naive_stitch(warped_images, masks)
666
  naive_clipped = np.clip(naive, 0, 255).astype(np.uint8)
667
  cv2.imwrite(os.path.join(output_dir, "04_naive_stitch.png"), naive_clipped)
668
 
669
- # 8b. Multi-band blending
670
- print("\n[8/8] Multi-band Laplacian blending...")
 
 
 
 
671
  blended = warped_images[center_idx].copy()
672
  blended_mask = masks[center_idx].copy()
 
 
673
  blend_order = []
674
  for d in range(1, n_images):
675
  if center_idx - d >= 0:
@@ -679,23 +1263,38 @@ def stitch_panorama(image_paths, output_dir="output"):
679
 
680
  for idx in blend_order:
681
  print(f" Blending image {idx} into panorama...")
 
 
682
  if blended.dtype != np.uint8:
683
  blended = np.clip(blended, 0, 255).astype(np.uint8)
 
684
  curr_img = np.clip(warped_images[idx], 0, 255).astype(np.uint8)
685
- blended = smart_blend_manager(blended, curr_img, blended_mask, masks[idx], levels=6)
 
 
 
 
 
 
 
686
  blended_mask = np.maximum(blended_mask, masks[idx])
687
 
688
  blended_clipped = np.clip(blended, 0, 255).astype(np.uint8)
689
 
690
- # 9. Crop
 
 
691
  print("\n Cropping to clean rectangle...")
692
  final_panorama = crop_to_content(blended_clipped, blended_mask)
693
  naive_cropped = crop_to_content(naive_clipped, blended_mask)
694
 
 
695
  cv2.imwrite(os.path.join(output_dir, "05_blended_panorama.png"), final_panorama)
696
  cv2.imwrite(os.path.join(output_dir, "04_naive_stitch_cropped.png"), naive_cropped)
697
 
698
- # 10. Comparison figure
 
 
699
  print("\n Generating comparison figure...")
700
  fig, axes = plt.subplots(2, 1, figsize=(18, 10))
701
  axes[0].imshow(cv2.cvtColor(naive_cropped, cv2.COLOR_BGR2RGB))
@@ -709,15 +1308,35 @@ def stitch_panorama(image_paths, output_dir="output"):
709
  plt.close()
710
 
711
  print("\n" + "=" * 70)
712
- print("Done! All saved to:", output_dir)
713
  print("=" * 70)
 
 
 
 
 
 
714
 
715
  return final_panorama
716
 
717
 
 
 
718
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
719
  if len(sys.argv) < 4:
720
- print("Usage: python panorama_stitch_memsafe.py <left.jpg> <center.jpg> <right.jpg> [more...]")
 
721
  sys.exit(1)
 
722
  image_paths = sys.argv[1:]
723
- stitch_panorama(image_paths, output_dir="output")
 
 
1
  """
2
+ PANORAMIC IMAGE STITCHING - FROM SCRATCH IMPLEMENTATION
3
 
4
  Permitted library usage
5
  - OpenCV: Image I/O, SIFT detection, keypoint matching
 
12
  - Perspective warping (forward + inverse mapping)
13
  - Multi-band blending (Laplacian pyramid) & feathered blending
14
  - Intensity normalization and final cleanup
15
+ By: Priyadip Sau(M25CSA023)
16
+ """
17
+
18
+
19
+ """
20
+ Usage Instructions:
21
+ - Make sure the following files are in the same directory:
22
+ - panorama_stitch_memsafe.py
23
+ - left.jpg
24
+ - center.jpg
25
+ - right.jpg
26
+
27
+ - Run the following command from that directory:
28
+ - "python panorama_stitch_memsafe.py left.jpg center.jpg right.jpg"
29
+
30
+
31
+ The stitched panorama will be saved in the current directory inside the output/ folder as: final_panorama.png
32
+
33
 
34
  """
35
 
 
39
  import os
40
  import sys
41
  import warnings
 
42
  warnings.filterwarnings('ignore')
43
 
44
 
 
47
 
48
 
49
  def load_images(image_paths):
50
+ """
51
+ Load a sequence of images from disk. We expect them ordered
52
+ left-to-right (or the user can specify ordering). Each image
53
+ is read in BGR (OpenCV default) and also converted to RGB
54
+ for visualization, plus a grayscale copy for feature detection.
55
+ """
56
  images_bgr = []
57
  images_gray = []
58
  for p in image_paths:
 
65
  return images_bgr, images_gray
66
 
67
 
68
+ def display_images(images_bgr, title="Input Images"):
69
+ """Quick side-by-side display of the loaded images."""
70
  n = len(images_bgr)
71
  fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
72
  if n == 1:
 
77
  axes[i].axis('off')
78
  fig.suptitle(title, fontsize=14)
79
  plt.tight_layout()
80
+ plt.savefig("01_input_images.png", dpi=150, bbox_inches='tight')
81
  plt.close()
82
 
83
 
84
 
85
+ # STEP 2: SIFT FEATURE EXTRACTION (OpenCV built-in permitted)
86
 
87
 
88
  def extract_sift_features(images_gray):
89
+ """
90
+ Detect SIFT keypoints and compute descriptors for each grayscale image.
91
+
92
+ SIFT (Scale-Invariant Feature Transform) identifies interest points that
93
+ are invariant to scale and rotation. Each keypoint receives a 128-dim
94
+ descriptor encoding the local gradient histogram around it.
95
+
96
+ Returns:
97
+ keypoints_list: list of keypoint arrays per image
98
+ descriptors_list: list of descriptor matrices (N x 128) per image
99
+ """
100
+ sift = cv2.SIFT_create(nfeatures=10000) # generous upper bound
101
+
102
  keypoints_list = []
103
  descriptors_list = []
104
+
105
  for idx, gray in enumerate(images_gray):
106
  kp, des = sift.detectAndCompute(gray, None)
107
  keypoints_list.append(kp)
108
  descriptors_list.append(des)
109
  print(f" Image {idx}: detected {len(kp)} SIFT keypoints")
110
+
111
  return keypoints_list, descriptors_list
112
 
113
 
114
+ def visualize_keypoints(images_bgr, keypoints_list):
115
+ """Draw detected keypoints on each image """
116
  n = len(images_bgr)
117
  fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
118
  if n == 1:
 
127
  axes[i].axis('off')
128
  fig.suptitle("SIFT Keypoints", fontsize=14)
129
  plt.tight_layout()
130
+ plt.savefig("02_sift_keypoints.png", dpi=150, bbox_inches='tight')
131
  plt.close()
132
 
133
 
134
 
135
+ # STEP 3: FEATURE MATCHING (OpenCV BFMatcher permitted)
136
 
137
 
138
  def match_features(des1, des2, ratio_thresh=0.75):
139
+ """
140
+ Match descriptors between two images using Brute-Force with
141
+ Lowe's ratio test.
142
+
143
+ For each descriptor in image 1, we find its two nearest neighbors
144
+ in image 2. If the distance to the closest match is significantly
145
+ smaller than to the second closest (ratio < threshold), we accept
146
+ the match. This filters out ambiguous correspondences.
147
+
148
+ Args:
149
+ des1, des2: descriptor matrices (N1x128, N2x128)
150
+ ratio_thresh: Lowe's ratio test threshold
151
+
152
+ Returns:
153
+ good_matches: list of cv2.DMatch objects passing the ratio test
154
+ """
155
  bf = cv2.BFMatcher(cv2.NORM_L2)
156
  raw_matches = bf.knnMatch(des1, des2, k=2)
157
+
158
  good_matches = []
159
  for m, n in raw_matches:
160
  if m.distance < ratio_thresh * n.distance:
161
  good_matches.append(m)
162
+
163
  print(f" Ratio test: {len(raw_matches)} raw -> {len(good_matches)} good matches")
164
  return good_matches
165
 
166
 
167
  def get_matched_points(kp1, kp2, matches):
168
+ """
169
+ Extract the (x, y) coordinates of matched keypoints into
170
+ two corresponding Nx2 arrays.
171
+ """
172
  pts1 = np.float64([kp1[m.queryIdx].pt for m in matches])
173
  pts2 = np.float64([kp2[m.trainIdx].pt for m in matches])
174
  return pts1, pts2
175
 
176
 
177
+ def visualize_matches(img1, kp1, img2, kp2, matches, title="Feature Matches", fname="matches.png"):
178
+ """Draw matches between two images side by side."""
179
  vis = cv2.drawMatches(
180
  img1, kp1, img2, kp2, matches[:80], None,
181
  flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
 
184
  plt.imshow(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))
185
  plt.title(title)
186
  plt.axis('off')
187
+ plt.savefig(fname, dpi=150, bbox_inches='tight')
188
  plt.close()
189
 
190
 
191
 
192
+ # STEP 4: HOMOGRAPHY ESTIMATION - DIRECT LINEAR TRANSFORM (DLT) - MANUAL IMPLEMENTATION
193
 
194
 
195
  def normalize_points(pts):
196
+ """
197
+ Hartley normalization: translate points so centroid is at origin,
198
+ then scale so average distance from origin is sqrt(2).
199
+
200
+ This conditioning dramatically improves the numerical stability
201
+ of the DLT. Without it, the system matrix can be very ill-conditioned
202
+ because pixel coordinates are O(1000) while homogeneous coordinates
203
+ mix 1s with 1000s.
204
+
205
+ Args:
206
+ pts: Nx2 array of 2D points
207
+
208
+ Returns:
209
+ pts_norm: Nx2 normalized points
210
+ T: 3x3 normalization matrix such that pts_norm = T @ pts_homogeneous
211
+ """
212
  centroid = np.mean(pts, axis=0)
213
  shifted = pts - centroid
214
  avg_dist = np.mean(np.sqrt(np.sum(shifted ** 2, axis=1)))
215
  if avg_dist < 1e-10:
216
  avg_dist = 1e-10
217
  scale = np.sqrt(2.0) / avg_dist
218
+
219
  T = np.array([
220
  [scale, 0, -scale * centroid[0]],
221
  [0, scale, -scale * centroid[1]],
222
  [0, 0, 1 ]
223
  ])
224
+
225
+ pts_h = np.column_stack([pts, np.ones(len(pts))]) # Nx3
226
+ pts_norm_h = (T @ pts_h.T).T # Nx3
227
  pts_norm = pts_norm_h[:, :2] / pts_norm_h[:, 2:3]
228
+
229
  return pts_norm, T
230
 
231
 
232
  def compute_homography_dlt(src_pts, dst_pts):
233
+ """
234
+ Compute the 3x3 homography H that maps src_pts -> dst_pts
235
+ using the Direct Linear Transform (DLT) algorithm.
236
+
237
+ Given a correspondence (x, y) <-> (x', y'), the projective
238
+ relation is:
239
+ [x'] [x]
240
+ [y'] ~ H * [y]
241
+ [1 ] [1]
242
+
243
+ Expanding and eliminating the scale factor, each correspondence
244
+ yields two linear equations in the 9 entries of H. Stacking N>=4
245
+ correspondences produces the system A h = 0, where h = vec(H).
246
+ The solution is the right singular vector of A corresponding to
247
+ the smallest singular value (i.e., the null space of A).
248
+
249
+ We apply Hartley normalization to both point sets before solving,
250
+ then de-normalize the result.
251
+
252
+ Args:
253
+ src_pts: Nx2 source points
254
+ dst_pts: Nx2 destination points (N >= 4)
255
+
256
+ Returns:
257
+ H: 3x3 homography matrix
258
+ """
259
  assert len(src_pts) >= 4, "Need at least 4 correspondences for homography"
260
+
261
+ # --- Hartley normalization ---
262
  src_norm, T_src = normalize_points(src_pts)
263
  dst_norm, T_dst = normalize_points(dst_pts)
264
+
265
  N = len(src_norm)
266
  A = np.zeros((2 * N, 9))
267
+
268
  for i in range(N):
269
  x, y = src_norm[i]
270
  xp, yp = dst_norm[i]
271
+
272
+ # Row 2i: -x -y -1 0 0 0 xp*x xp*y xp
273
+ A[2 * i] = [
274
+ -x, -y, -1,
275
+ 0, 0, 0,
276
+ xp * x, xp * y, xp
277
+ ]
278
+ # Row 2i+1: 0 0 0 -x -y -1 yp*x yp*y yp
279
+ A[2 * i + 1] = [
280
+ 0, 0, 0,
281
+ -x, -y, -1,
282
+ yp * x, yp * y, yp
283
+ ]
284
+
285
+ # Solve via SVD: the solution h is the last row of Vt (smallest singular value)
286
  _, S, Vt = np.linalg.svd(A)
287
  h = Vt[-1]
288
  H_norm = h.reshape(3, 3)
289
+
290
+ # --- De-normalize ---
291
+ # If T_dst * dst ~ H_norm * T_src * src, then dst ~ T_dst^{-1} H_norm T_src * src
292
  H = np.linalg.inv(T_dst) @ H_norm @ T_src
293
+
294
+ # Normalize so that H[2,2] = 1 (conventional, avoids scale ambiguity)
295
  if abs(H[2, 2]) > 1e-10:
296
  H /= H[2, 2]
297
+
298
  return H
299
 
300
 
301
 
302
+ # STEP 5: RANSAC β€” ROBUST ESTIMATION OF HOMOGRAPHY
303
 
304
 
305
  def compute_homography_ransac(src_pts, dst_pts,
306
  n_iterations=10000,
307
  reprojection_thresh=3.0,
308
  min_inliers_ratio=0.1):
309
+ """
310
+ RANSAC wrapper around the DLT homography estimator.
311
+
312
+ RANSAC (Random Sample Consensus) iteratively:
313
+ 1. Samples a minimal subset (4 correspondences for homography).
314
+ 2. Fits a model (homography via DLT) to that subset.
315
+ 3. Counts how many of the remaining correspondences agree
316
+ with the model (inliers within reprojection_thresh pixels).
317
+ 4. Keeps the model with the largest inlier set.
318
+ Finally, re-estimates H from ALL inliers of the best model.
319
+
320
+ This makes the estimation robust to outliers produced by
321
+ incorrect feature matches (which are common in practice).
322
+
323
+ Args:
324
+ src_pts, dst_pts: Nx2 matched point arrays
325
+ n_iterations: number of RANSAC trials
326
+ reprojection_thresh: max pixel error to count as inlier
327
+ min_inliers_ratio: minimum fraction of inliers to accept
328
+
329
+ Returns:
330
+ best_H: 3x3 homography (refined on all inliers)
331
+ inlier_mask: boolean array, True for inlier correspondences
332
+ """
333
  N = len(src_pts)
334
  if N < 4:
335
  raise ValueError(f"Need >=4 matches for homography, got {N}")
336
+
337
  best_inlier_count = 0
338
  best_inlier_mask = None
339
  best_H = None
340
+
341
+ # Pre-compute homogeneous source points for reprojection
342
+ src_h = np.column_stack([src_pts, np.ones(N)]) # Nx3
343
 
344
  for iteration in range(n_iterations):
345
+ # 1) Random minimal sample of 4 points
346
  indices = np.random.choice(N, 4, replace=False)
347
  s = src_pts[indices]
348
  d = dst_pts[indices]
349
+
350
+ # Skip degenerate configurations (collinear points)
351
+ # A quick check: if 3 of 4 source points are nearly collinear, skip
352
  try:
353
  H_candidate = compute_homography_dlt(s, d)
354
  except np.linalg.LinAlgError:
355
  continue
356
+
357
  if H_candidate is None or np.any(np.isnan(H_candidate)):
358
  continue
359
+
360
+ # 2) Reproject ALL source points using H_candidate
361
+ projected_h = (H_candidate @ src_h.T).T # Nx3
362
+ # Avoid division by zero
363
  w = projected_h[:, 2:3]
364
  w[np.abs(w) < 1e-10] = 1e-10
365
  projected = projected_h[:, :2] / w
366
+
367
+ # 3) Compute reprojection error
368
  errors = np.sqrt(np.sum((projected - dst_pts) ** 2, axis=1))
369
  inlier_mask = errors < reprojection_thresh
370
  inlier_count = np.sum(inlier_mask)
371
+
372
+ # 4) Update best model
373
  if inlier_count > best_inlier_count:
374
  best_inlier_count = inlier_count
375
  best_inlier_mask = inlier_mask.copy()
376
  best_H = H_candidate.copy()
377
+
378
+ # Early termination: if we have a really good model, stop
379
  if inlier_count > 0.9 * N:
380
  break
381
 
382
+ # Validate that we found enough inliers
383
  if best_inlier_count < min_inliers_ratio * N:
384
  raise RuntimeError(
385
  f"RANSAC failed: only {best_inlier_count}/{N} inliers "
386
  f"({100*best_inlier_count/N:.1f}%), threshold was {min_inliers_ratio*100:.0f}%"
387
  )
388
+
389
+ # 5) Refine H using ALL inliers
390
  best_H = compute_homography_dlt(
391
  src_pts[best_inlier_mask],
392
  dst_pts[best_inlier_mask]
393
  )
394
+
395
  print(f" RANSAC: {best_inlier_count}/{N} inliers "
396
  f"({100*best_inlier_count/N:.1f}%) over {min(iteration+1, n_iterations)} iterations")
 
397
 
398
+ return best_H, best_inlier_mask
399
 
 
400
 
401
+ # =====================================================================================
402
+ # STEP 6: IMAGE WARPING - inverse mapping with bilinear interp - MANUAL IMPLEMENTATION
403
+ # =====================================================================================
404
 
405
  def apply_homography_to_point(H, pt):
406
+ """Apply homography H to a single 2D point, returning the mapped 2D point."""
407
  p = np.array([pt[0], pt[1], 1.0])
408
  q = H @ p
409
  if abs(q[2]) < 1e-10:
 
412
 
413
 
414
  def compute_warped_bounds(H, h, w):
415
+ """
416
+ Determine the bounding box of an image of size (h, w) after
417
+ warping by homography H by mapping its four corners.
418
+
419
+ Returns (x_min, y_min, x_max, y_max) in the destination coordinate frame.
420
+ """
421
+ corners = np.array([
422
+ [0, 0],
423
+ [w - 1, 0],
424
+ [w - 1, h - 1],
425
+ [0, h - 1]
426
+ ], dtype=np.float64)
427
+
428
  warped_corners = []
429
  for c in corners:
430
  wx, wy = apply_homography_to_point(H, c)
431
  warped_corners.append([wx, wy])
432
  warped_corners = np.array(warped_corners)
433
+
434
  x_min = np.floor(warped_corners[:, 0].min()).astype(int)
435
  x_max = np.ceil(warped_corners[:, 0].max()).astype(int)
436
  y_min = np.floor(warped_corners[:, 1].min()).astype(int)
437
  y_max = np.ceil(warped_corners[:, 1].max()).astype(int)
438
+
439
  return x_min, y_min, x_max, y_max
440
 
441
 
442
+ def bilinear_interpolate(img, x, y):
443
+ """
444
+ Bilinear interpolation at sub-pixel location (x, y) on a BGR image.
445
+
446
+ Instead of simply rounding to the nearest pixel (which causes
447
+ aliasing), we compute a weighted average of the four surrounding
448
+ pixels. The weights are proportional to the area of the opposite
449
+ rectangle in the unit cell.
450
+
451
+ Args:
452
+ img: HxWx3 image (uint8 or float)
453
+ x, y: float coordinates (x = column, y = row)
454
+
455
+ Returns:
456
+ Interpolated pixel value (3-element array), or zeros if out of bounds.
457
+ """
458
+ h, w = img.shape[:2]
459
+ channels = img.shape[2] if img.ndim == 3 else 1
460
+
461
+ x0 = int(np.floor(x))
462
+ y0 = int(np.floor(y))
463
+ x1 = x0 + 1
464
+ y1 = y0 + 1
465
+
466
+ # Boundary check
467
+ if x0 < 0 or y0 < 0 or x1 >= w or y1 >= h:
468
+ return np.zeros(channels, dtype=np.float64)
469
+
470
+ # Fractional parts
471
+ dx = x - x0
472
+ dy = y - y0
473
+
474
+ # Four neighbor values
475
+ val = (
476
+ (1 - dx) * (1 - dy) * img[y0, x0].astype(np.float64) +
477
+ dx * (1 - dy) * img[y0, x1].astype(np.float64) +
478
+ (1 - dx) * dy * img[y1, x0].astype(np.float64) +
479
+ dx * dy * img[y1, x1].astype(np.float64)
480
+ )
481
+ return val
482
+
483
+
484
  def warp_image(img, H, output_size, offset):
485
+ """
486
+ Memory-Safe Warping: Processes the image in chunks (scanlines)
487
+ to avoid crashing RAM on large panoramas.
488
+ """
489
  out_h, out_w = output_size
490
  H_inv = np.linalg.inv(H)
491
  off_x, off_y = offset
492
+
493
+ # 1. Allocate the final destination arrays
494
+ # If this line specifically fails, you simply don't have enough RAM
495
+ # to hold the result, and we'd need to use 'np.memmap' (disk-based array).
496
  try:
497
  warped = np.zeros((out_h, out_w, 3), dtype=np.float64)
498
  mask = np.zeros((out_h, out_w), dtype=np.float64)
499
  except MemoryError:
500
  print("\n[!] Critical: System RAM cannot hold the final image.")
501
+ print(" Switching to disk-based memory mapping (slower, but works).")
502
  warped = np.memmap('temp_warped.dat', dtype='float64', mode='w+', shape=(out_h, out_w, 3))
503
  mask = np.memmap('temp_mask.dat', dtype='float64', mode='w+', shape=(out_h, out_w))
504
 
505
  h_src, w_src = img.shape[:2]
506
+
507
+ # 2. Process in Chunks (e.g., 500 rows at a time)
508
+ # This ensures intermediate math arrays never explode RAM.
509
+ CHUNK_SIZE = 500
510
+
511
  print(f" -> Processing {out_h} rows in chunks of {CHUNK_SIZE}...")
512
 
513
  for y_start in range(0, out_h, CHUNK_SIZE):
514
  y_end = min(y_start + CHUNK_SIZE, out_h)
515
  chunk_h = y_end - y_start
516
+
517
+ # --- Generate Grid for THIS CHUNK ONLY ---
518
+ # Create grid of output pixel coordinates for the current strip
519
  ys, xs = np.mgrid[y_start:y_end, 0:out_w]
520
  dest_x = xs + off_x
521
  dest_y = ys + off_y
522
+
523
+ # Flatten for matrix multiplication
524
  ones = np.ones(chunk_h * out_w, dtype=np.float64)
525
  flat_x = dest_x.reshape(-1)
526
  flat_y = dest_y.reshape(-1)
527
+
528
+ # Stack: (N, 3) -> [x, y, 1]
529
+ dest_coords = np.vstack([flat_x, flat_y, ones]) # 3 x N
530
+
531
+ # --- Apply Inverse Homography ---
532
+ # src = H_inv @ dest
533
  src_coords = H_inv @ dest_coords
534
+
535
+ # Perspective divide
536
  w_vals = src_coords[2, :]
537
  w_vals[np.abs(w_vals) < 1e-10] = 1e-10
538
+ src_x = src_coords[0, :] / w_vals
539
+ src_y = src_coords[1, :] / w_vals
540
+
541
+ # Reshape back to chunk shape
542
+ src_x = src_x.reshape(chunk_h, out_w)
543
+ src_y = src_y.reshape(chunk_h, out_w)
544
+
545
+ # --- Vectorized Bilinear Interpolation (Chunk Scope) ---
546
+
547
+ # 1. Determine valid pixels
548
+ valid = (src_x >= 0) & (src_x < w_src - 1) & \
549
+ (src_y >= 0) & (src_y < h_src - 1)
550
+
551
+ # If no valid pixels in this chunk, skip math
552
  if not np.any(valid):
553
  continue
554
+
555
+ # 2. Get integer and fractional parts
556
+ # We only compute for 'valid' pixels to save more speed/memory,
557
+ # but masking arrays is cleaner in numpy.
558
  x0 = np.floor(src_x).astype(int)
559
  y0 = np.floor(src_y).astype(int)
560
  x1 = x0 + 1
561
  y1 = y0 + 1
562
+
563
  dx = src_x - x0
564
  dy = src_y - y0
565
+
566
+ # Clamp to bounds to prevent index errors on edges
567
  x0 = np.clip(x0, 0, w_src - 2)
568
  x1 = np.clip(x1, 0, w_src - 1)
569
  y0 = np.clip(y0, 0, h_src - 2)
570
  y1 = np.clip(y1, 0, h_src - 1)
571
+
572
+ # 3. Fetch neighbors
573
  Ia = img[y0, x0].astype(np.float64)
574
  Ib = img[y0, x1].astype(np.float64)
575
  Ic = img[y1, x0].astype(np.float64)
576
  Id = img[y1, x1].astype(np.float64)
577
+
578
+ # 4. Compute weights (re-expand dimensions for broadcasting)
579
  wa = ((1 - dx) * (1 - dy))[:, :, None]
580
  wb = (dx * (1 - dy))[:, :, None]
581
  wc = ((1 - dx) * dy)[:, :, None]
582
  wd = (dx * dy)[:, :, None]
583
+
584
+ # 5. Interpolate
585
+ # This line was crashing before. Now it's 1/100th the size.
586
  chunk_warped = wa * Ia + wb * Ib + wc * Ic + wd * Id
587
+
588
+ # 6. Assign to final array
589
+ # We apply the validity mask here
590
+ # Expand valid to 3 channels for image, keep 1 channel for mask
591
  valid_3ch = valid[:, :, None]
592
+
593
+ # Copy strictly the valid region into the main array
594
+ # (Using 'np.where' is safer than boolean indexing for assignment)
595
  current_slice_img = warped[y_start:y_end, :]
596
  current_slice_mask = mask[y_start:y_end, :]
597
+
598
  np.copyto(current_slice_img, chunk_warped, where=valid_3ch)
599
  np.copyto(current_slice_mask, 1.0, where=valid)
600
 
 
602
 
603
 
604
 
605
+ # STEP 7: BLENDING β€” MULTI-BAND LAPLACIAN PYRAMID BLENDING & FEATHERING
606
 
607
 
608
  def gaussian_blur_manual(img, kernel_size=5, sigma=1.0):
609
+ """
610
+ Apply Gaussian blur. We build the kernel manually, but use
611
+ cv2.filter2D for the actual convolution (which is a basic
612
+ matrix operation, not a high-level stitching API).
613
+ """
614
  ax = np.linspace(-(kernel_size // 2), kernel_size // 2, kernel_size)
615
  xx, yy = np.meshgrid(ax, ax)
616
  kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2))
 
619
 
620
 
621
  def build_gaussian_pyramid(img, levels):
622
+ """Construct a Gaussian pyramid by repeated blur + downsample."""
623
  pyramid = [img.astype(np.float64)]
624
  current = img.astype(np.float64)
625
  for _ in range(levels - 1):
626
  blurred = gaussian_blur_manual(current, kernel_size=5, sigma=1.0)
627
+ # Downsample by factor of 2
628
  downsampled = blurred[::2, ::2]
629
  pyramid.append(downsampled)
630
  current = downsampled
 
632
 
633
 
634
  def upsample(img, target_shape):
635
+ """Upsample an image to target_shape using simple pixel duplication + blur."""
636
  h, w = target_shape[:2]
637
+ # Use numpy repeat for upsampling (basic array operation)
638
  up_h = np.repeat(img, 2, axis=0)[:h]
639
  up_w = np.repeat(up_h, 2, axis=1)[:, :w]
640
+ # Smooth to reduce blockiness
641
  result = gaussian_blur_manual(up_w, kernel_size=5, sigma=1.0)
642
  return result
643
 
644
 
645
  def build_laplacian_pyramid(img, levels):
646
+ """
647
+ Construct a Laplacian pyramid. Each level is the difference
648
+ between consecutive Gaussian pyramid levels (after upsampling
649
+ the coarser level). The final level is the coarsest Gaussian.
650
+
651
+ The Laplacian pyramid decomposes the image into frequency bands:
652
+ fine details at the top, coarse structure at the bottom. This
653
+ decomposition is the key to multi-band blendingβ€”by blending
654
+ each frequency band independently with different-width masks,
655
+ we avoid visible seams while preserving local contrast.
656
+ """
657
  gauss_pyr = build_gaussian_pyramid(img, levels)
658
  lap_pyr = []
659
  for i in range(levels - 1):
660
  upsampled = upsample(gauss_pyr[i + 1], gauss_pyr[i].shape)
661
  lap = gauss_pyr[i] - upsampled
662
  lap_pyr.append(lap)
663
+ lap_pyr.append(gauss_pyr[-1]) # coarsest level
664
  return lap_pyr
665
 
666
 
667
  def multi_band_blend(img1, img2, mask1, mask2, levels=6):
668
+ """
669
+ Multi-band blending using Laplacian pyramids.
670
+
671
+ The idea (Burt & Adelson, 1983): decompose both images into
672
+ Laplacian pyramids, build a Gaussian pyramid from the blending
673
+ mask, and combine corresponding levels. Reconstruct from the
674
+ blended Laplacian pyramid. High-frequency details blend with
675
+ a sharp transition (preserving sharpness), while low-frequency
676
+ content blends with a wide, smooth transition (avoiding seams).
677
+
678
+ Args:
679
+ img1, img2: float64 images on the same canvas
680
+ mask1, mask2: float64 masks (0/1) indicating valid pixels
681
+ levels: number of pyramid levels
682
+
683
+ Returns:
684
+ blended: the multi-band blended result
685
+ """
686
+ # Make sure images and masks have the same size
687
  h, w = img1.shape[:2]
688
+
689
+ # Ensure 3-channel for consistency
690
  if img1.ndim == 2:
691
  img1 = img1[:, :, None]
692
  if img2.ndim == 2:
693
  img2 = img2[:, :, None]
694
+
695
+ # Create a smooth weight mask for blending
696
+ # Where both images overlap, create a gradient transition
697
  overlap = (mask1 > 0.5) & (mask2 > 0.5)
698
+ only1 = (mask1 > 0.5) & (mask2 < 0.5)
699
+ only2 = (mask2 > 0.5) & (mask1 < 0.5)
700
+
701
+ # Build the blending weight: use distance transform for smooth transition
702
+ # For the overlap region, weight by relative distance to each image's boundary
703
  weight1 = np.zeros((h, w), dtype=np.float64)
704
  weight2 = np.zeros((h, w), dtype=np.float64)
705
+
706
  if np.any(mask1 > 0.5):
707
+ # Distance from non-mask region
708
  dist1 = cv2.distanceTransform((mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
709
  weight1 = dist1.astype(np.float64)
710
  if np.any(mask2 > 0.5):
711
  dist2 = cv2.distanceTransform((mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5)
712
  weight2 = dist2.astype(np.float64)
713
+
714
+ # Normalize to get a smooth alpha between 0 and 1
715
  total = weight1 + weight2
716
  total[total < 1e-10] = 1e-10
717
+ alpha = weight1 / total # alpha=1 => use img1, alpha=0 => use img2
718
+
719
+ # Expand alpha to 3 channels
720
  alpha_3 = np.stack([alpha] * 3, axis=-1)
721
+
722
+ # Build Laplacian pyramids for both images
723
  lap1 = build_laplacian_pyramid(img1, levels)
724
  lap2 = build_laplacian_pyramid(img2, levels)
725
+
726
+ # Build Gaussian pyramid for the blending mask
727
  mask_pyr = build_gaussian_pyramid(alpha_3, levels)
728
+
729
+ # Blend each level
730
  blended_pyr = []
731
  for l in range(levels):
732
  blended_level = mask_pyr[l] * lap1[l] + (1.0 - mask_pyr[l]) * lap2[l]
733
  blended_pyr.append(blended_level)
734
+
735
+ # Reconstruct from blended Laplacian pyramid
736
  result = blended_pyr[-1]
737
  for l in range(levels - 2, -1, -1):
738
  upsampled = upsample(result, blended_pyr[l].shape)
739
  result = upsampled + blended_pyr[l]
740
+
741
  return result
742
 
743
 
744
  def feathered_blend(img1, img2, mask1, mask2):
745
+ """
746
+ Simpler feathered (distance-weighted) blending as a baseline
747
+ for the "naive vs final" comparison.
748
+
749
+ Each pixel in the overlap zone gets a weighted average,
750
+ with weights proportional to the distance from the boundary
751
+ of each image's valid region.
752
+ """
753
  h, w = img1.shape[:2]
754
  weight1 = np.zeros((h, w), dtype=np.float64)
755
  weight2 = np.zeros((h, w), dtype=np.float64)
756
+
757
  if np.any(mask1 > 0.5):
758
+ weight1 = cv2.distanceTransform(
759
+ (mask1 > 0.5).astype(np.uint8), cv2.DIST_L2, 5
760
+ ).astype(np.float64)
761
  if np.any(mask2 > 0.5):
762
+ weight2 = cv2.distanceTransform(
763
+ (mask2 > 0.5).astype(np.uint8), cv2.DIST_L2, 5
764
+ ).astype(np.float64)
765
+
766
  total = weight1 + weight2
767
  total[total < 1e-10] = 1e-10
768
+
769
  alpha1 = (weight1 / total)[:, :, None]
770
  alpha2 = (weight2 / total)[:, :, None]
771
+
772
  blended = alpha1 * img1 + alpha2 * img2
773
  return blended
774
 
775
 
776
 
777
+ # STEP 8: INTENSITY / EXPOSURE COMPENSATION
778
 
779
 
780
  def compensate_exposure(img_target, img_source, mask_target, mask_source):
781
+ """
782
+ Simple gain-based exposure compensation.
783
+
784
+ In the overlap region between two images, compute the per-channel
785
+ mean intensity ratio and apply it as a multiplicative gain to the
786
+ source image. This corrects for differences in auto-exposure
787
+ between the original photographs.
788
+
789
+ Args:
790
+ img_target: reference image (float64)
791
+ img_source: image to adjust
792
+ mask_target: valid-pixel mask for target
793
+ mask_source: valid-pixel mask for source
794
+
795
+ Returns:
796
+ compensated: exposure-corrected source image
797
+ """
798
  overlap = (mask_target > 0.5) & (mask_source > 0.5)
799
+
800
  if np.sum(overlap) < 100:
801
+ # Not enough overlap for reliable estimation
802
  return img_source.copy()
803
+
804
  gains = []
805
  for c in range(3):
806
  mean_target = np.mean(img_target[:, :, c][overlap])
807
  mean_source = np.mean(img_source[:, :, c][overlap])
808
  if mean_source > 1.0:
809
  gain = mean_target / mean_source
810
+ # Clamp to avoid extreme corrections
811
  gain = np.clip(gain, 0.5, 2.0)
812
  else:
813
  gain = 1.0
814
  gains.append(gain)
815
+
816
  compensated = img_source.copy()
817
  for c in range(3):
818
  compensated[:, :, c] = compensated[:, :, c] * gains[c]
819
+
820
  print(f" Exposure gains: R={gains[2]:.3f}, G={gains[1]:.3f}, B={gains[0]:.3f}")
821
  return compensated
822
 
823
 
824
+ # STEP 9: NAIVE STITCH
825
 
826
 
827
  def naive_stitch(warped_images, masks):
828
+ """
829
+ A naive stitch simply overlays images in order, with no blending.
830
+ Later images overwrite earlier ones. This produces visible seams
831
+ at exposure boundaries and ghosting at misalignments.
832
+ """
833
  canvas = np.zeros_like(warped_images[0])
834
  for img, mask in zip(warped_images, masks):
835
  region = mask > 0.5
 
842
 
843
 
844
 
845
+ # STEP 10: FINAL CLEANUP β€” CROP & RECTANGLE
846
 
847
 
848
  def crop_to_content(panorama, combined_mask):
849
+ """
850
+ Manual 'Smart Crop' using NumPy Matrix Operations
851
+
852
+ 1. Uses NumPy to find the bounding box of valid pixels .
853
+ 2. Uses a 'Greedy Shrink' algorithm to find the largest clean rectangle.
854
+ 3. Safety Check: Ensures we don't crop away actual image content.
855
+ """
856
+ # 1. Create a binary mask using NumPy boolean operations
857
+ # (Strict manual thresholding)
858
  if combined_mask.dtype == np.float64 or combined_mask.dtype == np.float32:
859
+ # Check for non-black pixels (allow tiny float error > 0.01)
860
  valid_mask = (combined_mask > 0.01)
861
  else:
862
  valid_mask = (combined_mask > 1)
863
+
864
+ # 2. Manual Bounding Box (Using NumPy)
865
+ # Project mask onto axes to find where content starts/ends
866
  rows = np.any(valid_mask, axis=1)
867
  cols = np.any(valid_mask, axis=0)
868
+
869
+ # If image is empty, return original
870
  if not np.any(rows) or not np.any(cols):
871
  return panorama
872
+
873
  y_min, y_max = np.where(rows)[0][[0, -1]]
874
  x_min, x_max = np.where(cols)[0][[0, -1]]
875
+
876
+ # Cut out the black void
877
  safe_crop = panorama[y_min:y_max+1, x_min:x_max+1]
878
  safe_mask = valid_mask[y_min:y_max+1, x_min:x_max+1]
879
+
880
+ # Count actual valid pixels (content)
881
  total_content = np.count_nonzero(safe_mask)
882
+
883
+ # 3. Greedy Shrink Loop
884
  top, bottom = 0, safe_mask.shape[0] - 1
885
  left, right = 0, safe_mask.shape[1] - 1
886
+
887
+ # Safety: We will NOT crop if we lose more than 20% of the actual pixels.(for this step, we can allow some cropping, but not if it cuts into the main image content)
888
+ # This prevents the code from returning a tiny square or crashing on slanted images.
889
+ min_content_threshold = total_content * 0.20 # We want to keep at least 20% of the content to avoid over-cropping.(for more wider range of images, we can adjust this threshold as needed)
890
+
891
+ # Limit iterations to prevent hanging
892
  max_iters = min(safe_mask.shape[0], safe_mask.shape[1])
893
+
894
  for _ in range(max_iters):
895
+ # Current dimensions
896
  h = bottom - top
897
  w = right - left
898
  if h <= 0 or w <= 0: break
899
+
900
+ # Get the current ROI from the mask
901
  roi = safe_mask[top:bottom+1, left:right+1]
902
+
903
+ # STOP if we are cutting into the main image too much
904
  if np.count_nonzero(roi) < min_content_threshold:
905
  print(" [Crop] Stopping to preserve image content.")
906
  break
907
+
908
+ # Check the 4 edges using NumPy slicing
909
  row_top = roi[0, :]
910
  row_btm = roi[-1, :]
911
  col_lft = roi[:, 0]
912
  col_rgt = roi[:, -1]
913
+
914
+ # Count invalid (False) pixels on edges
915
+ bad_t = np.sum(~row_top) # '~' is logical NOT
916
  bad_b = np.sum(~row_btm)
917
  bad_l = np.sum(~col_lft)
918
  bad_r = np.sum(~col_rgt)
919
+
920
+ # If edges are clean, we are done!
921
  if (bad_t + bad_b + bad_l + bad_r) == 0:
922
  print(f" [Crop] Found Clean Rect: {w}x{h} px")
923
  return safe_crop[top:bottom+1, left:right+1]
924
+
925
+ # Greedy choice: Shrink the edge with the most black pixels
926
  max_bad = max(bad_t, bad_b, bad_l, bad_r)
927
+
928
  if max_bad == bad_t: top += 1
929
  elif max_bad == bad_b: bottom -= 1
930
  elif max_bad == bad_l: left += 1
931
  elif max_bad == bad_r: right -= 1
932
+
933
+ # Return the best we found
934
  return safe_crop[top:bottom+1, left:right+1]
935
 
936
+ # MAIN
937
+
938
 
939
def memory_safe_linear_blend(img1, img2, mask1, mask2):
    """
    Disk-based linear (feathered) blending of two warped images.

    Each mask's distance-transform weight map is computed one at a time,
    spilled to a disk-backed np.memmap, and freed from RAM before the next
    one is computed, so at most one full-resolution float32 map lives in
    memory at once. The final blend is then computed in horizontal chunks.
    This guarantees progress even when RAM is nearly exhausted.

    Parameters
    ----------
    img1, img2 : np.ndarray
        uint8 HxWx3 warped images on the shared canvas.
    mask1, mask2 : np.ndarray
        uint8 HxW validity masks (255 inside each image's footprint,
        0 outside).

    Returns
    -------
    np.ndarray
        uint8 HxWx3 blended panorama. On unexpected failure, img1 is
        returned unmodified as a best-effort last resort.
    """
    import gc

    print("    -> RAM Full. Switching to Disk-Based Blending (Slow but Robust)...")
    h, w = img1.shape[:2]

    def _weight_to_disk(mask, path):
        # Compute the distance-transform weight map for one mask, write it
        # to a disk-backed memmap, and immediately release the RAM copy.
        # NOTE: the empty-mask branch previously crashed with a NameError
        # (`m1_bin` was only bound in the non-empty branch before `del`).
        if np.any(mask > 0):
            binary = (mask > 127).astype(np.uint8)  # 0/255 -> 0/1 for cv2
            weights = cv2.distanceTransform(binary, cv2.DIST_L2, 5)
            del binary
        else:
            weights = np.zeros((h, w), dtype=np.float32)
        fp = np.memmap(path, dtype='float32', mode='w+', shape=(h, w))
        fp[:] = weights[:]
        del weights  # free RAM before computing the next map
        gc.collect()
        return fp

    fp1 = fp2 = None
    try:
        print("       Calculating weight map 1...")
        fp1 = _weight_to_disk(mask1, 'temp_w1.dat')

        print("       Calculating weight map 2...")
        fp2 = _weight_to_disk(mask2, 'temp_w2.dat')

        print("       Blending chunks...")
        # Output buffer: prefer RAM, fall back to a disk-backed array.
        try:
            result = np.zeros(img1.shape, dtype=np.uint8)
        except MemoryError:
            result = np.memmap('temp_out.dat', dtype='uint8', mode='w+',
                               shape=img1.shape)

        CHUNK = 2000  # rows per chunk: bounds peak RAM during the blend
        for y in range(0, h, CHUNK):
            ye = min(y + CHUNK, h)

            # Only CHUNK rows of each weight map are pulled off disk here.
            w1_c = fp1[y:ye]
            w2_c = fp2[y:ye]

            total = w1_c + w2_c
            total[total < 1e-5] = 1e-5  # avoid division by zero in gaps
            alpha = (w1_c / total)[:, :, None]  # broadcast over 3 channels

            chunk1 = img1[y:ye].astype(np.float32)
            chunk2 = img2[y:ye].astype(np.float32)

            blended = chunk1 * alpha + chunk2 * (1.0 - alpha)
            result[y:ye] = np.clip(blended, 0, 255).astype(np.uint8)

        return result

    except Exception as e:
        print(f"    [!] Error in disk blending: {e}")
        return img1  # last resort: return one valid image rather than crash

    finally:
        # Always release the memmaps and remove the scratch files, even on
        # the error path (they previously leaked when an exception fired).
        del fp1, fp2
        gc.collect()
        for path in ('temp_w1.dat', 'temp_w2.dat'):
            try:
                os.remove(path)
            except OSError:
                pass
 
1021
 
1022
def smart_blend_manager(img1, img2, mask1, mask2, levels=6):
    """
    Choose a blending strategy based on an estimate of required RAM.

    1. Estimate the memory multi-band (Laplacian pyramid) blending would
       need for this canvas.
    2. If it exceeds SAFE_LIMIT_GB, go straight to disk-based linear
       blending — slower, but it cannot crash the process.
    3. Otherwise attempt high-quality multi-band blending, falling back
       to the disk path on a MemoryError or an OpenCV allocation error.

    Parameters
    ----------
    img1, img2 : np.ndarray
        uint8 warped images on the shared canvas.
    mask1, mask2 : np.ndarray
        uint8 validity masks for the two images.
    levels : int
        Number of pyramid levels for multi-band blending.

    Returns
    -------
    np.ndarray
        The blended result from whichever strategy was used.
    """
    h, w = img1.shape[:2]
    pixels = h * w

    # Conservative estimate: a float64 canvas costs pixels * 3 channels *
    # 8 bytes; two image copies plus ~2x pyramid overhead -> factor of 4.
    est_mem_bytes = (pixels * 3 * 8) * 4
    est_mem_gb = est_mem_bytes / (1024**3)

    print(f"    [Memory Check] Image size: {w}x{h}")
    print(f"    [Memory Check] Est. Multi-Band RAM needed: ~{est_mem_gb:.2f} GB")

    # Above this threshold the OpenCV C++ allocator is likely to abort the
    # process in a way Python cannot catch — so do not even attempt it.
    SAFE_LIMIT_GB = 2.0

    if est_mem_gb > SAFE_LIMIT_GB:
        print(f"    [!] Image exceeds safe limit ({SAFE_LIMIT_GB} GB). Forcing Disk-Based Blending.")
        return memory_safe_linear_blend(img1, img2, mask1, mask2)

    # Small enough: try the high-quality multi-band blend.
    try:
        return multi_band_blend(img1.astype(np.float64), img2.astype(np.float64),
                                mask1, mask2, levels)
    except (MemoryError, cv2.error):
        # NumPy's _ArrayMemoryError subclasses MemoryError, so this clause
        # covers it without touching the private np.core._exceptions API
        # (which was removed in NumPy 2.0 and would itself raise here).
        print("\n    [!] RAM Full or OpenCV Error! Switching to Disk-Based Blending...")
        import gc
        gc.collect()
        return memory_safe_linear_blend(img1, img2, mask1, mask2)
1062
 
1063
 
1064
 
 
 
 
1065
  def stitch_panorama(image_paths, output_dir="output"):
1066
+ """
1067
+ Full panoramic stitching pipeline.
1068
+
1069
+ Strategy: Use the CENTER image as the reference frame (identity
1070
+ homography). Compute homographies that map the left image -> center
1071
+ and right image -> center. For more than 3 images, chain
1072
+ homographies outward from center.
1073
+
1074
+ This center-projection approach minimizes the total amount of
1075
+ perspective distortion in the final panorama, since the center
1076
+ image undergoes no warping at all.
1077
+ """
1078
  os.makedirs(output_dir, exist_ok=True)
1079
 
1080
  print("=" * 70)
1081
+ print("PANORAMIC IMAGE STITCHING ")
1082
  print("=" * 70)
1083
 
1084
+ # ------------------------------------------------------------------
1085
  # 1. Load images
1086
+ # ------------------------------------------------------------------
1087
  print("\n[1/8] Loading images...")
1088
  images_bgr, images_gray = load_images(image_paths)
1089
  n_images = len(images_bgr)
1090
+ display_images(images_bgr)
1091
+
1092
+ # The center image is our reference
1093
  center_idx = n_images // 2
1094
  print(f" Reference (center) image: index {center_idx}")
1095
 
1096
+ # ------------------------------------------------------------------
1097
  # 2. Extract SIFT features
1098
+ # ------------------------------------------------------------------
1099
  print("\n[2/8] Extracting SIFT features...")
1100
  keypoints, descriptors = extract_sift_features(images_gray)
1101
+ visualize_keypoints(images_bgr, keypoints)
1102
 
1103
+ # ------------------------------------------------------------------
1104
+ # 3-5. For each adjacent pair, match features & compute homography
1105
+ # ------------------------------------------------------------------
1106
  print("\n[3/8] Matching features & computing homographies...")
1107
+
1108
+ # We'll store homographies mapping each image -> center reference frame
1109
+ # H[center_idx] = Identity
1110
  homographies = [None] * n_images
1111
  homographies[center_idx] = np.eye(3)
1112
 
1113
+ # Process pairs outward from center
1114
+ # Left side: center-1, center-2, ...
1115
  for i in range(center_idx - 1, -1, -1):
1116
  print(f"\n Pair: Image {i} <-> Image {i+1}")
1117
+
1118
+ # Match features
1119
  matches = match_features(descriptors[i], descriptors[i + 1])
1120
  pts_i, pts_ip1 = get_matched_points(keypoints[i], keypoints[i + 1], matches)
1121
+
1122
+ # Visualize matches
1123
+ visualize_matches(
1124
+ images_bgr[i], keypoints[i],
1125
+ images_bgr[i + 1], keypoints[i + 1],
1126
+ matches,
1127
+ title=f"Matches: Image {i} <-> Image {i+1}",
1128
+ fname=f"03_matches_{i}_{i+1}.png"
1129
+ )
1130
+
1131
+ # Compute homography: Image i -> Image i+1
1132
  H_i_to_ip1, inlier_mask = compute_homography_ransac(pts_i, pts_ip1)
1133
+
1134
+ # Chain to get Image i -> center
1135
+ # H[i] = H[i+1] @ H_i_to_ip1
1136
  homographies[i] = homographies[i + 1] @ H_i_to_ip1
1137
  print(f" Homography (Image {i} -> center) computed successfully")
1138
 
1139
+ # Right side: center+1, center+2, ...
1140
  for i in range(center_idx + 1, n_images):
1141
  print(f"\n Pair: Image {i} <-> Image {i-1}")
1142
+
1143
+ # Match features
1144
  matches = match_features(descriptors[i], descriptors[i - 1])
1145
  pts_i, pts_im1 = get_matched_points(keypoints[i], keypoints[i - 1], matches)
1146
+
1147
+ # Visualize matches
1148
+ visualize_matches(
1149
+ images_bgr[i], keypoints[i],
1150
+ images_bgr[i - 1], keypoints[i - 1],
1151
+ matches,
1152
+ title=f"Matches: Image {i} <-> Image {i-1}",
1153
+ fname=f"03_matches_{i}_{i-1}.png"
1154
+ )
1155
+
1156
+ # Compute homography: Image i -> Image i-1
1157
  H_i_to_im1, inlier_mask = compute_homography_ransac(pts_i, pts_im1)
1158
+
1159
+ # Chain: H[i] = H[i-1] @ H_i_to_im1
1160
  homographies[i] = homographies[i - 1] @ H_i_to_im1
1161
  print(f" Homography (Image {i} -> center) computed successfully")
1162
 
1163
+ # ------------------------------------------------------------------
1164
+ # 6. Compute canvas size and warp all images
1165
+ # ------------------------------------------------------------------
1166
  print("\n[4/8] Computing output canvas bounds...")
1167
+
1168
  all_x_min, all_y_min = 0, 0
1169
  all_x_max, all_y_max = 0, 0
1170
+
1171
  for i in range(n_images):
1172
  h, w = images_bgr[i].shape[:2]
1173
  xmin, ymin, xmax, ymax = compute_warped_bounds(homographies[i], h, w)
 
1175
  all_y_min = min(all_y_min, ymin)
1176
  all_x_max = max(all_x_max, xmax)
1177
  all_y_max = max(all_y_max, ymax)
1178
+
1179
  out_w = all_x_max - all_x_min + 1
1180
  out_h = all_y_max - all_y_min + 1
1181
+ offset = (all_x_min, all_y_min) # translation offset
1182
  print(f" Canvas size: {out_w} x {out_h} px, offset: {offset}")
1183
 
1184
  print("\n[5/8] Warping images into common coordinate frame...")
 
1186
  masks = []
1187
  for i in range(n_images):
1188
  print(f" Warping image {i}...")
1189
+ warped, mask = warp_image(
1190
+ images_bgr[i], homographies[i],
1191
+ output_size=(out_h, out_w),
1192
+ offset=offset
1193
+ )
1194
+
1195
+ # --- CRITICAL FIX: Save 85% RAM immediately ---
1196
+ # Convert float64 (8 bytes/pixel) -> uint8 (1 byte/pixel)
1197
+ # This reduces a 8GB image to 1GB.
1198
  warped_u8 = np.clip(warped, 0, 255).astype(np.uint8)
1199
  warped_images.append(warped_u8)
1200
+
1201
+ # Save mask as uint8 (0 or 255) to save space too
1202
  mask_u8 = (mask > 0.5).astype(np.uint8) * 255
1203
  masks.append(mask_u8)
1204
+
1205
+ # Force delete the heavy arrays
1206
  del warped, mask
1207
+ import gc; gc.collect()
1208
 
1209
+ # ------------------------------------------------------------------
1210
  # 7. Exposure compensation
1211
+ # ------------------------------------------------------------------
1212
+
1213
+
1214
  print("\n[6/8] Compensating exposure differences...")
1215
  for i in range(n_images):
1216
  if i == center_idx:
1217
  continue
1218
+
1219
+ # Convert to float temporarily for math
1220
  target_f = warped_images[center_idx].astype(np.float64)
1221
  source_f = warped_images[i].astype(np.float64)
1222
+
1223
+ # Masks are now uint8 0-255, normalize to 0-1 for the function
1224
  mask_t = masks[center_idx] / 255.0
1225
  mask_s = masks[i] / 255.0
1226
+
1227
+ compensated = compensate_exposure(
1228
+ target_f, source_f,
1229
+ mask_t, mask_s
1230
+ )
1231
+
1232
+ # Convert back to uint8 immediately to save RAM
1233
  warped_images[i] = np.clip(compensated, 0, 255).astype(np.uint8)
1234
+
1235
+ # Clean up
1236
  del target_f, source_f, compensated
1237
  gc.collect()
1238
 
1239
+ # ------------------------------------------------------------------
1240
+ # 8a. Naive stitch (for report comparison)
1241
+ # ------------------------------------------------------------------
1242
+ print("\n[7/8] Creating naive stitch (direct overlay, no blending)...")
1243
  naive = naive_stitch(warped_images, masks)
1244
  naive_clipped = np.clip(naive, 0, 255).astype(np.uint8)
1245
  cv2.imwrite(os.path.join(output_dir, "04_naive_stitch.png"), naive_clipped)
1246
 
1247
+ # ------------------------------------------------------------------
1248
+ # 8b. Multi-band blending (final panorama)
1249
+ # ------------------------------------------------------------------
1250
+ print("\n[8/8] Multi-band Laplacian blending for seamless result...")
1251
+
1252
+ # Iteratively blend: start with center, blend outward
1253
  blended = warped_images[center_idx].copy()
1254
  blended_mask = masks[center_idx].copy()
1255
+
1256
+ # Order of blending: alternate left and right from center
1257
  blend_order = []
1258
  for d in range(1, n_images):
1259
  if center_idx - d >= 0:
 
1263
 
1264
  for idx in blend_order:
1265
  print(f" Blending image {idx} into panorama...")
1266
+
1267
+ # Ensure we are passing valid types
1268
  if blended.dtype != np.uint8:
1269
  blended = np.clip(blended, 0, 255).astype(np.uint8)
1270
+
1271
  curr_img = np.clip(warped_images[idx], 0, 255).astype(np.uint8)
1272
+
1273
+ # Use the MANAGER instead of calling multi_band_blend directly
1274
+ blended = smart_blend_manager(
1275
+ blended, curr_img,
1276
+ blended_mask, masks[idx],
1277
+ levels=6
1278
+ )
1279
+ # Update combined mask
1280
  blended_mask = np.maximum(blended_mask, masks[idx])
1281
 
1282
  blended_clipped = np.clip(blended, 0, 255).astype(np.uint8)
1283
 
1284
+ # ------------------------------------------------------------------
1285
+ # 9. Final cleanup β€” crop to clean rectangle
1286
+ # ------------------------------------------------------------------
1287
  print("\n Cropping to clean rectangle...")
1288
  final_panorama = crop_to_content(blended_clipped, blended_mask)
1289
  naive_cropped = crop_to_content(naive_clipped, blended_mask)
1290
 
1291
+ # Save results
1292
  cv2.imwrite(os.path.join(output_dir, "05_blended_panorama.png"), final_panorama)
1293
  cv2.imwrite(os.path.join(output_dir, "04_naive_stitch_cropped.png"), naive_cropped)
1294
 
1295
+ # ------------------------------------------------------------------
1296
+ # 10. Comparison figure for the report
1297
+ # ------------------------------------------------------------------
1298
  print("\n Generating comparison figure...")
1299
  fig, axes = plt.subplots(2, 1, figsize=(18, 10))
1300
  axes[0].imshow(cv2.cvtColor(naive_cropped, cv2.COLOR_BGR2RGB))
 
1308
  plt.close()
1309
 
1310
  print("\n" + "=" * 70)
1311
+ print("Done All are saved to:", output_dir)
1312
  print("=" * 70)
1313
+ print(" 01_input_images.png β€” original images side by side")
1314
+ print(" 02_sift_keypoints.png β€” detected SIFT keypoints")
1315
+ print(" 03_matches_*.png β€” feature matches per pair")
1316
+ print(" 04_naive_stitch.png β€” naive direct-overlay stitch")
1317
+ print(" 05_blended_panorama.png β€” final multi-band blended panorama")
1318
+ print(" 06_comparison.png β€” side-by-side naive vs final")
1319
 
1320
  return final_panorama
1321
 
1322
 
1323
+
1324
+
1325
if __name__ == "__main__":
    # USAGE:
    #   python panorama_stitch_memsafe.py img_left.jpg img_center.jpg img_right.jpg [more...]
    #
    # Supply the images left-to-right in capture order; the middle image is
    # used as the reference frame. Three or more images are accepted.
    if len(sys.argv) < 4:
        # Derive the script name instead of hard-coding it (the old message
        # named "panorama_stitcher.py", which does not match this file).
        prog = os.path.basename(sys.argv[0])
        print(f"Usage: python {prog} <left.jpg> <center.jpg> <right.jpg> [more...]")
        print("Supply at least 3 overlapping images, ordered left to right.")
        sys.exit(1)

    image_paths = sys.argv[1:]
    print(f"Stitching {len(image_paths)} images...")
    stitch_panorama(image_paths, output_dir="output")