#!/usr/bin/env python3 import sys import struct import subprocess import numpy as np import nedc_image_tools import multiprocessing as mp import time # Processing constants TILE_SIZE = 2048 WINDOW_SIZE = 256 STRIDE_SIZE = 128 HISTOGRAM_BINS = 16 "Detect number of available CUDA GPUs" def detect_gpu_count(): try: result = subprocess.run( ['nvidia-smi', '--query-gpu=count', '--format=csv,noheader'], capture_output=True, text=True, timeout=5 ) return len(result.stdout.strip().split('\n')) except: return 1 "Process a single image on specified GPU using compiled C++ worker" def process_image_on_gpu(image_file, gpu_id): try: # Launch worker subprocess for this GPU worker = subprocess.Popen( ['./myaverager', str(gpu_id)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) # Tell worker we're sending 1 image worker.stdin.write(struct.pack('i', 1)) worker.stdin.flush() # Open image and get dimensions reader = nedc_image_tools.Nil() reader.open(image_file) width, height = reader.get_dimension() # Send image dimensions worker.stdin.write(struct.pack('ii', width, height)) worker.stdin.flush() window_count = 0 # Process image in overlapping tiles tile_stride = TILE_SIZE - WINDOW_SIZE + STRIDE_SIZE for tile_y in range(0, height - WINDOW_SIZE + 1, tile_stride): for tile_x in range(0, width - WINDOW_SIZE + 1, tile_stride): # Calculate tile dimensions tile_w = min(TILE_SIZE, width - tile_x) tile_h = min(TILE_SIZE, height - tile_y) # Skip tiles too small for even one window if tile_w < WINDOW_SIZE or tile_h < WINDOW_SIZE: continue # Read tile from SVS file tile = reader.read_data( coordinate=(tile_x, tile_y), npixx=tile_w, npixy=tile_h, color_mode="RGB" ) # Ensure contiguous memory layout for efficient transfer tile = np.ascontiguousarray(tile, dtype=np.uint8) # Send tile dimensions and data worker.stdin.write(struct.pack('ii', tile_w, tile_h)) worker.stdin.write(tile.tobytes()) worker.stdin.flush() # Count windows in this tile num_windows_x = (tile_w - WINDOW_SIZE) // STRIDE_SIZE + 1 num_windows_y = (tile_h - WINDOW_SIZE) // STRIDE_SIZE + 1 window_count += num_windows_x * num_windows_y # Send terminator (0, 0) worker.stdin.write(struct.pack('ii', 0, 0)) worker.stdin.flush() # Close reader and input pipe reader.close() worker.stdin.close() # Read histogram result histogram_bytes = worker.stdout.read(HISTOGRAM_BINS * 8) if len(histogram_bytes) != HISTOGRAM_BINS * 8: raise RuntimeError(f"Expected {HISTOGRAM_BINS * 8} bytes, got {len(histogram_bytes)}") histogram = np.frombuffer(histogram_bytes, dtype=np.uint64) # Wait for worker to complete worker.wait() return histogram, window_count except Exception as e: sys.stderr.write(f"Error processing {image_file}: {e}\n") return np.zeros(HISTOGRAM_BINS, dtype=np.uint64), 0 "Wrapper function for multiprocessing pool" def worker_wrapper(args): image_file, gpu_id = args return process_image_on_gpu(image_file, gpu_id) "Calculate mean and standard deviation from histogram" def calculate_statistics(histogram, num_bins=HISTOGRAM_BINS): total_count = np.sum(histogram) if total_count == 0: return 0.0, 0.0 # Bin parameters (from FFT magnitude range) max_magnitude = 20.0 # 256 * 256 * 256 bin_width = max_magnitude / num_bins bin_centers = np.arange(num_bins) * bin_width + bin_width / 2 # Calculate mean mean = np.sum(histogram * bin_centers) / total_count # Calculate variance and standard deviation variance = np.sum(histogram * (bin_centers - mean) ** 2) / total_count stdev = np.sqrt(variance) return mean, stdev def main(): if len(sys.argv) < 2: sys.stderr.write("Usage: python3 myaverager.py images.list\n") sys.exit(1) files_list = sys.argv[1] # Detect GPUs num_gpus = detect_gpu_count() # Read image file list with open(files_list, 'r') as f: image_files = [line.strip() for line in f if line.strip()] if not image_files: sys.stderr.write("Error: No image files found in list\n") sys.exit(1) print(f"Processing {len(image_files)} images using {num_gpus} GPU(s)\n") start_time = time.time() # Distribute images across GPUs (round-robin) tasks = [(image_file, i % num_gpus) for i, image_file in enumerate(image_files)] # Process images in parallel (one process per GPU) with mp.Pool(processes=num_gpus) as pool: results = pool.map(worker_wrapper, tasks) # Aggregate results from all workers global_histogram = np.zeros(HISTOGRAM_BINS, dtype=np.uint64) total_windows = 0 for histogram, window_count in results: global_histogram += histogram total_windows += window_count elapsed_time = time.time() - start_time # Calculate statistics mean, stdev = calculate_statistics(global_histogram) # Display results print("\n" + "=" * 60) print("RESULTS") print("=" * 60) print(f"Images processed: {len(image_files)}") print(f"Total windows: {total_windows:,}") print(f"Average magnitude: {mean:.6f}") print(f"Standard deviation: {stdev:.6f}") print(f"\nTime spent: {elapsed_time:.2f} seconds") print(f"Throughput: {len(image_files)/elapsed_time:.2f} images/second") print(f" {total_windows/elapsed_time:.1f} windows/second") print(f"Per-image average: {elapsed_time/len(image_files)*1000:.2f} ms") print("=" * 60) if __name__ == "__main__": main()