| !pip install -U adapter-transformers |
| !pip install -U transformers |
| import gradio as gr |
| from transformers import CLIPProcessor, CLIPModel |
| from PIL import Image |
| import torch |
|
|
# Load the CLIP processor and model once at import time; both are fetched
# from the same pretrained checkpoint identifier.
_CHECKPOINT = "Taarhoinc/TaarhoGen1"
processor = CLIPProcessor.from_pretrained(_CHECKPOINT)
model = CLIPModel.from_pretrained(_CHECKPOINT)
|
|
| # Define the function to describe a floor plan |
def describe_floorplan(floorplan_image: Image.Image, top_k: int = 3):
    """Describe a floor plan drawing by listing its most likely components.

    The image is scored against a fixed list of component names using the
    module-level CLIP ``model`` and ``processor``. The ``top_k`` names with
    the highest softmax probability are returned, most likely first.

    Args:
        floorplan_image: The floor plan drawing to analyse.
        top_k: Number of component names to return. Non-integer numbers
            (for example a float delivered by a UI slider) are truncated
            with ``int()``. Values of zero or less yield an empty result,
            and values larger than the number of known components return
            all of them.

    Returns:
        The selected component names joined by ``", "`` (an empty string
        when ``top_k`` is zero or negative).

    Raises:
        ValueError: If ``floorplan_image`` is ``None``.
    """
    # Fail early with a clear message instead of an obscure error raised
    # from inside the processor/model when no image was supplied.
    if floorplan_image is None:
        raise ValueError("No floor plan image was provided.")

    # Candidate labels the image is compared against.
    components = [
        "bedroom",
        "kitchen",
        "bathroom",
        "living room",
        "dining room",
        "hallway",
        "garage",
        "balcony",
        "stairs",
        "door",
        "window",
    ]

    # Slice bounds must be integers, and a non-positive count must not wrap
    # around (``[-0:]`` would otherwise select every component).
    top_k = max(0, int(top_k))
    if top_k == 0:
        return ""

    # Preprocess the image and text prompts.
    inputs = processor(
        text=components,
        images=floorplan_image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model(**inputs)

    # One row per image; softmax over the text prompts turns the similarity
    # logits into probabilities for the single input image.
    probs = outputs.logits_per_image.softmax(dim=1).cpu().numpy()[0]

    # Indices ordered from most to least probable, truncated to top_k.
    # Slicing past the end is harmless, so oversized requests return all.
    ranked_indices = probs.argsort()[::-1][:top_k]

    detected_components = [components[i] for i in ranked_indices]

    # Return as a comma-separated string.
    return ", ".join(detected_components)
|
|
# Build the Gradio UI: an image upload plus a slider feed describe_floorplan,
# and the returned text is shown in a label widget.
image_input = gr.Image(label="Upload a floor plan drawing", type="pil")
count_input = gr.Slider(
    1,
    10,
    step=1,
    value=3,
    label="Number of components to detect",
)
result_output = gr.Label(label="Detected Components")

demo = gr.Interface(
    fn=describe_floorplan,
    inputs=[image_input, count_input],
    outputs=result_output,
    title="Floor Plan Description with TaarhoGen1",
    description="Upload a floor plan drawing to get a list of detected components.",
)
demo.launch()