import os
import pathlib
data_path = pathlib.Path("./data/barhill-newts-all")
if not data_path.exists():
    os.system(f"kaggle datasets download -d mshahoyi/barhill-newts-all --unzip -p {data_path}")
Newt Segmentation
This notebook segments the newts in the Barhill dataset with the Grounded-SAM-2 pipeline: Grounding DINO proposes bounding boxes from a text prompt, and SAM 2 turns each box into a pixel-level segmentation mask.
try:
    import supervision
    os.chdir("gsam2")
except ImportError:
    # First run: clone Grounded-SAM-2 and install it along with its dependencies
    os.system('git clone https://github.com/IDEA-Research/Grounded-SAM-2 gsam2')
    os.chdir("gsam2")
    os.system('pip install -q -e . -e grounding_dino')
    os.system('pip install -q supervision')
import argparse
import json
import os
import shutil
import subprocess
import tempfile
from datetime import datetime
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pycocotools.mask as mask_util
import supervision as sv
import torch
from PIL import Image
from tqdm import tqdm
from supervision.draw.color import ColorPalette
from utils.supervision_utils import CUSTOM_COLOR_MAP
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
from gcn_reid.newt_dataset import upload_to_kaggle
= "IDEA-Research/grounding-dino-base"
GROUNDING_MODEL = "./checkpoints/sam2.1_hiera_large.pt"
SAM2_CHECKPOINT = "configs/sam2.1/sam2.1_hiera_l.yaml"
SAM2_MODEL_CONFIG = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = Path("outputs/test_sam2.1")
OUTPUT_DIR = True
DUMP_JSON_RESULTS =True, exist_ok=True)
OUTPUT_DIR.mkdir(parents= "newt amphibian reptile."
TEXT_PROMPT
torch.autocast(device_type=DEVICE, dtype=torch.bfloat16).__enter__()

if torch.cuda.get_device_properties(0).major >= 8:
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
"checkpoints")
os.chdir("bash download_ckpts.sh")
os.system("..")
os.chdir(
sam2_checkpoint = SAM2_CHECKPOINT
model_cfg = SAM2_MODEL_CONFIG
sam2_model = build_sam2(model_cfg, sam2_checkpoint, device=DEVICE)
sam2_predictor = SAM2ImagePredictor(sam2_model)

model_id = GROUNDING_MODEL
processor = AutoProcessor.from_pretrained(model_id)
grounding_model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(DEVICE)
"..") os.chdir(
df = pd.read_csv(data_path / "metadata.csv")
df
Create Bounding Boxes
df['bbox'] = None

for i, row in tqdm(df[~df.is_video].iterrows()):
    image_path = data_path / row.file_path
    image_key = f"{row.identity}/{row.file_name}"
    image = cv2.imread(str(image_path))
    pil_image = Image.open(image_path)

    inputs = processor(images=pil_image, text=TEXT_PROMPT, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = grounding_model(**inputs)

    results = processor.post_process_grounded_object_detection(
        outputs,
        inputs.input_ids,
        box_threshold=0.4,
        text_threshold=0.3,
        target_sizes=[(image.shape[0], image.shape[1])]
    )[0]  # Get the first (and only) result

    bbox = results["boxes"].cpu().numpy().tolist()
    if len(bbox) > 0:
        df.at[i, 'bbox'] = bbox[0]
    else:
        df.at[i, 'bbox'] = None
df.bbox.describe()
Add manual annotations
with open("manual_annotations.json", "r") as f:
    manual_annotations = json.load(f)

for annot in manual_annotations["images"]:
    image_name = annot["image"]
    bbox_dict = annot["annotations"][0]["boundingBox"]

    # Convert from XYWH to XYXY format
    x, y, w, h = bbox_dict["x"], bbox_dict["y"], bbox_dict["width"], bbox_dict["height"]
    bbox = [x, y, x + w, y + h]

    df.at[df[df.file_name == image_name].index[0], "bbox"] = bbox
df.bbox.describe()
Visualise bounding boxes
visualize_bbox
visualize_bbox (image_path, bbox, label)
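The body of visualize_bbox is not rendered here. A minimal sketch consistent with the signature, assuming bbox is in XYXY pixel coordinates as produced above (the actual helper may differ):

def visualize_bbox(image_path, bbox, label):
    # Hypothetical re-implementation for illustration only
    image = cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB)
    x1, y1, x2, y2 = [int(v) for v in bbox]
    plt.imshow(image)
    plt.gca().add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                      edgecolor='red', facecolor='none', linewidth=2))
    plt.text(x1, y1 - 5, label, color='red')
    plt.axis('off')
    plt.show()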
i = df[df.bbox.notna()].index[0]
visualize_bbox(data_path / df.iloc[i].file_path, df.iloc[i].bbox, "newt")
Create Segmentation Masks
df['segmentation_mask_rle'] = None

for i, row in tqdm(df[~df.is_video].iterrows()):
    if row.bbox is None:  # skip images where no box was detected or annotated
        continue

    image_path = data_path / row.file_path
    pil_image = Image.open(image_path)

    box = np.array(row.bbox).reshape(1, 4)
    sam_box = box.astype(int)

    sam2_predictor.set_image(np.array(pil_image.convert("RGB")))
    masks, _, _ = sam2_predictor.predict(
        box=sam_box,
        multimask_output=False
    )

    mask = masks[0]  # Get the first (and only) mask
    mask_uint8 = mask.astype(np.uint8, order='F')  # pycocotools expects Fortran-order uint8
    rle = mask_util.encode(mask_uint8)
    rle_string = rle['counts'].decode('utf-8') if isinstance(rle['counts'], bytes) else str(rle['counts'])

    df.at[i, 'segmentation_mask_rle'] = f"{rle['size'][0]}x{rle['size'][1]}:{rle_string}"
df.segmentation_mask_rle.describe()
Visualise Segmentation Masks
decode_rle_mask
decode_rle_mask (rle_string)
Decode RLE string back to binary mask
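Only the signature and docstring are rendered above. A minimal sketch of the decoding, assuming the "HxW:counts" strings written by the segmentation loop (the actual implementation may differ):

def decode_rle_mask(rle_string):
    "Decode RLE string back to binary mask"
    # Split the "HxW:counts" string produced when the masks were encoded
    size_part, counts = rle_string.split(":", 1)
    h, w = (int(v) for v in size_part.split("x"))
    rle = {"size": [h, w], "counts": counts.encode("utf-8")}
    return mask_util.decode(rle)  # (h, w) uint8 array of 0s and 1s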
i = df[df.segmentation_mask_rle.notna()].index[0]
mask = decode_rle_mask(df.iloc[i].segmentation_mask_rle)

plt.subplot(1, 2, 1)
plt.imshow(mask)
plt.axis('off')
plt.subplot(1, 2, 2)
# Convert OpenCV's BGR output to RGB so matplotlib renders the colours correctly
plt.imshow(cv2.cvtColor(cv2.imread(str(data_path / df.iloc[i].file_path)), cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
visualize_segmentation
visualize_segmentation (image_path, mask, bbox, label)
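As with visualize_bbox, only the signature is shown. A minimal sketch, assuming mask arrives with shape (1, H, W) as it is passed below and bbox is XYXY (the actual helper may differ); it deliberately omits plt.show() so the sample loop further down can plt.savefig() the figure:

def visualize_segmentation(image_path, mask, bbox, label):
    # Hypothetical re-implementation for illustration only
    image = cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB)
    x1, y1, x2, y2 = [int(v) for v in bbox]
    plt.imshow(image)
    # Overlay the binary mask, hiding pixels where the mask is zero
    plt.imshow(np.ma.masked_where(mask[0] == 0, mask[0]), alpha=0.5, cmap='spring')
    plt.gca().add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                      edgecolor='red', facecolor='none', linewidth=2))
    plt.text(x1, y1 - 5, label, color='red')
    plt.axis('off')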
i = df[df.segmentation_mask_rle.notna()].index[0]
mask = decode_rle_mask(df.iloc[i].segmentation_mask_rle)
visualize_segmentation(data_path / df.iloc[i].file_path, mask.reshape(1, mask.shape[0], mask.shape[1]), df.iloc[i].bbox, "newt")
Updated metadata saved to ./data/barhill/gallery_and_probes_with_masks.csv
Added segmentation masks for 998 out of 1253 images
Create sample visualisations
/ "sample_visualisations", exist_ok=True)
os.makedirs(data_path
for i, row in tqdm(df[df.segmentation_mask_rle.notna()].sample(100).iterrows()):
= decode_rle_mask(row.segmentation_mask_rle)
mask = mask.reshape(1, mask.shape[0], mask.shape[1])
mask
=(10, 10))
plt.figure(figsize/ row.file_path, mask, row.bbox, "newt")
visualize_segmentation(data_path / "sample_visualisations" / f"{row.identity}-{row.file_name}")
plt.savefig(data_path plt.close()
Create New Kaggle Dataset
metadata_dest = data_path / "metadata.csv"
df.to_csv(metadata_dest, index=False)
Upload to Kaggle
="mshahoyi",
upload_to_kaggle(user_id="GCNs Segmented",
titleid="newts-segmented-new",
=[{"name": "CC0-1.0"}],
licenses=["biology", "computer-vision", "animals", "great crested newts"],
keywords=data_path) dataset_dir