 from PIL import Image
 from torchvision.transforms import Compose
 
+from invokeai.backend.model_management.models.base import ModelNotFoundException
 from invokeai.app.services.config.config_default import InvokeAIAppConfig
 from invokeai.backend.image_util.depth_anything.model.dpt import DPT_DINOv2
 from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize
 
 config = InvokeAIAppConfig.get_config()
 
+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
+
 DEPTH_ANYTHING_MODELS = {
     "large": {
         "url": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth?download=true",

 class DepthAnythingDetector:
     def __init__(self) -> None:
         self.model = None
-        self.model_size: Union[Literal["large", "base", "small"], None] = None
+        self.model_size: Union[DEPTH_ANYTHING_MODEL_SIZES, None] = None
 
-    def load_model(self, model_size=Literal["large", "base", "small"]):
+    def load_model(self, model_size: DEPTH_ANYTHING_MODEL_SIZES = "small"):
         DEPTH_ANYTHING_MODEL_PATH = pathlib.Path(config.models_path / DEPTH_ANYTHING_MODELS[model_size]["local"])
         if not DEPTH_ANYTHING_MODEL_PATH.exists():
             download_with_progress_bar(DEPTH_ANYTHING_MODELS[model_size]["url"], DEPTH_ANYTHING_MODEL_PATH)

@@ -84,16 +87,19 @@ def to(self, device):
         self.model.to(device)
         return self
 
-    def __call__(self, image, resolution=512):
-        image = np.array(image, dtype=np.uint8)
-        image = image[:, :, ::-1] / 255.0
+    def __call__(self, image: Image.Image, resolution: int = 512):
+        if self.model is None:
+            raise ModelNotFoundException("Depth Anything Model not loaded")
+
+        np_image = np.array(image, dtype=np.uint8)
+        np_image = np_image[:, :, ::-1] / 255.0
 
-        image_height, image_width = image.shape[:2]
-        image = transform({"image": image})["image"]
-        image = torch.from_numpy(image).unsqueeze(0).to(choose_torch_device())
+        image_height, image_width = np_image.shape[:2]
+        np_image = transform({"image": np_image})["image"]
+        tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(choose_torch_device())
 
         with torch.no_grad():
-            depth = self.model(image)
+            depth = self.model(tensor_image)
             depth = F.interpolate(depth[None], (image_height, image_width), mode="bilinear", align_corners=False)[0, 0]
             depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
 
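For context, a minimal usage sketch of the detector as changed in this diff. The import path for DepthAnythingDetector and the handling of the returned value are assumptions for illustration and are not part of this change:

# Hypothetical usage sketch (assumed import path; illustrative only)
from PIL import Image
from invokeai.backend.image_util.depth_anything import DepthAnythingDetector  # assumed location

detector = DepthAnythingDetector()
detector.load_model(model_size="small")  # downloads the checkpoint to models_path if missing
depth = detector(Image.open("input.png"), resolution=512)  # raises ModelNotFoundException if load_model was skipped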