[10/15/25 22:39:26] ERROR    Exception in ASGI application                      
                                                                                
                             ╭─────── Traceback (most recent call last) ───────╮
                             │ /usr/src/immich_ml/main.py:177 in predict       │
                             │                                                 │
                             │   174 │   │   inputs = text                     │
                             │   175 │   else:                                 │
                             │   176 │   │   raise HTTPException(400, "Either  │
                             │ ❱ 177 │   response = await run_inference(inputs │
                             │   178 │   return ORJSONResponse(response)       │
                             │   179                                           │
                             │   180                                           │
                             │                                                 │
                             │ /usr/src/immich_ml/main.py:200 in run_inference │
                             │                                                 │
                             │   197 │   │   response[entry["task"]] = output  │
                             │   198 │                                         │
                             │   199 │   without_deps, with_deps = entries     │
                             │ ❱ 200 │   await asyncio.gather(*[_run_inference │
                             │   201 │   if with_deps:                         │
                             │   202 │   │   await asyncio.gather(*[_run_infer │
                             │   203 │   if isinstance(payload, Image):        │
                             │                                                 │
                             │ /usr/src/immich_ml/main.py:195 in               │
                             │ _run_inference                                  │
                             │                                                 │
                             │   192 │   │   │   │   message = f"Task {entry[' │
                             │       output of {dep}"                          │
                             │   193 │   │   │   │   raise HTTPException(400,  │
                             │   194 │   │   model = await load(model)         │
                             │ ❱ 195 │   │   output = await run(model.predict, │
                             │   196 │   │   outputs[model.identity] = output  │
                             │   197 │   │   response[entry["task"]] = output  │
                             │   198                                           │
                             │                                                 │
                             │ /usr/src/immich_ml/main.py:213 in run           │
                             │                                                 │
                             │   210 │   if thread_pool is None:               │
                             │   211 │   │   return func(*args, **kwargs)      │
                             │   212 │   partial_func = partial(func, *args, * │
                             │ ❱ 213 │   return await asyncio.get_running_loop │
                             │   214                                           │
                             │   215                                           │
                             │   216 async def load(model: InferenceModel) ->  │
                             │                                                 │
                             │ /usr/local/lib/python3.11/concurrent/futures/th │
                             │ read.py:58 in run                               │
                             │                                                 │
                             │ /usr/src/immich_ml/models/base.py:61 in predict │
                             │                                                 │
                             │    58 │   │   self.load()                       │
                             │    59 │   │   if model_kwargs:                  │
                             │    60 │   │   │   self.configure(**model_kwargs │
                             │ ❱  61 │   │   return self._predict(*inputs, **m │
                             │    62 │                                         │
                             │    63 │   @abstractmethod                       │
                             │    64 │   def _predict(self, *inputs: Any, **mo │
                             │                                                 │
                             │ /usr/src/immich_ml/models/facial_recognition/de │
                             │ tection.py:30 in _predict                       │
                             │                                                 │
                             │   27 │   def _predict(self, inputs: NDArray[np. │
                             │      FaceDetectionOutput:                       │
                             │   28 │   │   inputs = decode_cv2(inputs)        │
                             │   29 │   │                                      │
                             │ ❱ 30 │   │   bboxes, landmarks = self._detect(i │
                             │   31 │   │   return {                           │
                             │   32 │   │   │   "boxes": bboxes[:, :4].round() │
                             │   33 │   │   │   "scores": bboxes[:, 4],        │
                             │                                                 │
                             │ /usr/src/immich_ml/models/facial_recognition/de │
                             │ tection.py:38 in _detect                        │
                             │                                                 │
                             │   35 │   │   }                                  │
                             │   36 │                                          │
                             │   37 │   def _detect(self, inputs: NDArray[np.u │
                             │      NDArray[np.float32]]:                      │
                             │ ❱ 38 │   │   return self.model.detect(inputs)   │
                             │   39 │                                          │
                             │   40 │   def configure(self, **kwargs: Any) ->  │
                             │   41 │   │   self.model.det_thresh = kwargs.pop │
                             │                                                 │
                             │ /opt/venv/lib/python3.11/site-packages/insightf │
                             │ ace/model_zoo/retinaface.py:224 in detect       │
                             │                                                 │
                             │   221 │   │   det_img = np.zeros( (input_size[1 │
                             │   222 │   │   det_img[:new_height, :new_width,  │
                             │   223 │   │                                     │
                             │ ❱ 224 │   │   scores_list, bboxes_list, kpss_li │
                             │   225 │   │                                     │
                             │   226 │   │   scores = np.vstack(scores_list)   │
                             │   227 │   │   scores_ravel = scores.ravel()     │
                             │                                                 │
                             │ /opt/venv/lib/python3.11/site-packages/insightf │
                             │ ace/model_zoo/retinaface.py:152 in forward      │
                             │                                                 │
                             │   149 │   │   kpss_list = []                    │
                             │   150 │   │   input_size = tuple(img.shape[0:2] │
                             │   151 │   │   blob = cv2.dnn.blobFromImage(img, │
                             │       (self.input_mean, self.input_mean, self.i │
                             │ ❱ 152 │   │   net_outs = self.session.run(self. │
                             │   153 │   │                                     │
                             │   154 │   │   input_height = blob.shape[2]      │
                             │   155 │   │   input_width = blob.shape[3]       │
                             │                                                 │
                             │ /usr/src/immich_ml/sessions/ort.py:49 in run    │
                             │                                                 │
                             │    46 │   │   input_feed: dict[str, NDArray[np. │
                             │    47 │   │   run_options: Any = None,          │
                             │    48 │   ) -> list[NDArray[np.float32]]:       │
                             │ ❱  49 │   │   outputs: list[NDArray[np.float32] │
                             │       run_options)                              │
                             │    50 │   │   return outputs                    │
                             │    51 │                                         │
                             │    52 │   @property                             │
                             │                                                 │
                             │ /opt/venv/lib/python3.11/site-packages/onnxrunt │
                             │ ime/capi/onnxruntime_inference_collection.py:22 │
                             │ 0 in run                                        │
                             │                                                 │
                             │    217 │   │   if not output_names:             │
                             │    218 │   │   │   output_names = [output.name  │
                             │    219 │   │   try:                             │
                             │ ❱  220 │   │   │   return self._sess.run(output │
                             │    221 │   │   except C.EPFail as err:          │
                             │    222 │   │   │   if self._enable_fallback:    │
                             │    223 │   │   │   │   print(f"EP Error: {err!s │
                             ╰─────────────────────────────────────────────────╯
                             Fail: [ONNXRuntimeError] : 1 : FAIL : Non-zero     
                             status code returned while running FusedConv node. 
                             Name:'Conv_0' Status Message: CUDNN failure 5000:  
                             CUDNN_STATUS_EXECUTION_FAILED ; GPU=0 ;            
                             hostname=2887c47f5239 ;                            
                             file=/onnxruntime_src/onnxruntime/contrib_ops/cuda/
                             fused_conv.cc ; line=67 ;                          
                             expr=cudnnConvolutionForward(cudnnHandle, &alpha,  
                             Base::s_.x_tensor, Base::s_.x_data,                
                             Base::s_.w_desc, Base::s_.w_data,                  
                             Base::s_.conv_desc, Base::s_.algo, workspace.get(),
                             Base::s_.workspace_bytes, &beta, Base::s_.y_tensor,
                             Base::s_.y_data);