Giktator (New Contributor) · Thursday

Exception from a Photon-based warehouse: data/20250128_152024_32868_wpkza_39d90943-d80c-476f-9d6f-bd9e15bed6ee.

```
shaded.databricks.org.apache.hadoop.fs.s3a.AWSClientIOException
    at com.databricks.sql.io.FileReadException$.fileReadError(FileReadException.scala:48)
    at com.databricks.sql.io.FileReadException.fileReadError(FileReadException.scala)
    at 0xbc9f687 .RecordMissingOrCorruptFile(external/workspace_spark_3_5/photon/jni-wrappers/jni-io-broker.cc:128)
    at 0x6e0f037 .TryAndMaybeSkipFileOnError(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:278)
    at 0x6e0d41f .WaitForIO(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:873)
    at 0x6e0d41f .DoHasNextImpl(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:692)
    at 0x6e0cb03 .HasNextImpl(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:463)
    at 0x6d7c5af .OpenImpl(external/workspace_spark_3_5/photon/exec-nodes/grouping-agg-node.cc:126)
    at 0x6d7c3af .OpenImpl(external/workspace_spark_3_5/photon/exec-nodes/shuffle-sink-node.cc:171)
    at com.databricks.photon.JniApiImpl.open(Native Method)
    at com.databricks.photon.JniApi.open(JniApi.scala)
    at com.databricks.photon.JniExecNode.open(JniExecNode.java:73)
    at com.databricks.photon.PhotonPreShuffleResultHandler.$anonfun$getResult$1(PhotonExec.scala:1024)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.photon.PhotonResultHandler.timeit(PhotonResultHandler.scala:30)
    at com.databricks.photon.PhotonResultHandler.timeit$(PhotonResultHandler.scala:28)
    at com.databricks.photon.PhotonPreShuffleResultHandler.timeit(PhotonExec.scala:1017)
    at com.databricks.photon.PhotonPreShuffleResultHandler.getResult(PhotonExec.scala:1024)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.open(PhotonBasicEvaluatorFactory.scala:252)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.hasNextImpl(PhotonBasicEvaluatorFactory.scala:257)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.$anonfun$hasNext$1(PhotonBasicEvaluatorFactory.scala:275)
    at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
    at org.apache.spark.TaskContext.runFuncAsBillable(TaskContext.scala:268)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.hasNext(PhotonBasicEvaluatorFactory.scala:275)
    at com.databricks.photon.CloseableIterator$$anon$10.hasNext(CloseableIterator.scala:211)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at com.databricks.photon.MetadataOnlyShuffleWriter.write(MetadataOnlyShuffleWriter.scala:50)
    at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)
    at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:87)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:82)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:227)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:204)
    at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:166)
    at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)
    at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)
    at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)
    at scala.util.Using$.resource(Using.scala:269)
    at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)
    at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:160)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.Task.run(Task.scala:105)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$11(Executor.scala:1228)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:111)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1232)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:1088)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: shaded.databricks.org.apache.hadoop.fs.s3a.AWSClientIOException: read on s3://path: com.amazonaws.SdkClientException: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:251)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:136)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:339)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:335)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:310)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:587)
    at java.base/java.io.DataInputStream.read(DataInputStream.java:151)
    at com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$3(LokiS3FS.scala:254)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
    at com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:254)
    at java.base/java.io.DataInputStream.read(DataInputStream.java:151)
    at com.databricks.sql.io.HDFSStorage$ReadFileImpl.lambda$fetchRange$0(HDFSStorage.java:108)
    at com.databricks.sql.io.Futures.run(Futures.java:74)
    at com.databricks.sql.io.PendingFutures.submit(PendingFutures.java:64)
    at com.databricks.sql.io.Storage$ReadFile.submit(Storage.java:209)
    at com.databricks.sql.io.HDFSStorage$ReadFileImpl.fetchRange(HDFSStorage.java:102)
    at com.databricks.sql.io.NativeStorage$ReadFileImpl.fetchRange(NativeStorage.java:437)
    at com.databricks.sql.io.BlockCachingStorage$ReadFileImpl.lambda$fetchRange$0(BlockCachingStorage.java:146)
    at com.databricks.sql.io.PendingFutures.lambda$submit$0(PendingFutures.java:72)
    at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:161)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.sql.transaction.tahoe.mst.MSTThreadHelper$.runWithMstTxnId(MSTThreadHelper.scala:55)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$5(SparkThreadLocalForwardingThreadPoolExecutor.scala:116)
    at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:115)
    at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:114)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:158)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:161)
    ... 3 more
Caused by: com.amazonaws.SdkClientException: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0
    at com.amazonaws.util.LengthCheckInputStream.checkLength(LengthCheckInputStream.java:151)
    at com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:109)
    at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
    at com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
    at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
```
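The bottom of the chain is the AWS SDK's length check: `LengthCheckInputStream` compares the bytes actually delivered by the HTTP stream against the length S3 advertised, and here a read that should have returned 829 bytes returned none (`dataLength=0; expectedLength=829`). That signature typically means the stream was cut short mid-transfer, or the object was overwritten or deleted between the file listing and the scan. Below is a minimal diagnostic sketch, assuming a Scala notebook attached to a cluster with access to the same bucket; the path is a hypothetical placeholder for the key redacted as `s3://path` in the trace. It repeats the same check outside Photon, comparing the length the listing reports against the bytes that can actually be read:

```scala
import org.apache.hadoop.fs.Path

// Hypothetical placeholder for the object redacted as s3://path in the trace above.
val path = new Path("s3://<bucket>/<object-key>")

// Reuse the cluster's Hadoop/S3A configuration (credentials, endpoint, etc.).
val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)

// Length according to the listing/HEAD -- what the reader expects.
val expectedLen = fs.getFileStatus(path).getLen

// Length according to an actual end-to-end read of the stream.
val in = fs.open(path)
val buf = new Array[Byte](8192)
var actualLen = 0L
var n = in.read(buf)
while (n >= 0) { actualLen += n; n = in.read(buf) }
in.close()

// A mismatch here reproduces the LengthCheckInputStream failure condition.
println(s"expected=$expectedLen actuallyRead=$actualLen")
```

If the two lengths disagree, or the plain read fails the same way, the object itself is inconsistent and Photon is just the messenger; if the data belongs to a Delta table, a concurrent overwrite or VACUUM racing the query is a common explanation, so it is worth checking whether the file still exists at its listed size.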