Giktator (New Contributor) · Thursday

Exception from a Photon-based warehouse: data/20250128_152024_32868_wpkza_39d90943-d80c-476f-9d6f-bd9e15bed6ee.

```
shaded.databricks.org.apache.hadoop.fs.s3a.AWSClientIOException
    at com.databricks.sql.io.FileReadException$.fileReadError(FileReadException.scala:48)
    at com.databricks.sql.io.FileReadException.fileReadError(FileReadException.scala)
    at 0xbc9f687 .RecordMissingOrCorruptFile(external/workspace_spark_3_5/photon/jni-wrappers/jni-io-broker.cc:128)
    at 0x6e0f037 .TryAndMaybeSkipFileOnError(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:278)
    at 0x6e0d41f .WaitForIO(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:873)
    at 0x6e0d41f .DoHasNextImpl(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:692)
    at 0x6e0cb03 .HasNextImpl(external/workspace_spark_3_5/photon/exec-nodes/file-scan-node.cc:463)
    at 0x6d7c5af .OpenImpl(external/workspace_spark_3_5/photon/exec-nodes/grouping-agg-node.cc:126)
    at 0x6d7c3af .OpenImpl(external/workspace_spark_3_5/photon/exec-nodes/shuffle-sink-node.cc:171)
    at com.databricks.photon.JniApiImpl.open(Native Method)
    at com.databricks.photon.JniApi.open(JniApi.scala)
    at com.databricks.photon.JniExecNode.open(JniExecNode.java:73)
    at com.databricks.photon.PhotonPreShuffleResultHandler.$anonfun$getResult$1(PhotonExec.scala:1024)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.photon.PhotonResultHandler.timeit(PhotonResultHandler.scala:30)
    at com.databricks.photon.PhotonResultHandler.timeit$(PhotonResultHandler.scala:28)
    at com.databricks.photon.PhotonPreShuffleResultHandler.timeit(PhotonExec.scala:1017)
    at com.databricks.photon.PhotonPreShuffleResultHandler.getResult(PhotonExec.scala:1024)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.open(PhotonBasicEvaluatorFactory.scala:252)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.hasNextImpl(PhotonBasicEvaluatorFactory.scala:257)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.$anonfun$hasNext$1(PhotonBasicEvaluatorFactory.scala:275)
    at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
    at org.apache.spark.TaskContext.runFuncAsBillable(TaskContext.scala:268)
    at com.databricks.photon.PhotonBasicEvaluatorFactory$PhotonBasicEvaluator$$anon$1.hasNext(PhotonBasicEvaluatorFactory.scala:275)
    at com.databricks.photon.CloseableIterator$$anon$10.hasNext(CloseableIterator.scala:211)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
    at com.databricks.photon.MetadataOnlyShuffleWriter.write(MetadataOnlyShuffleWriter.scala:50)
    at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)
    at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:87)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:82)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:227)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:204)
    at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:166)
    at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)
    at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)
    at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)
    at scala.util.Using$.resource(Using.scala:269)
    at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)
    at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:160)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.scheduler.Task.run(Task.scala:105)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$11(Executor.scala:1228)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:80)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:77)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:111)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1232)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:1088)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.base/java.lang.Thread.run(Thread.java:840)
Caused by: shaded.databricks.org.apache.hadoop.fs.s3a.AWSClientIOException: read on s3://path: com.amazonaws.SdkClientException: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:251)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:136)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:339)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:335)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:310)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:587)
    at java.base/java.io.DataInputStream.read(DataInputStream.java:151)
    at com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$3(LokiS3FS.scala:254)
    at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
    at com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
    at com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:254)
    at java.base/java.io.DataInputStream.read(DataInputStream.java:151)
    at com.databricks.sql.io.HDFSStorage$ReadFileImpl.lambda$fetchRange$0(HDFSStorage.java:108)
    at com.databricks.sql.io.Futures.run(Futures.java:74)
    at com.databricks.sql.io.PendingFutures.submit(PendingFutures.java:64)
    at com.databricks.sql.io.Storage$ReadFile.submit(Storage.java:209)
    at com.databricks.sql.io.HDFSStorage$ReadFileImpl.fetchRange(HDFSStorage.java:102)
    at com.databricks.sql.io.NativeStorage$ReadFileImpl.fetchRange(NativeStorage.java:437)
    at com.databricks.sql.io.BlockCachingStorage$ReadFileImpl.lambda$fetchRange$0(BlockCachingStorage.java:146)
    at com.databricks.sql.io.PendingFutures.lambda$submit$0(PendingFutures.java:72)
    at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:161)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at com.databricks.sql.transaction.tahoe.mst.MSTThreadHelper$.runWithMstTxnId(MSTThreadHelper.scala:55)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$5(SparkThreadLocalForwardingThreadPoolExecutor.scala:116)
    at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:115)
    at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:114)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:158)
    at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:161)
    ... 3 more
Caused by: com.amazonaws.SdkClientException: Data read has a different length than the expected: dataLength=0; expectedLength=829; includeSkipped=true; in.getClass()=class com.amazonaws.services.s3.AmazonS3Client$2; markedSupported=false; marked=0; resetSinceLastMarked=false; markCount=0; resetCount=0
    at com.amazonaws.util.LengthCheckInputStream.checkLength(LengthCheckInputStream.java:151)
    at com.amazonaws.util.LengthCheckInputStream.read(LengthCheckInputStream.java:109)
    at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
    at com.amazonaws.services.s3.internal.S3AbortableInputStream.read(S3AbortableInputStream.java:125)
    at com.amazonaws.internal.SdkFilterInputStream.read(SdkFilterInputStream.java:90)
```
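The bottom of the chain is the AWS SDK's length check: `LengthCheckInputStream` compares the bytes actually delivered by the HTTP stream against the length S3 advertised, and here a read that should have returned 829 bytes returned none (`dataLength=0; expectedLength=829`). That signature typically means the stream was cut short mid-transfer, or the object was overwritten or deleted between the file listing and the scan. Below is a minimal diagnostic sketch, assuming a Scala notebook attached to a cluster with access to the same bucket; the path is a hypothetical placeholder for the key redacted as `s3://path` in the trace. It repeats the same check outside Photon, comparing the length the listing reports against the bytes that can actually be read:

```scala
import org.apache.hadoop.fs.Path

// Hypothetical placeholder for the object redacted as s3://path in the trace above.
val path = new Path("s3://<bucket>/<object-key>")

// Reuse the cluster's Hadoop/S3A configuration (credentials, endpoint, etc.).
val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)

// Length according to the listing/HEAD -- what the reader expects.
val expectedLen = fs.getFileStatus(path).getLen

// Length according to an actual end-to-end read of the stream.
val in = fs.open(path)
val buf = new Array[Byte](8192)
var actualLen = 0L
var n = in.read(buf)
while (n >= 0) { actualLen += n; n = in.read(buf) }
in.close()

// A mismatch here reproduces the LengthCheckInputStream failure condition.
println(s"expected=$expectedLen actuallyRead=$actualLen")
```

If the two lengths disagree, or the plain read fails the same way, the object itself is inconsistent and Photon is just the messenger; if the data belongs to a Delta table, a concurrent overwrite or VACUUM racing the query is a common explanation, so it is worth checking whether the file still exists at its listed size.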