From 2ee39f64fe4b0f5aadc21d901a48ea322a58a087 Mon Sep 17 00:00:00 2001
From: Stephen Hu <stephenhu@seismic.com>
Date: Mon, 9 Feb 2026 11:59:24 +0800
Subject: [PATCH] Refactor: improve ppt shape order logic (#13054)

### What problem does this PR solve?

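Improve the PPT shape ordering logic: move the duplicated shape sort into a shared `__sort_shapes` helper, and make group-shape sorting tolerate shapes whose `top`/`left` is `None`, as the slide-level sorting already did.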

### Type of change

- [x] Refactoring
---
 deepdoc/parser/ppt_parser.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/deepdoc/parser/ppt_parser.py b/deepdoc/parser/ppt_parser.py
index 1b04b4d7c3..afff23d7de 100644
--- a/deepdoc/parser/ppt_parser.py
+++ b/deepdoc/parser/ppt_parser.py
@@ -22,6 +22,16 @@ from pptx import Presentation
 class RAGFlowPptParser:
     def __init__(self):
         super().__init__()
+        self._shape_cache = {}
+
+    def __sort_shapes(self, shapes):
+        """Return shapes sorted by (top // 10, left); a None top/left counts as 0.
+
+        Not cached: keying a cache on id(shapes) pins shapes and can alias a reused id.
+        """
+        def position(s):
+            return ((s.top if s.top is not None else 0) // 10, s.left if s.left is not None else 0)
+        return sorted(shapes, key=position)
 
     def __get_bulleted_text(self, paragraph):
         is_bulleted = bool(paragraph._p.xpath("./a:pPr/a:buChar")) or bool(paragraph._p.xpath("./a:pPr/a:buAutoNum")) or bool(paragraph._p.xpath("./a:pPr/a:buBlip"))
@@ -62,7 +72,7 @@ class RAGFlowPptParser:
             # Handle group shape
             if shape_type == 6:
                 texts = []
-                for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)):
+                for p in self.__sort_shapes(shape.shapes):
                     t = self.__extract(p)
                     if t:
                         texts.append(t)
@@ -86,8 +96,7 @@ class RAGFlowPptParser:
             if i >= to_page:
                 break
             texts = []
-            for shape in sorted(
-                    slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left if x.left is not None else 0)):
+            for shape in self.__sort_shapes(slide.shapes):
                 txt = self.__extract(shape)
                 if txt:
                     texts.append(txt)