mindee · ianardee · Jun 24, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 24, 2026
diff --git a/mindee/image/__init__.py b/mindee/image/__init__.py
@@ -1,3 +0,0 @@
-from mindee.image.image_compressor import compress_image
-
-__all__ = ["compress_image"]

diff --git a/mindee/image/extracted_image.py b/mindee/image/extracted_image.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-import io
 from pathlib import Path
-from typing import Any
+from typing import Any, BinaryIO
 
 from mindee.dependencies.checkers import PILLOW_AVAILABLE
 from mindee.dependencies.decorators import requires_pillow
 from mindee.error.mindee_error import MindeeError
-from mindee.input.file_input import FileInput
-from mindee.input.local_input_source import LocalInputSource
+from mindee.input.bytes_input import BytesInput
 from mindee.logger import logger
 
 if PILLOW_AVAILABLE:
@@ -21,78 +19,68 @@
 class ExtractedImage:
     """Generic class for image extraction."""
 
+    buffer: BinaryIO
+    filename: str
     _page_id: int
     """Id of the page the image was extracted from."""
     _element_id: int
     """Id of the element on a given page."""
-    filename: str
-    """Name of the file the image was extracted from."""
 
     def __init__(
-        self, input_source: LocalInputSource, page_id: int, element_id: int
+        self,
+        img_byte_stream: BinaryIO,
+        filename: str,
+        page_id: int,
+        element_id: int,
     ) -> None:
         """
         Initialize the ExtractedImage with a buffer and an internal file name.
 
-        :param input_source: Local source for input.
+        :param img_byte_stream: Binary stream holding the raw image bytes.
+        :param filename: Name of the file.
         :param page_id: ID of the page the element was found on.
         :param element_id: ID of the element in a page.
         """
-        self.buffer = io.BytesIO(input_source.file_object.read())
-        self.buffer.name = input_source.filename
-        self.filename = input_source.filename
-        if input_source.is_pdf():
-            extension = "jpg"
-        else:
-            extension = Path(input_source.filename).resolve().suffix
+        self.buffer = img_byte_stream
         self.buffer.seek(0)
-        pg_number = str(page_id).zfill(3)
-        elem_number = str(element_id).zfill(3)
-        self.internal_file_name = (
-            f"{input_source.filename}_page{pg_number}-{elem_number}.{extension}"
-        )
+        self.filename = filename
         self._page_id = page_id
         self._element_id = 0 if element_id is None else element_id
 
     @requires_pillow
-    def save_to_file(self, output_path: Path | str, file_format: str | None = None):
+    def save_to_file(self, output_path: Path | str):
         """
         Saves the document to a file.
 
         :param output_path: Path to save the file to.
-        :param file_format: Optional PIL-compatible format for the file. Inferred from file extension if not provided.
         :raises MindeeError: If an invalid path or filename is provided.
         """
+        out_path = Path(output_path)
+        if not out_path.resolve().is_dir():
+            raise MindeeError("Provided path is not a directory.")
+        out_file_path = out_path / self.filename
         try:
-            resolved_path = Path(output_path).resolve()
-            if not file_format and len(resolved_path.suffix) < 1:
-                raise ValueError("Invalid file format.")
             self.buffer.seek(0)
             image = Image.open(self.buffer)
-            if file_format:
-                image.save(resolved_path, format=file_format)
-            else:
-                image.save(resolved_path)
-            logger.info("File saved successfully to '%s'.", resolved_path)
-        except TypeError as e:
-            raise MindeeError("Invalid path/filename provided.") from e
+            image.save(out_file_path)
+            logger.info("File saved successfully to '%s'.", out_file_path)
         except Exception as e:
             print(e)
             raise MindeeError(f"Could not save file {Path(output_path).name}.") from e
 
-    def as_input_source(self) -> FileInput:
+    def as_input_source(self) -> BytesInput:
         """
         Return the file as a Mindee-compatible BufferInput source.
 
         :returns: A BufferInput source.
         """
         self.buffer.seek(0)
-        return FileInput(self.buffer)
+        return BytesInput(self.buffer.read(), self.filename)
 
     @property
     def page_id(self):
         """
-        ID of the page the receipt was found on.
+        ID of the page the image was found on.
 
         :return: A valid page ID.
         """

diff --git a/mindee/image/extracted_images.py b/mindee/image/extracted_images.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+from mindee.image.extracted_image import ExtractedImage
+
+
+class ExtractedImages(list[ExtractedImage]):
+    """List of extracted images."""
+
+    def save_all_to_disk(self, output_path: Path | str) -> None:
+        """Save all extracted images to disk."""
+        for image in self:
+            image.save_to_file(output_path)
diff --git a/mindee/image/image_extractor.py b/mindee/image/image_extractor.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import io
+from pathlib import Path
 from typing import Any, BinaryIO
 
 from mindee.dependencies import requires_pypdfium2
@@ -10,7 +11,6 @@
 from mindee.geometry.point import Point
 from mindee.geometry.polygon import Polygon, get_min_max_x, get_min_max_y
 from mindee.image.extracted_image import ExtractedImage
-from mindee.input.bytes_input import BytesInput
 from mindee.input.local_input_source import LocalInputSource
 
 if PYPDFIUM2_AVAILABLE:
@@ -29,7 +29,7 @@
 
 @requires_pillow
 @requires_pypdfium2
-def attach_image_as_new_file(  # type: ignore
+def _attach_image_as_new_file(  # type: ignore
     input_buffer: BinaryIO,
 ) -> pdfium.PdfDocument:
     """
@@ -66,7 +66,7 @@ def extract_image_from_polygon(
     width: float,
     height: float,
     file_format: str,
-) -> bytes:
+) -> BinaryIO:
     """
     Crops the image from the given polygon.
 
@@ -87,11 +87,11 @@ def extract_image_from_polygon(
             int(min_max_y.max * height),
         )
     )
-    return save_image_to_buffer(cropped_image, file_format)
+    return _save_image_to_buffer(cropped_image, file_format)
 
 
 @requires_pillow
-def save_image_to_buffer(image: Image.Image, file_format: str) -> bytes:
+def _save_image_to_buffer(image: Image.Image, file_format: str) -> BinaryIO:
     """
     Saves an image as a buffer.
 
@@ -102,7 +102,7 @@ def save_image_to_buffer(image: Image.Image, file_format: str) -> bytes:
     buffer = io.BytesIO()
     image.save(buffer, format=file_format)
     buffer.seek(0)
-    return buffer.read()
+    return buffer
 
 
 @requires_pillow
@@ -145,7 +145,8 @@ def extract_multiple_images_from_source(
     :param polygons: List of coordinates to pull the elements from.
     :return: List of byte arrays representing the extracted elements.
     """
-    page = load_pdf_doc(input_source).get_page(page_id)
+    stem = Path(input_source.filename).stem
+    page = _load_pdf_doc(input_source).get_page(page_id)
     page_content = page.render().to_pil()
     width, height = page.get_size()
 
@@ -159,20 +160,17 @@ def extract_multiple_images_from_source(
         )
         extracted_elements.append(
             ExtractedImage(
-                BytesInput(
-                    image_data,
-                    f"{input_source.filename}_page{page_id + 1}-{element_id}.{file_extension}",
-                ),
+                image_data,
+                f"{stem}_page-{(page_id + 1):03d}-item-{(element_id + 1):03d}.{file_extension}",
                 page_id,
                 element_id,
             )
         )
-
     return extracted_elements
 
 
 @requires_pypdfium2
-def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument:  # type: ignore
+def _load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument:  # type: ignore
     """
     Loads a PDF document from a local input source.
 
@@ -183,4 +181,4 @@ def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument:  # type: i
         input_file.file_object.seek(0)
         return pdfium.PdfDocument(input_file.file_object.read())
 
-    return attach_image_as_new_file(input_file.file_object)
+    return _attach_image_as_new_file(input_file.file_object)
diff --git a/mindee/input/local_input_source.py b/mindee/input/local_input_source.py
@@ -10,7 +10,7 @@
 from mindee.dependencies.checkers import PYPDFIUM2_AVAILABLE
 from mindee.error.mimetype_error import MimeTypeError
 from mindee.error.mindee_error import MindeeError, MindeeSourceError
-from mindee.image import compress_image
+from mindee.image.image_compressor import compress_image
 from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions
 from mindee.logger import logger
 from mindee.pdf.pdf_compressor import compress_pdf

diff --git a/mindee/pdf/extracted_pdf.py b/mindee/pdf/extracted_pdf.py
@@ -18,18 +18,22 @@
 class ExtractedPDF:
     """An extracted sub-Pdf."""
 
-    pdf_bytes: BinaryIO
+    buffer: BinaryIO
     filename: str
+    _page_indexes: tuple[int, int]
 
-    def __init__(self, pdf_bytes: BinaryIO, filename: str):
-        self.pdf_bytes = pdf_bytes
+    def __init__(
+        self, pdf_byte_stream: BinaryIO, filename: str, page_indexes: tuple[int, int]
+    ):
+        self.buffer = pdf_byte_stream
         self.filename = filename
+        self._page_indexes = page_indexes
 
     @requires_pypdfium2
     def get_page_count(self) -> int:
         """Get the number of pages in the PDF file."""
         try:
-            pdf = pdfium.PdfDocument(self.pdf_bytes)
+            pdf = pdfium.PdfDocument(self.buffer)
             return len(pdf)
         except Exception as e:
             raise MindeeError(
@@ -40,21 +44,28 @@ def save_to_file(self, output_path: Path | str):
         """
         Writes the contents of the current PDF object to a file.
 
-        :param output_path: Path of the destination file. If
-         not extension is provided, pdf will be appended by default.
+        :param output_path: Path of an existing destination directory.
+        The PDF is written inside it, using this object's ``filename``.
         """
         out_path = Path(output_path)
-        if out_path.resolve().is_dir():
-            raise MindeeError("Provided path is not a file.")
-        if not output_path or not out_path.parent.exists():
-            raise MindeeError("Invalid save path provided {}.")
-        if out_path.suffix.lower() != "pdf":
-            out_path = out_path.parent / (out_path.stem + "." + "pdf")
-        self.pdf_bytes.seek(0)
-        with open(out_path, "wb") as out_file:
-            out_file.write(self.pdf_bytes.read())
+        if not out_path.resolve().is_dir():
+            raise MindeeError("Provided path is not a directory.")
+        out_file_path = out_path / self.filename
+
+        try:
+            self.buffer.seek(0)
+            with open(out_file_path, "wb") as out_file:
+                out_file.write(self.buffer.read())
+        except Exception as e:
+            print(e)
+            raise MindeeError(f"Could not save file {out_file_path}.") from e
 
     def as_input_source(self) -> BytesInput:
         """Returns the current PDF object as a usable BytesInput source."""
-        self.pdf_bytes.seek(0)
-        return BytesInput(self.pdf_bytes.read(), self.filename)
+        self.buffer.seek(0)
+        return BytesInput(self.buffer.read(), self.filename)
+
+    @property
+    def page_indexes(self) -> tuple[int, int]:
+        """This PDF was extracted from this page range of the original PDF."""
+        return self._page_indexes
diff --git a/mindee/pdf/extracted_pdfs.py b/mindee/pdf/extracted_pdfs.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+
+from mindee.pdf.extracted_pdf import ExtractedPDF
+
+
+class ExtractedPDFs(list[ExtractedPDF]):
+    """List of extracted PDFs."""
+
+    def save_all_to_disk(self, output_path: Path | str) -> None:
+        """Save all extracted PDFs to disk."""
+
+        for image in self:
+            image.save_to_file(output_path)
diff --git a/mindee/pdf/pdf_extractor.py b/mindee/pdf/pdf_extractor.py
@@ -68,7 +68,7 @@ def extract_sub_documents(
         """
         Extract the sub-documents from the main pdf, based on the given list of page indexes.
 
-        :param page_indexes: List of list of numbers, representing page indexes.
+        :param page_indexes: 2D list of numbers, representing page indexes.
         :return: A list of created PDFS.
         """
         extracted_pdfs: list[ExtractedPDF] = []
@@ -80,10 +80,12 @@ def extract_sub_documents(
             for page_index in page_index_elem:
                 if page_index > self.get_page_count():
                     raise MindeeError(f"Index {page_index} is out of range.")
-            formatted_max_index = f"{page_index_elem[len(page_index_elem) - 1] + 1:03d}"
-            field_filename = f"{stem}_{(page_index_elem[0] + 1):03d}-{formatted_max_index}{extension}"
+            first_page = page_index_elem[0]
+            last_page = page_index_elem[len(page_index_elem) - 1]
             extracted_pdf = ExtractedPDF(
-                self.cut_pages(page_index_elem), field_filename
+                self.cut_pages(page_index_elem),
+                f"{stem}_pages-{(first_page + 1):03d}-{(last_page + 1):03d}{extension}",
+                (first_page, last_page),
             )
             extracted_pdfs.append(extracted_pdf)
         return extracted_pdfs

diff --git a/mindee/v2/file_operations/crop.py b/mindee/v2/file_operations/crop.py
@@ -1,9 +1,9 @@
 from mindee.error import MindeeError
 from mindee.geometry import Point, Polygon
 from mindee.image.extracted_image import ExtractedImage
+from mindee.image.extracted_images import ExtractedImages
 from mindee.image.image_extractor import extract_multiple_images_from_source
 from mindee.input.local_input_source import LocalInputSource
-from mindee.v2.file_operations.crop_files import CropFiles
 from mindee.v2.parsing.inference.field import FieldLocation
 from mindee.v2.product.crop.crop_item import CropItem
 
@@ -25,7 +25,7 @@ def extract_single_crop(
 
 def extract_multiple_crops(
     input_source: LocalInputSource, crops: list[CropItem]
-) -> CropFiles:
+) -> ExtractedImages:
     """
     Extracts individual receipts from multi-receipts documents.
 
@@ -49,4 +49,4 @@ def extract_multiple_crops(
                 polygon,
             )
         )
-    return CropFiles(images)
+    return ExtractedImages(images)
diff --git a/mindee/v2/file_operations/crop_files.py b/mindee/v2/file_operations/crop_files.py
Original file line number	Diff line number	Diff line change
		@@ -1,3 +0,0 @@
		from mindee.image.image_compressor import compress_image

		__all__ = ["compress_image"]