# pylint: disable=line-too-long,useless-suppression
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) Python Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------
import pytest
import os
import re
from typing import Tuple, Union, Dict, Any, Optional, List, Set
from devtools_testutils import recorded_by_proxy
from testpreparer import ContentUnderstandingPreparer
from testpreparer import ContentUnderstandingClientTestBase
from azure.ai.contentunderstanding.models import ContentAnalyzer
from azure.ai.contentunderstanding import ContentUnderstandingClient
from azure.ai.contentunderstanding.models import AnalysisInput
from test_helpers import (
    generate_analyzer_id,
    new_simple_content_analyzer_object,
    new_marketing_video_analyzer_object,
    assert_poller_properties,
    assert_simple_content_analyzer_result,
    save_analysis_result_to_file,
    save_keyframe_image_to_file,
)

from devtools_testutils import is_live, is_live_and_not_recording


def create_analyzer_and_assert_sync(
    client: ContentUnderstandingClient, analyzer_id: str, resource: Union[ContentAnalyzer, Dict[str, Any]]
) -> Any:
    """Create an analyzer, wait for it to finish, and sanity-check the poller (sync version).

    Args:
        client: The ContentUnderstandingClient instance
        analyzer_id: The analyzer ID to create
        resource: The analyzer resource (ContentAnalyzer object or dict)

    Returns:
        Any: The poller returned by ``client.begin_create_analyzer``, after completion

    Raises:
        AssertionError: If no poller is returned, the operation yields no result,
            the poller is not done, or its status / continuation token is missing
    """
    print(f"\nCreating analyzer {analyzer_id}")

    # Start the analyzer creation operation
    poller = client.begin_create_analyzer(
        analyzer_id=analyzer_id,
        resource=resource,
    )
    # Check for a missing poller before touching it, so the failure is a clear
    # assertion instead of an AttributeError raised on None.
    assert poller is not None, "begin_create_analyzer should return a poller"

    # Wait for the operation to complete
    print(f"  Waiting for analyzer {analyzer_id} to be created")
    response = poller.result()
    assert response is not None
    assert poller.done()
    print(f"  Analyzer {analyzer_id} is created successfully")

    # Additional poller assertions
    status = poller.status()
    assert status is not None
    assert status != ""
    assert poller.continuation_token() is not None

    return poller


def delete_analyzer_and_assert_sync(
    client: ContentUnderstandingClient, analyzer_id: str, created_analyzer: bool
) -> None:
    """Delete an analyzer during cleanup and fail the test if the delete call raises (sync version).

    Args:
        client: The ContentUnderstandingClient instance
        analyzer_id: The analyzer ID to delete
        created_analyzer: Whether the analyzer was created; when False nothing is deleted

    Raises:
        AssertionError: If ``client.delete_analyzer`` raises any exception
    """
    # Guard clause: nothing was created, so there is nothing to clean up
    if not created_analyzer:
        print(f"Analyzer {analyzer_id} was not created, no cleanup needed")
        return

    print(f"Cleaning up analyzer {analyzer_id}")
    try:
        client.delete_analyzer(analyzer_id=analyzer_id)
        print(f"Analyzer {analyzer_id} is deleted successfully")
    except Exception as delete_error:
        # Surface any deletion failure as a test failure, keeping the original cause chained
        raise AssertionError(f"Failed to delete analyzer {analyzer_id}: {delete_error}") from delete_error


def download_keyframes_and_assert_sync(
    client: ContentUnderstandingClient,
    analysis_operation_id: str,
    result: Any,
    test_py_file_dir: str,
    identifier: Optional[str] = None,
) -> None:
    """Download keyframes referenced by a video analysis result and assert they were saved (sync version).

    Keyframe IDs are collected from the markdown of every item in ``result.contents``.
    At most three keyframes are downloaded: the first, middle, and last by frame
    number (fewer when those positions coincide), always in that order.

    Args:
        client: The ContentUnderstandingClient instance
        analysis_operation_id: The operation ID from the analysis
        result: The analysis result containing markdown with keyframes
        test_py_file_dir: The directory where pytest files are located
        identifier: Optional unique identifier to avoid conflicts (e.g., analyzer_id)

    Returns:
        None

    Raises:
        AssertionError: If no keyframes are found in the analysis result, or a
            retrieved keyframe is None, not bytes, empty, or not saved to disk
    """
    keyframe_ids: Set[str] = set()

    # Iterate over contents to find keyframes from markdown
    for content in result.contents:
        # Extract keyframe IDs from "markdown" if it exists and is a string
        markdown_content = getattr(content, "markdown", "")
        if isinstance(markdown_content, str):
            # Use the same regex pattern as the official sample: (keyFrame\.\d+)\.jpg
            keyframe_ids.update(re.findall(r"(keyFrame\.\d+)\.jpg", markdown_content))

    print(f"Found keyframe IDs in markdown: {keyframe_ids}")

    # Assert that keyframe IDs were found in the video analysis
    assert (
        keyframe_ids
    ), "No keyframe IDs were found in the video analysis markdown content. Video analysis should generate keyframes that can be extracted using regex pattern."

    print(f"Successfully extracted {len(keyframe_ids)} keyframe IDs from video analysis")

    # Every ID matched "keyFrame.<digits>", so the text after the dot is always a valid integer.
    frame_numbers: List[int] = sorted({int(keyframe_id.partition(".")[2]) for keyframe_id in keyframe_ids})

    # Pick first, middle, and last. dict.fromkeys drops repeats while keeping this order,
    # so the download sequence is the same on every run (iterating a set of strings is not).
    selected_numbers = [frame_numbers[0], frame_numbers[len(frame_numbers) // 2], frame_numbers[-1]]

    # Build keyframe paths in the format expected by get_result_file API: "keyframes/{time_ms}"
    frames_to_download: List[str] = [f"keyframes/{number}" for number in dict.fromkeys(selected_numbers)]

    print(f"Selected frames to download: {frames_to_download}")

    # Try to retrieve the selected keyframe images using get_result_file API
    files_retrieved: int = 0

    for keyframe_id in frames_to_download:
        print(f"Trying to get result file with path: {keyframe_id}")
        response = client.get_result_file(
            operation_id=analysis_operation_id,
            path=keyframe_id,  # Use keyframe_id directly as path, no .jpg extension
        )
        assert response is not None, f"Response for path {keyframe_id} should not be None"

        # bytes objects are iterable too (yielding ints), so they must be recognised
        # before the generic "iterator of chunks" case or the join below would fail.
        if isinstance(response, (bytes, bytearray)):
            image_bytes = bytes(response)
        elif hasattr(response, "__iter__"):
            image_bytes = b"".join(response)
        else:
            image_bytes = response

        # Assert that we got valid, non-empty image data
        assert isinstance(
            image_bytes, bytes
        ), f"Response for {keyframe_id} should be bytes (image data), got {type(image_bytes)}"
        assert len(image_bytes) > 0, f"Image file content for {keyframe_id} should not be empty"

        print(f"Successfully retrieved image file for path: {keyframe_id}")
        print(f"Image file content length: {len(image_bytes)} bytes")

        # Save the image file using the helper function
        saved_file_path = save_keyframe_image_to_file(
            image_content=image_bytes,
            keyframe_id=keyframe_id,
            test_name="test_content_analyzers_get_result_file",
            test_py_file_dir=test_py_file_dir,
            identifier=identifier,
        )

        # Verify the saved file exists and has content
        assert os.path.exists(saved_file_path), f"Saved image file should exist at {saved_file_path}"
        assert os.path.getsize(saved_file_path) > 0, "Saved image file should not be empty"

        files_retrieved += 1
        print(f"Successfully downloaded keyframe image: {keyframe_id}")

    # Assert that we successfully downloaded all expected files
    assert files_retrieved == len(
        frames_to_download
    ), f"Expected to download {len(frames_to_download)} files, but only downloaded {files_retrieved}"
    print(f"Successfully completed get_result_file test - downloaded {files_retrieved} keyframe images")


class TestContentUnderstandingContentAnalyzersOperations(ContentUnderstandingClientTestBase):

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_begin_create_with_content_analyzer(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Build a simple ContentAnalyzer object with a description and one tag
        - Create the analyzer from that object via create_analyzer_and_assert_sync
        - Delete the analyzer afterwards if it was created
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "create_sync", is_async=False)

        analyzer_resource = new_simple_content_analyzer_object(
            analyzer_id=analyzer_id,
            description=f"test analyzer: {analyzer_id}",
            tags={"tag1_name": "tag1_value"},
        )

        # Flipped only once creation succeeds, so cleanup knows whether to delete
        created_analyzer = False
        try:
            create_analyzer_and_assert_sync(client, analyzer_id, analyzer_resource)
            created_analyzer = True
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_begin_create_with_json(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Describe the analyzer as a plain JSON-style dictionary
        - Create the analyzer from that dictionary via create_analyzer_and_assert_sync
        - Delete the analyzer afterwards if it was created
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "create_json_sync", is_async=False)

        # Wire-format (camelCase) analyzer definition passed as the resource
        analyzer_config = {
            "disableContentFiltering": False,
            "disableFaceBlurring": False,
            "enableFace": False,
            "enableFormula": True,
            "enableLayout": True,
            "enableOcr": True,
            "estimateFieldSourceAndConfidence": True,
            "returnDetails": True,
        }
        analyzer_payload = {
            "analyzerId": analyzer_id,
            "baseAnalyzerId": "prebuilt-document",
            "config": analyzer_config,
            "description": f"test analyzer: {analyzer_id}",
            "processingLocation": "global",
            "models": {"completion": "gpt-4.1"},
            "tags": {"tag1_name": "tag1_value"},
        }

        # Flipped only once creation succeeds, so cleanup knows whether to delete
        created_analyzer = False
        try:
            create_analyzer_and_assert_sync(client, analyzer_id, analyzer_payload)
            created_analyzer = True
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_update(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Create an analyzer with an initial description and tags
        - Fetch it and confirm the initial description and tags
        - Update it with a new description and tags
        - Fetch it again and confirm the new values were stored
        - Delete the analyzer afterwards if it was created
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "update_sync", is_async=False)

        # Expected values, kept separate from the objects handed to the client
        initial_description = f"Initial analyzer for update test: {analyzer_id}"
        initial_tags = {"initial_tag": "initial_value"}
        updated_description = f"Updated analyzer description: {analyzer_id}"
        updated_tags = {"updated_tag": "updated_value"}

        initial_analyzer = new_simple_content_analyzer_object(
            analyzer_id=analyzer_id,
            description=initial_description,
            tags=dict(initial_tags),
        )

        created_analyzer = False
        try:
            create_analyzer_and_assert_sync(client, analyzer_id, initial_analyzer)
            created_analyzer = True

            # Confirm the state before the update
            print(f"Getting analyzer {analyzer_id} before update")
            before = client.get_analyzer(analyzer_id=analyzer_id)
            assert before is not None
            assert before.analyzer_id == analyzer_id
            assert before.description == initial_description
            assert before.tags == initial_tags
            print(f"Initial analyzer state verified - description: {before.description}, tags: {before.tags}")

            # Only description and tags change; base analyzer and models are echoed back unchanged
            update_payload = {
                "analyzerId": analyzer_id,
                "baseAnalyzerId": before.base_analyzer_id,
                "models": before.models,
                "description": updated_description,
                "tags": dict(updated_tags),
            }

            print(f"Updating analyzer {analyzer_id}")
            update_response = client.update_analyzer(analyzer_id=analyzer_id, resource=update_payload)
            assert update_response is not None
            assert update_response.analyzer_id == analyzer_id

            # Confirm the state after the update
            print(f"Getting analyzer {analyzer_id} after update")
            after = client.get_analyzer(analyzer_id=analyzer_id)
            assert after is not None
            assert after.analyzer_id == analyzer_id
            assert after.description == updated_description
            assert after.tags == updated_tags
            print(f"Updated analyzer state verified - description: {after.description}, tags: {after.tags}")
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_delete(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Create analyzer for deletion test
        - Delete analyzer and verify the call returns None
        - Clean up only if the analyzer was created but the delete call did not complete
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "delete_sync", is_async=False)
        created_analyzer = False

        # Create a simple analyzer for deletion test
        content_analyzer = new_simple_content_analyzer_object(
            analyzer_id=analyzer_id,
            description=f"test analyzer for deletion: {analyzer_id}",
            tags={"test_type": "deletion"},
        )

        try:
            create_analyzer_and_assert_sync(client, analyzer_id, content_analyzer)
            created_analyzer = True

            # Delete the analyzer
            print(f"Deleting analyzer {analyzer_id}")
            response = client.delete_analyzer(analyzer_id=analyzer_id)
            # The delete call returned, so the analyzer no longer needs cleanup;
            # clearing the flag keeps the finally block from deleting it a second time.
            created_analyzer = False

            # Verify the delete response
            assert response is None

        finally:
            # Clean up if the analyzer was created but deletion failed
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_begin_analyze_url(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Create a simple analyzer for URL analysis
        - Start analysis of the sample invoice PDF by URL and check the poller
        - Wait for the result and validate it with assert_simple_content_analyzer_result
        - Save the analysis result to an output file
        - Delete the analyzer afterwards if it was created
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "analyze_url_sync", is_async=False)

        # Sample invoice PDF that is passed to the service by URL
        invoice_url = "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf"
        # Directory of this test module, handed to the result-saving helper
        output_dir = os.path.dirname(os.path.abspath(__file__))

        url_analyzer = new_simple_content_analyzer_object(
            analyzer_id=analyzer_id,
            description=f"test analyzer for URL analysis: {analyzer_id}",
            tags={"test_type": "url_analysis"},
        )

        created_analyzer = False
        try:
            create_analyzer_and_assert_sync(client, analyzer_id, url_analyzer)
            created_analyzer = True

            print(f"Starting URL analysis with analyzer {analyzer_id}")
            url_input = AnalysisInput(url=invoice_url)
            analysis_poller = client.begin_analyze(analyzer_id=analyzer_id, inputs=[url_input])
            assert_poller_properties(analysis_poller, "Analysis poller")

            print("Waiting for analysis to complete")
            analysis_result = analysis_poller.result()
            assert_simple_content_analyzer_result(analysis_result, "URL analysis result")

            save_analysis_result_to_file(
                analysis_result,
                "test_content_analyzers_begin_analyze_url",
                output_dir,
                analyzer_id,
            )
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_begin_analyze_binary(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Create a simple analyzer for binary analysis
        - Read test_data/sample_invoice.pdf from beside this test module
        - Start binary analysis with the PDF bytes and check the poller
        - Wait for the result and validate it with assert_simple_content_analyzer_result
        - Save the analysis result to an output file
        - Delete the analyzer afterwards if it was created
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "analyze_binary_sync", is_async=False)

        binary_analyzer = new_simple_content_analyzer_object(
            analyzer_id=analyzer_id,
            description=f"test analyzer for binary analysis: {analyzer_id}",
            tags={"test_type": "binary_analysis"},
        )

        created_analyzer = False
        try:
            create_analyzer_and_assert_sync(client, analyzer_id, binary_analyzer)
            created_analyzer = True

            # Resolve the sample relative to this file so the test works from any working directory
            module_dir = os.path.dirname(os.path.abspath(__file__))
            sample_path = os.path.join(module_dir, "test_data", "sample_invoice.pdf")
            with open(sample_path, "rb") as sample_file:
                pdf_bytes = sample_file.read()

            print(f"Starting binary analysis with analyzer {analyzer_id}")
            analysis_poller = client.begin_analyze_binary(analyzer_id=analyzer_id, binary_input=pdf_bytes)
            assert_poller_properties(analysis_poller, "Analysis poller")

            print("Waiting for analysis to complete")
            analysis_result = analysis_poller.result()
            assert_simple_content_analyzer_result(analysis_result, "Binary analysis result")

            save_analysis_result_to_file(
                analysis_result,
                "test_content_analyzers_begin_analyze_binary",
                module_dir,
                analyzer_id,
            )
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_get_result_file(self, contentunderstanding_endpoint: str) -> None:
        """
        Test Summary:
        - Skip unless running in live mode
        - Create a marketing video analyzer via new_marketing_video_analyzer_object
        - Begin video analysis of FlightSimulator.mp4, referenced by URL
        - Wait for analysis completion and read the operation ID from the poller
        - Use get_result_file to retrieve keyframe images generated from video analysis
        - Verify image file content is returned and saved
        - Clean up created analyzer
        """
        if not is_live():
            # pytest.skip raises to abort the test, so no explicit return is needed after it
            pytest.skip(
                "This test requires live mode to run, as it involves large video files that are too big for test proxy to record"
            )
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        analyzer_id = generate_analyzer_id(client, "get_result_file_sync", is_async=False)
        created_analyzer = False

        # Create a marketing video analyzer based on the template
        video_analyzer = new_marketing_video_analyzer_object(
            analyzer_id=analyzer_id,
            description=f"marketing video analyzer for get result file test: {analyzer_id}",
            tags={"test_type": "get_result_file_video"},
        )

        try:
            create_analyzer_and_assert_sync(client, analyzer_id, video_analyzer)
            created_analyzer = True

            # Use the FlightSimulator.mp4 video file from remote location
            video_file_url = "https://github.com/Azure-Samples/azure-ai-content-understanding-assets/raw/refs/heads/main/videos/sdk_samples/FlightSimulator.mp4"
            print(f"Using video file from URL: {video_file_url}")

            # Get test file directory for saving output
            test_file_dir = os.path.dirname(os.path.abspath(__file__))

            print("Starting video analysis to get operation ID")

            # Begin video analysis operation using URL
            analysis_poller = client.begin_analyze(analyzer_id=analyzer_id, inputs=[AnalysisInput(url=video_file_url)])
            assert_poller_properties(analysis_poller, "Video analysis poller")

            # Wait for the analysis to complete
            print("Waiting for video analysis to complete")
            analysis_result = analysis_poller.result()

            # Get the operation ID from the poller using custom poller's operation_id property
            from azure.ai.contentunderstanding.models import AnalyzeLROPoller

            assert isinstance(analysis_poller, AnalyzeLROPoller), "Should return custom AnalyzeLROPoller"

            analysis_operation_id = analysis_poller.operation_id
            assert analysis_operation_id is not None, "Operation ID should not be None"
            assert len(analysis_operation_id) > 0, "Operation ID should not be empty"
            print(f"Analysis operation ID: {analysis_operation_id}")

            # The result already returned by the poller lists which keyframes are available
            assert analysis_result is not None, "Analysis result should not be None"
            print(f"Analysis result contains {len(analysis_result.contents)} contents")

            # Download keyframes by calling client.get_result_file
            download_keyframes_and_assert_sync(
                client, analysis_operation_id, analysis_result, test_file_dir, analyzer_id
            )

        finally:
            # Always clean up the created analyzer, even if the test fails
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_analyze_binary_extract_markdown(self, contentunderstanding_endpoint: str) -> None:
        """Analyze a local PDF with prebuilt-documentSearch and check the extracted markdown.

        This test corresponds to .NET AnalyzeBinary_ExtractMarkdown.
        Asserts that exactly one content element is returned and that its
        markdown is a non-empty, non-whitespace string.
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)

        print("\n=== Test: Extract Markdown from Binary Document ===")

        # Locate the sample invoice relative to this test module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        sample_path = os.path.join(module_dir, "test_data", "sample_invoice.pdf")
        assert os.path.exists(sample_path), f"Sample file should exist at {sample_path}"
        print(f"Test file: {sample_path}")

        # Load the whole PDF into memory
        with open(sample_path, "rb") as sample_file:
            pdf_bytes = sample_file.read()
        assert len(pdf_bytes) > 0, "File should not be empty"
        print(f"File size: {len(pdf_bytes)} bytes")

        # Submit the bytes to the prebuilt analyzer
        print("\nAnalyzing document with prebuilt-documentSearch...")
        analysis_poller = client.begin_analyze_binary(
            analyzer_id="prebuilt-documentSearch",
            binary_input=pdf_bytes,
            content_type="application/pdf",
        )

        # Block until the operation finishes, then check the poller
        analysis = analysis_poller.result()
        assert_poller_properties(analysis_poller)

        # Shape of the result
        assert analysis is not None, "Analysis result should not be null"
        assert hasattr(analysis, "contents"), "Result should have contents attribute"
        contents = analysis.contents
        assert contents is not None, "Result contents should not be null"
        content_count = len(contents)
        assert content_count > 0, "Result should contain at least one content element"
        assert content_count == 1, "PDF file should have exactly one content element"
        print(f"✓ Analysis completed with {content_count} content element(s)")

        # The single content element carries the markdown
        first_content = contents[0]
        assert first_content is not None, "Content should not be null"

        assert hasattr(first_content, "markdown"), "Content should have markdown attribute"
        markdown = first_content.markdown
        assert markdown is not None, "Markdown content should not be null"
        assert isinstance(markdown, str), "Markdown should be a string"
        assert len(markdown) > 0, "Markdown content should not be empty"
        assert markdown.strip(), "Markdown content should not be just whitespace"

        print("\n✓ Markdown extraction successful:")
        print(f"  - Markdown length: {len(markdown)} characters")
        print(f"  - First 100 chars: {markdown[:100]}...")
        print("✓ Markdown extraction test completed successfully")

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_create_classifier(self, contentunderstanding_endpoint: str) -> None:
        """Create a classifier analyzer with three content categories and segmentation enabled.

        This test corresponds to .NET CreateClassifier.
        After creation the analyzer is fetched again; when its config exposes the
        content categories, the test asserts that at least three are present.
        Otherwise that check is skipped with a printed note.
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)
        created_analyzer = False
        analyzer_id = generate_analyzer_id(client, "test_classifier", is_async=False)

        print("\n=== Test: Create Classifier with Segmentation ===")
        print(f"Analyzer ID: {analyzer_id}")

        # Categories the classifier should distinguish between
        content_categories = {
            "Loan_Application": {
                "description": "Documents submitted by individuals or businesses to request funding"
            },
            "Invoice": {
                "description": "Billing documents issued by sellers or service providers to request payment"
            },
            "Bank_Statement": {
                "description": "Official statements issued by banks that summarize account activity"
            },
        }

        # Classifier definition: prebuilt-document base plus categories and segmentation
        classifier = {
            "baseAnalyzerId": "prebuilt-document",
            "description": "Custom classifier for financial document categorization",
            "config": {
                "returnDetails": True,
                "enableSegment": True,
                "contentCategories": content_categories,
            },
            "models": {"completion": "gpt-4.1"},
        }

        try:
            print(f"\nCreating classifier with {len(content_categories)} categories...")
            print(f"Categories: {', '.join(content_categories)}")

            create_analyzer_and_assert_sync(client, analyzer_id, classifier)
            created_analyzer = True

            # Fetch the stored analyzer to inspect what was persisted
            fetched = client.get_analyzer(analyzer_id=analyzer_id)
            assert fetched is not None, "Get analyzer response should not be null"

            fetched_config = getattr(fetched, "config", None)
            if fetched_config is None:
                print("  (Config verification skipped - result.config is None)")
            else:
                config_dict = fetched_config if isinstance(fetched_config, dict) else fetched_config.as_dict()
                # Accept either the wire name or the snake_case name, preferring the wire name
                categories_key = next(
                    (key for key in ("contentCategories", "content_categories") if key in config_dict),
                    None,
                )
                if categories_key is None:
                    print("  (Config exists but contentCategories not verified - may be service behavior)")
                else:
                    categories = config_dict[categories_key]
                    assert len(categories) >= 3, "Should have at least 3 content categories"
                    print(f"✓ Classifier created successfully with {len(categories)} categories")

            print("✓ Classifier test completed successfully")
        finally:
            # Runs on success and on failure alike
            delete_analyzer_and_assert_sync(client, analyzer_id, created_analyzer)

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_analyze_configs(self, contentunderstanding_endpoint: str) -> None:
        """Analyze the bundled sample invoice with ``prebuilt-documentSearch``.

        Counterpart of the .NET AnalyzeConfigs test. The PDF is read from the
        local ``test_data`` folder, submitted as binary input, and the single
        returned content element is checked for a sane page range. Extracted
        markdown is reported when present but is not required.

        Args:
            contentunderstanding_endpoint: Service endpoint used to create the client.
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)

        print("\n=== Test: Analyze with Specific Configurations ===")

        # The sample invoice lives in test_data/ next to this module
        file_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "test_data",
            "sample_invoice.pdf",
        )
        assert os.path.exists(file_path), f"Test file should exist at {file_path}"
        print(f"Test file: {file_path}")

        # Load the whole PDF into memory for the binary analyze call
        with open(file_path, "rb") as pdf_file:
            file_bytes = pdf_file.read()
        byte_count = len(file_bytes)
        assert byte_count > 0, "File should not be empty"
        print(f"File size: {byte_count} bytes")

        # Submit to prebuilt-documentSearch (formulas, layout, and OCR enabled)
        print("\nAnalyzing document with prebuilt-documentSearch (formulas, layout, OCR enabled)...")
        poller = client.begin_analyze_binary(
            analyzer_id="prebuilt-documentSearch",
            binary_input=file_bytes,
            content_type="application/pdf",
        )

        # Block until the long-running operation finishes, then check the poller
        result = poller.result()
        assert_poller_properties(poller)

        # The result must carry exactly one content element for a single PDF
        assert result is not None, "Analysis result should not be null"
        assert hasattr(result, "contents"), "Result should have contents attribute"
        contents = result.contents
        assert contents is not None, "Result should contain contents"
        content_count = len(contents)
        assert content_count > 0, "Result should have at least one content"
        assert content_count == 1, "PDF file should have exactly one content element"
        print(f"✓ Analysis completed with {content_count} content element(s)")

        # Page range checks on the document content
        document_content = contents[0]
        assert document_content is not None, "Content should not be null"
        assert hasattr(document_content, "start_page_number"), "Should have start_page_number"
        first_page = getattr(document_content, "start_page_number", None)
        assert first_page is not None and first_page >= 1, "Start page should be >= 1"

        if hasattr(document_content, "end_page_number"):
            last_page = getattr(document_content, "end_page_number", None)
            assert last_page is not None and last_page >= first_page, "End page should be >= start page"
            print(f"✓ Document page range: {first_page}-{last_page}")

        # Markdown (OCR/layout output) is optional: report it only when non-empty
        markdown_text = getattr(document_content, "markdown", None)
        if markdown_text:
            print(f"✓ Markdown extracted ({len(markdown_text)} characters)")

        print(f"✓ Configuration test completed successfully")

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_analyze_return_raw_json(self, contentunderstanding_endpoint: str) -> None:
        """Analyze the sample invoice and round-trip the result through JSON.

        Counterpart of the .NET AnalyzeReturnRawJson test. After the analysis
        finishes, the result is converted to a plain dictionary, dumped to a
        JSON string, and parsed back to confirm it yields a dictionary.

        Args:
            contentunderstanding_endpoint: Service endpoint used to create the client.
        """
        # json is only needed by this test, so it is imported locally
        import json

        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)

        print("\n=== Test: Analyze and Return Raw JSON ===")

        # The sample invoice lives in test_data/ next to this module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(module_dir, "test_data", "sample_invoice.pdf")
        assert os.path.exists(file_path), f"Sample file should exist at {file_path}"
        print(f"Test file: {file_path}")

        # Load the whole PDF into memory for the binary analyze call
        with open(file_path, "rb") as pdf_file:
            file_bytes = pdf_file.read()
        byte_count = len(file_bytes)
        assert byte_count > 0, "File should not be empty"
        print(f"File size: {byte_count} bytes")

        print("\nAnalyzing document with prebuilt-documentSearch...")
        poller = client.begin_analyze_binary(
            analyzer_id="prebuilt-documentSearch",
            binary_input=file_bytes,
            content_type="application/pdf",
        )

        # Block until the long-running operation finishes, then check the poller
        result = poller.result()
        assert_poller_properties(poller)

        assert result is not None, "Analysis result should not be null"

        # Prefer the model's own as_dict(); fall back to a plain dict() conversion
        if hasattr(result, "as_dict"):
            result_dict = result.as_dict()
        else:
            result_dict = dict(result)

        # Serialize, then parse back to prove the payload is valid JSON
        json_str = json.dumps(result_dict, indent=2)
        json_length = len(json_str)
        assert json_length > 0, "JSON string should not be empty"

        parsed = json.loads(json_str)
        assert parsed is not None, "Parsed JSON should not be null"
        assert isinstance(parsed, dict), "Parsed JSON should be a dictionary"

        # Only the first five top-level keys are shown to keep the output short
        key_preview = ", ".join(list(parsed.keys())[:5])
        print(f"✓ JSON serialization successful:")
        print(f"  - JSON length: {json_length} characters")
        print(f"  - Top-level keys: {key_preview}...")
        print(f"✓ Raw JSON test completed successfully")

    @ContentUnderstandingPreparer()
    @recorded_by_proxy
    def test_content_analyzers_delete_result(self, contentunderstanding_endpoint: str) -> None:
        """Run an invoice analysis, then delete its stored result.

        Counterpart of the .NET DeleteResult test. The analysis is started from
        a public document URL with ``prebuilt-invoice``; once it completes, the
        poller's operation ID is used to delete the result. Deletion counts as
        successful when ``delete_result`` raises no exception.

        Args:
            contentunderstanding_endpoint: Service endpoint used to create the client.
        """
        client: ContentUnderstandingClient = self.create_client(endpoint=contentunderstanding_endpoint)

        print("\n=== Test: Delete Analysis Result ===")

        # Publicly hosted sample invoice used as the analysis input
        document_url = "https://github.com/Azure-Samples/azure-ai-content-understanding-python/raw/refs/heads/main/data/invoice.pdf"
        print(f"Document URL: {document_url}")

        # Kick off the long-running analysis
        print("\nStarting analysis operation...")
        poller = client.begin_analyze(
            analyzer_id="prebuilt-invoice",
            inputs=[AnalysisInput(url=document_url)],
        )

        # Block until the operation finishes
        print("Waiting for analysis to complete...")
        result = poller.result()

        # The operation ID exposed by the poller identifies the result to delete
        operation_id = poller.operation_id
        assert operation_id is not None, "Operation ID should not be null"
        assert len(operation_id) > 0, "Operation ID should not be empty"
        print(f"Operation ID: {operation_id}")

        # Make sure there actually is a result before removing it
        assert result is not None, "Analysis result should not be null"
        assert hasattr(result, "contents"), "Result should have contents"
        contents = result.contents
        assert contents is not None, "Result should contain contents"
        assert len(contents) > 0, "Result should have at least one content"
        print(f"✓ Analysis completed successfully")

        # Remove the stored result; an exception here fails the test
        print(f"\nDeleting analysis result (operation ID: {operation_id})...")
        client.delete_result(operation_id=operation_id)

        print(f"✓ Delete result completed successfully")
        print("Note: Deletion success verified by no exception thrown")
        print(f"✓ Delete result test completed successfully")
