From cb47cbdc5d3bbaaccff9afdc6b6063d13b96206d Mon Sep 17 00:00:00 2001
From: Sam Mirazi
Date: Sun, 1 Jun 2025 00:03:34 -0700
Subject: [PATCH] 10

---
 tests/test_benchmark.py | 179 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 tests/test_benchmark.py

diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
new file mode 100644
index 0000000..10c596b
--- /dev/null
+++ b/tests/test_benchmark.py
@@ -0,0 +1,179 @@
+import subprocess
+import sys
+import os
+import re
+import pytest
+
+# Path to the benchmark script
+BENCHMARK_SCRIPT_PATH = os.path.join(
+    os.path.dirname(__file__), "..", "benchmark", "run_benchmark.py"
+)
+
+
+def run_benchmark_script(framework_name: str) -> float:
+    """
+    Runs the benchmark script for the given framework and returns the reported time.
+    Raises RuntimeError if the script fails or its output cannot be parsed.
+    """
+    if not os.path.exists(BENCHMARK_SCRIPT_PATH):
+        raise FileNotFoundError(f"Benchmark script not found at {BENCHMARK_SCRIPT_PATH}")
+
+    command = [sys.executable, BENCHMARK_SCRIPT_PATH, framework_name]
+
+    try:
+        # Run from the project root for consistent module resolution, if any.
+        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+        process = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=300,  # 5-minute timeout; adjust as needed
+            cwd=project_root,
+        )
+        output = process.stdout
+        # print(f"--- Benchmark output for {framework_name} ---\n{output}\n---------------")  # For debugging
+
+        # Regex to find "X.XX seconds." and capture X.XX from a line such as:
+        # "Flask benchmark: 100/100 successful requests in 15.32 seconds."
+        match = re.search(r"(\d+\.\d{2}) seconds\.", output)
+        if match:
+            return float(match.group(1))
+        else:
+            raise ValueError(
+                f"Could not parse execution time from benchmark output for {framework_name}.\nOutput:\n{output}"
+            )
+    except FileNotFoundError:
+        # The initial check should catch a missing script; this guards the command itself.
+        raise RuntimeError(f"Python executable not found at {sys.executable} or script path incorrect.")
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(
+            f"Benchmark script for {framework_name} failed with exit code {e.returncode}.\n"
+            f"Stdout:\n{e.stdout}\n"
+            f"Stderr:\n{e.stderr}"
+        )
+    except subprocess.TimeoutExpired as e:
+        raise RuntimeError(
+            f"Benchmark script for {framework_name} timed out after {e.timeout} seconds.\n"
+            f"Stdout:\n{e.stdout}\n"
+            f"Stderr:\n{e.stderr}"
+        )
+    except ValueError:
+        # Re-raise the specific parsing error unchanged.
+        raise
+    except Exception as e:
+        raise RuntimeError(f"An unexpected error occurred while running the benchmark for {framework_name}: {e}")
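+
+
+# Editor's sketch (an assumption, not part of the TDD harness): a small readiness probe so a
+# missing server can surface as an explicit pytest skip instead of a hard benchmark failure.
+# The URLs mirror the FLASK_URL / FASTAPI_URL documented below; the helper name _server_is_up
+# is illustrative only. The target endpoint sleeps ~3 s per request, hence the generous timeout.
+def _server_is_up(url: str, timeout: float = 10.0) -> bool:
+    """Return True if a GET request to `url` completes without a connection error."""
+    import urllib.error
+    import urllib.request
+
+    try:
+        with urllib.request.urlopen(url, timeout=timeout):
+            return True
+    except (urllib.error.URLError, OSError):
+        return False
+
+
+# Example usage inside a test (not wired in by default):
+#     if not _server_is_up("http://127.0.0.1:3000/"):
+#         pytest.skip("Flask server is not running on http://127.0.0.1:3000/")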
+
+
+# --- Test Cases ---
+
+# Note: These tests run the actual benchmark script against live servers.
+# The Flask and FastAPI applications (app_flask/flask_application.py and app_fastapi/app.py)
+# must be listening on the FLASK_URL and FASTAPI_URL configured in run_benchmark.py
+# (http://127.0.0.1:3000/ and http://127.0.0.1:8000/ respectively).
+# The benchmark script does not start the servers; it assumes they are already running.
+# This is a deviation from the unit tests, which start the servers themselves.
+# The benchmark script ITSELF is what is being tested here, not the servers' ability to start.
+
+# Known limitation: run_benchmark.py does NOT start the servers, whereas the tests in
+# test_flask_route.py and test_fastapi_route.py DO start them. The TDD for the Phase 5
+# benchmark harness does not explicitly say the benchmark script should start servers,
+# but these integration tests do require servers to be running.
+
+# For these integration tests to work as the TDD intends (testing the benchmark script),
+# the full flow would be:
+# 1. Start the Flask server.
+# 2. Run the Flask benchmark script and record its time.
+# 3. Stop the Flask server.
+# 4. Start the FastAPI server.
+# 5. Run the FastAPI benchmark script and record its time.
+# 6. Stop the FastAPI server.
+# That would make test_benchmark.py manage server processes much like test_flask_route.py
+# and test_fastapi_route.py already do (see the benchmark_servers fixture sketch after
+# test_flask_benchmark_timing below).
+
+# One option is to have run_benchmark_script accept server-management hooks, or to manage
+# the servers with fixtures. For now, the tests assume the servers are started MANUALLY
+# before running pytest. This is a point to clarify or improve based on the Phase 5 TDD
+# goals. The TDD says "Assert that Flask total time > 3 seconds", which implies the
+# benchmark script is run against live servers.
+
+# Benchmark times could be cached in module-level globals to avoid re-running the script
+# for every assertion, but for isolation each test currently runs the benchmark afresh.
+# A setup_module (or session fixture) could optimize this later if it proves too slow.
+
+FLASK_BENCH_TIME = -1.0
+FASTAPI_BENCH_TIME = -1.0
+
+# These tests FAIL if the servers are not running at the expected ports; a readiness probe
+# such as the _server_is_up sketch above could turn that into an explicit pytest skip.
+
+
+@pytest.mark.benchmark_integration  # Custom marker
+def test_flask_benchmark_timing():
+    """Tests the Flask benchmark timing."""
+    global FLASK_BENCH_TIME
+    # Requires the Flask server to be running on http://127.0.0.1:3000/.
+    # run_benchmark.py only needs the specific server it is targeting, so the FastAPI
+    # server does not have to be up for this particular test.
+
+    print("\nRunning Flask benchmark for timing test...")
+    flask_time = run_benchmark_script("flask")
+    FLASK_BENCH_TIME = flask_time  # Store for potential use in other tests
+    print(f"Flask benchmark reported: {flask_time:.2f}s")
+
+    # TDD: Assert that Flask total time > 3 seconds (the actual time depends on concurrency).
+    # With 100 requests and a 3 s delay each, the serial total is 300 s. With a
+    # ThreadPoolExecutor it is much less but still significant: a single request alone
+    # takes 3 s, so 100 requests must exceed 3 s even with threads, and substantially so
+    # when NUM_REQUESTS / num_workers spans several 3 s cycles. For 100 requests and
+    # roughly 8 workers, 100/8 * 3 = 12.5 * 3 = ~37.5 s (very rough).
+    # The TDD example shows "Flask (100 req): 18.4 s".
+    assert flask_time > 3.0, "Flask benchmark time should be greater than the artificial 3s delay."
+    # A more realistic lower bound for 100 requests would be NUM_REQUESTS * 3 / max_concurrency,
+    # but the TDD's "> 3 s" is the primary guide; in practice the time should be much higher.
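+
+
+# Illustrative sketch only (editor's assumption, not yet wired into the tests): the
+# session-scoped fixture suggested in the "Future improvement" note at the bottom of this
+# file. It starts both servers with the same commands listed there and stops them after
+# the benchmark tests finish; the exact startup commands and the crude readiness wait are
+# assumptions about this project's layout.
+@pytest.fixture(scope="session")
+def benchmark_servers():
+    """Start the Flask and FastAPI servers for the benchmark tests, then shut them down."""
+    import time  # local import; only this sketch needs it
+
+    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+    flask_proc = subprocess.Popen(
+        [sys.executable, os.path.join("app_flask", "flask_application.py")],
+        cwd=project_root,
+    )
+    fastapi_proc = subprocess.Popen(
+        [sys.executable, "-m", "uvicorn", "app_fastapi.app:app",
+         "--host", "127.0.0.1", "--port", "8000"],
+        cwd=project_root,
+    )
+    time.sleep(2)  # crude startup wait; polling a readiness probe would be more robust
+    try:
+        yield
+    finally:
+        for proc in (fastapi_proc, flask_proc):
+            proc.terminate()
+            try:
+                proc.wait(timeout=10)
+            except subprocess.TimeoutExpired:
+                proc.kill()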
+
+
+@pytest.mark.benchmark_integration
+def test_fastapi_benchmark_timing():
+    """Tests the FastAPI benchmark timing."""
+    global FASTAPI_BENCH_TIME
+    # Requires the FastAPI server to be running on http://127.0.0.1:8000/.
+    print("\nRunning FastAPI benchmark for timing test...")
+    fastapi_time = run_benchmark_script("fastapi")
+    FASTAPI_BENCH_TIME = fastapi_time
+    print(f"FastAPI benchmark reported: {fastapi_time:.2f}s")
+
+    # TDD: Assert that FastAPI total time ≈ 3-4 seconds (more truly concurrent).
+    # The TDD example shows "FastAPI (100 req): 3.7 s".
+    assert 2.9 < fastapi_time < 5.0, "FastAPI benchmark time should be close to 3-4s (2.9s to 5s with tolerance)."
+
+
+@pytest.mark.benchmark_integration
+def test_fastapi_is_faster_than_flask():
+    """Tests that the FastAPI benchmark is faster than the Flask benchmark."""
+    # Re-run both benchmarks so the comparison uses fresh numbers, even though this adds to test time.
+    print("\nRe-running Flask benchmark for comparison...")
+    flask_comparison_time = run_benchmark_script("flask")
+    print(f"Flask benchmark for comparison reported: {flask_comparison_time:.2f}s")
+
+    print("\nRe-running FastAPI benchmark for comparison...")
+    fastapi_comparison_time = run_benchmark_script("fastapi")
+    print(f"FastAPI benchmark for comparison reported: {fastapi_comparison_time:.2f}s")
+
+    # TDD: Add a test test_fastapi_faster(): assert fast_time < flask_time
+    assert fastapi_comparison_time < flask_comparison_time, (
+        "FastAPI should be significantly faster than Flask for this benchmark."
+    )
+
+
+# To run these tests:
+# 1. Ensure the Flask server (app_flask/flask_application.py) can be started on http://127.0.0.1:3000/
+# 2. Ensure the FastAPI server (app_fastapi/app.py) can be started on http://127.0.0.1:8000/
+# 3. Manually start BOTH servers in separate terminals before running pytest:
+#    - Terminal 1: python app_flask/flask_application.py
+#    - Terminal 2: uvicorn app_fastapi.app:app --host 127.0.0.1 --port 8000
+# 4. Then run: pytest tests/test_benchmark.py -m benchmark_integration -s -v
+#    (-s shows stdout, -v is verbose, -m selects only these tests)
+
+# Future improvement:
+# - Use pytest fixtures (e.g., session-scoped) to start and stop the Flask and FastAPI servers
+#   automatically around these benchmark tests, making them self-contained. This would mean
+#   adapting the server-starting logic from test_flask_route.py and test_fastapi_route.py;
+#   for example, a fixture could yield running server processes (see the benchmark_servers
+#   sketch above).
\ No newline at end of file
-- 
2.25.1