test_musehub_json_alternate.py
python
| 1 | """Tests for MuseHub JSON alternate content negotiation. |
| 2 | |
| 3 | Verifies: |
| 4 | - Accept: application/json returns JSONResponse with data/meta envelope |
| 5 | - Accept: text/html (or no header) returns the HTML path |
| 6 | - Bot User-Agents receive X-MuseHub-JSON-Available header |
| 7 | - Non-bot User-Agents do NOT receive X-MuseHub-JSON-Available header |
| 8 | - Helper function behaviour in isolation |
| 9 | """ |
| 10 | from __future__ import annotations |
| 11 | |
| 12 | import pytest |
| 13 | from fastapi import FastAPI, Request |
| 14 | from fastapi.responses import HTMLResponse |
| 15 | from fastapi.testclient import TestClient |
| 16 | from starlette.responses import Response |
| 17 | |
| 18 | from musehub.api.routes.musehub.json_alternate import ( |
| 19 | add_json_available_header, |
| 20 | is_bot_user_agent, |
| 21 | json_or_html, |
| 22 | ) |
| 23 | |
| 24 | |
| 25 | # --------------------------------------------------------------------------- |
| 26 | # Minimal test app that exercises json_or_html via a real ASGI route |
| 27 | # --------------------------------------------------------------------------- |
| 28 | |
# Module-level FastAPI app that hosts the fixture routes registered below.
_app = FastAPI()
| 30 | |
| 31 | |
@_app.get("/test-page")
async def _test_page(request: Request) -> Response:
    """Serve the negotiation fixture page through ``json_or_html``.

    ``json_or_html`` is handed the incoming request, a zero-argument factory
    that builds the HTML response, and the context dict whose values the
    JSON tests in this module look up under ``data``.
    """

    def _render_html() -> HTMLResponse:
        # Built lazily: only invoked if json_or_html decides to call it.
        return HTMLResponse(content="<html>test</html>")

    page_context = {"title": "Test", "value": 42}
    return json_or_html(request, _render_html, page_context)
| 41 | |
| 42 | |
@_app.get("/bot-header-test")
async def _bot_header_test(request: Request) -> Response:
    """Return a fixed HTML page passed through ``add_json_available_header``."""
    return add_json_available_header(
        HTMLResponse(content="<html>ok</html>"),
        request,
    )
| 48 | |
| 49 | |
# Shared synchronous client bound to the fixture app; the class-based tests
# in this module reuse it. raise_server_exceptions=True is spelled out so an
# exception raised inside a route surfaces in the calling test (see the
# Starlette TestClient documentation).
_client = TestClient(_app, raise_server_exceptions=True)
| 51 | |
| 52 | |
| 53 | # --------------------------------------------------------------------------- |
| 54 | # json_or_html — content negotiation |
| 55 | # --------------------------------------------------------------------------- |
| 56 | |
| 57 | |
class TestJsonOrHtml:
    """Exercise ``json_or_html`` end to end through the ``/test-page`` route."""

    _PATH = "/test-page"
    _JSON = "application/json"
    _HTML = "text/html"
    _DISCOVERY_HEADER = "x-musehub-json-available"

    def test_accept_json_returns_json_response(self) -> None:
        """An ``application/json`` Accept header yields a data/meta JSON body."""
        response = _client.get(self._PATH, headers={"Accept": self._JSON})
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._JSON)
        payload = response.json()
        for envelope_key in ("data", "meta"):
            assert envelope_key in payload

    def test_json_data_envelope_contains_context(self) -> None:
        """The route's context dict is exposed under ``data``."""
        response = _client.get(self._PATH, headers={"Accept": self._JSON})
        envelope = response.json()
        context = envelope["data"]
        assert context["title"] == "Test"
        assert context["value"] == 42

    def test_json_meta_contains_url(self) -> None:
        """``meta`` carries a ``url`` entry that mentions the requested path."""
        response = _client.get(self._PATH, headers={"Accept": self._JSON})
        envelope = response.json()
        meta = envelope["meta"]
        assert "url" in meta
        assert "test-page" in meta["url"]

    def test_accept_html_returns_html_response(self) -> None:
        """A ``text/html`` Accept header yields the HTML page."""
        response = _client.get(self._PATH, headers={"Accept": self._HTML})
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._HTML)
        assert b"<html>" in response.content

    def test_no_accept_header_returns_html(self) -> None:
        """A request without an explicit Accept header gets HTML."""
        response = _client.get(self._PATH)
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._HTML)

    def test_accept_star_returns_html(self) -> None:
        """A wildcard Accept header gets HTML."""
        response = _client.get(self._PATH, headers={"Accept": "*/*"})
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._HTML)

    def test_bot_ua_html_response_includes_discovery_header(self) -> None:
        """json_or_html wires add_json_available_header into the HTML path."""
        bot_headers = {"User-Agent": "claude-agent/1.0"}
        response = _client.get(self._PATH, headers=bot_headers)
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._HTML)
        assert response.headers.get(self._DISCOVERY_HEADER) == "true"

    def test_browser_ua_html_response_omits_discovery_header(self) -> None:
        """json_or_html does not add discovery header for browser User-Agents."""
        browser_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14) AppleWebKit/537.36",
        }
        response = _client.get(self._PATH, headers=browser_headers)
        assert response.status_code == 200
        assert self._DISCOVERY_HEADER not in response.headers

    def test_accept_json_with_quality_returns_json(self) -> None:
        """A JSON media type listed with a q-value still selects JSON."""
        weighted_accept = {"Accept": "application/json;q=0.9, text/html"}
        response = _client.get(self._PATH, headers=weighted_accept)
        assert response.status_code == 200
        assert response.headers["content-type"].startswith(self._JSON)
| 123 | |
| 124 | |
| 125 | # --------------------------------------------------------------------------- |
| 126 | # is_bot_user_agent — User-Agent detection |
| 127 | # --------------------------------------------------------------------------- |
| 128 | |
| 129 | |
def _classify_user_agent(user_agent: str) -> str:
    """Return ``"bot"`` or ``"human"`` for *user_agent* via a one-route app.

    A throwaway FastAPI app exposes ``/ua``; its handler reports what
    ``is_bot_user_agent`` decided for the incoming request. The request is
    sent through ``TestClient`` so the header reaches the helper on a real
    ``Request`` object.

    Args:
        user_agent: Value sent as the ``User-Agent`` request header.

    Returns:
        The response body text: ``"bot"`` or ``"human"``.
    """
    app = FastAPI()

    @app.get("/ua")
    async def _ua(request: Request) -> Response:
        verdict = "bot" if is_bot_user_agent(request) else "human"
        return HTMLResponse(content=verdict)

    response = TestClient(app).get("/ua", headers={"User-Agent": user_agent})
    return response.text


# NOTE(review): the tests below contain no ``await``; the ``anyio`` marker and
# ``async def`` are kept as they were — confirm whether plain sync tests would
# match the project's conventions.


@pytest.mark.anyio
async def test_is_bot_ua_detects_bot_keyword() -> None:
    """is_bot_user_agent returns True for 'bot' User-Agents."""
    assert _classify_user_agent("Googlebot/2.1") == "bot"


@pytest.mark.anyio
async def test_is_bot_ua_detects_claude() -> None:
    """A User-Agent containing 'claude' is classified as a bot."""
    assert _classify_user_agent("claude-agent/1.0") == "bot"


@pytest.mark.anyio
async def test_is_bot_ua_detects_gpt() -> None:
    """A User-Agent containing 'GPT' is classified as a bot."""
    assert _classify_user_agent("OpenAI-GPT/4") == "bot"


@pytest.mark.anyio
async def test_is_bot_ua_detects_cursor() -> None:
    """A User-Agent containing 'Cursor' is classified as a bot."""
    assert _classify_user_agent("Cursor/0.42") == "bot"


@pytest.mark.anyio
async def test_is_bot_ua_returns_false_for_browser() -> None:
    """A desktop-browser User-Agent is classified as human."""
    browser_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14) AppleWebKit/537.36"
    assert _classify_user_agent(browser_ua) == "human"
| 206 | |
| 207 | |
| 208 | # --------------------------------------------------------------------------- |
| 209 | # add_json_available_header |
| 210 | # --------------------------------------------------------------------------- |
| 211 | |
| 212 | |
class TestAddJsonAvailableHeader:
    """Discovery-header behaviour of ``add_json_available_header``."""

    _PATH = "/bot-header-test"
    _DISCOVERY_HEADER = "x-musehub-json-available"

    def test_bot_ua_receives_header(self) -> None:
        """A 'claude-agent' User-Agent gets the discovery header set to 'true'."""
        bot_headers = {"User-Agent": "claude-agent/1.0"}
        response = _client.get(self._PATH, headers=bot_headers)
        assert response.headers.get(self._DISCOVERY_HEADER) == "true"

    def test_browser_ua_does_not_receive_header(self) -> None:
        """A desktop-browser User-Agent gets no discovery header."""
        browser_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14) AppleWebKit/537.36",
        }
        response = _client.get(self._PATH, headers=browser_headers)
        assert self._DISCOVERY_HEADER not in response.headers

    def test_no_ua_does_not_receive_header(self) -> None:
        """A request sent without an explicit User-Agent gets no discovery header."""
        response = _client.get(self._PATH)
        assert self._DISCOVERY_HEADER not in response.headers

    def test_agent_ua_receives_header(self) -> None:
        """A User-Agent containing 'agent' gets the discovery header."""
        agent_headers = {"User-Agent": "my-agent/2.0"}
        response = _client.get(self._PATH, headers=agent_headers)
        assert response.headers.get(self._DISCOVERY_HEADER) == "true"
| 240 | |
| 241 | |
| 242 | # --------------------------------------------------------------------------- |
| 243 | # Unit tests for is_bot_user_agent in isolation |
| 244 | # --------------------------------------------------------------------------- |
| 245 | |
| 246 | |
class TestIsBotUserAgentUnit:
    """Call ``is_bot_user_agent`` directly on hand-built requests (no ASGI app)."""

    def _make_request(self, ua: str) -> Request:
        """Return a bare HTTP GET ``Request`` whose only header is User-Agent *ua*."""
        from starlette.datastructures import Headers
        from starlette.types import Scope

        # Headers(...).raw yields the list of byte pairs an ASGI scope expects.
        raw_headers = Headers(headers={"user-agent": ua}).raw
        scope: Scope = {
            "type": "http",
            "method": "GET",
            "path": "/",
            "query_string": b"",
            "headers": raw_headers,
        }
        return Request(scope)

    def test_bot_keyword_case_insensitive(self) -> None:
        """'bot' is matched regardless of letter case."""
        for ua in ("Moz-Bot/1.0", "MOZ-BOT/1.0"):
            assert is_bot_user_agent(self._make_request(ua)) is True

    def test_agent_keyword(self) -> None:
        """'agent' in the User-Agent counts as a bot."""
        request = self._make_request("my-agent/1.0")
        assert is_bot_user_agent(request) is True

    def test_claude_keyword(self) -> None:
        """'claude' in the User-Agent counts as a bot."""
        request = self._make_request("claude-code")
        assert is_bot_user_agent(request) is True

    def test_gpt_keyword(self) -> None:
        """'gpt' in the User-Agent counts as a bot."""
        request = self._make_request("gpt4-client")
        assert is_bot_user_agent(request) is True

    def test_cursor_keyword(self) -> None:
        """'Cursor' in the User-Agent counts as a bot."""
        request = self._make_request("Cursor/0.42")
        assert is_bot_user_agent(request) is True

    def test_empty_ua(self) -> None:
        """An empty User-Agent string is not a bot."""
        request = self._make_request("")
        assert is_bot_user_agent(request) is False

    def test_regular_browser(self) -> None:
        """A typical Safari User-Agent string is not a bot."""
        safari_ua = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_3) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36"
        )
        request = self._make_request(safari_ua)
        assert is_bot_user_agent(request) is False