import requests
from cachetools import cached, TTLCache
import httpx

BASE_URL = "https://api.mangadex.org"


def _cover_url_from_manga_payload(data: dict, manga_id: str) -> str | None:
    """Build 256px cover URL from MangaDex /manga/{id} JSON (needs cover_art relationship)."""
    rels = data.get("relationships") or []
    for rel in rels:
        if rel.get("type") != "cover_art":
            continue
        attrs = rel.get("attributes") or {}
        fn = attrs.get("fileName")
        if isinstance(fn, str) and fn.strip():
            return f"https://uploads.mangadex.org/covers/{manga_id}/{fn}.256.jpg"
    return None


async def get_manga_cover_url_256(manga_id: str) -> str | None:
    """GET /manga/{id} with cover_art included; returns CDN URL or None."""
    url = f"{BASE_URL}/manga/{manga_id}"
    params = {"includes[]": ["cover_art"]}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, params=params, timeout=10.0)
            response.raise_for_status()
            payload = response.json()
        except httpx.HTTPStatusError:
            return None
        except Exception as e:
            print(f"get_manga_cover_url_256: {e}")
            return None
    data = payload.get("data")
    if not isinstance(data, dict):
        return None
    return _cover_url_from_manga_payload(data, manga_id)


async def search_manga(
    title: str,
    limit: int = 20,
    offset: int = 0,
    order_by: str = "followedCount",
    order_direction: str = "desc",
    cover_art: bool = True,
) -> dict:
    """
    Search /manga by title, ordered by `order_by`/`order_direction`.

    Todo: filters by tags (include, exclude).
    Mostly for testing at the moment.
    """
    search_url = f"{BASE_URL}/manga"
    params = {
        "limit": limit,
        "offset": offset,
        f"order[{order_by}]": order_direction,
    }
    if cover_art:
        params["includes[]"] = ["cover_art"]
    if title.strip():
        params["title"] = title
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(search_url, params=params, timeout=10.0)
            response.raise_for_status()
            data = response.json()
            return data
        except httpx.HTTPStatusError as e:
            return {"error": f"MangaDex API error: {e.response.status_code}", "details": e.response.text}
        except Exception as e:
            print(f"Error fetching from MangaDex: {e}")
            return {"error": "Internal server error", "details": str(e)}


async def get_manga_chapters(
    manga_id: str,
    limit: int = 100,
    languages: list[str] | None = None,
    offset: int = 0,
    order_by: str = "chapter",
    order_direction: str = "desc",
    content_rating: list[str] | None = None,
    include_empty: int = 0,
) -> dict:
    """
    Get the chapters of a given manga id via /manga/{id}/feed.
    """
    url = f"{BASE_URL}/manga/{manga_id}/feed"
    params = {
        "limit": limit,
        "offset": offset,
        "includeEmptyPages": include_empty,
        f"order[{order_by}]": order_direction,
        "includes[]": ["scanlation_group"],
    }
    if content_rating:
        params["contentRating[]"] = content_rating
    if languages:
        params["translatedLanguage[]"] = languages
    print(f"get_manga_chapters called with params: {params}")
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, params=params, timeout=10.0)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            return {"error": f"MangaDex API error: {e.response.status_code}", "details": e.response.text}
        except Exception as e:
            return {"error": "Internal server error", "details": str(e)}


# Cache up to 100 panel-URL lists, each for 300 seconds (5 minutes)
cache = TTLCache(maxsize=100, ttl=300)


@cached(cache)
def get_chapter_panel_urls(chapter_id: str, img_quality: str = "dataSaver") -> list[str] | None:
    """
    Fetches the actual image URLs for a given MangaDex chapter ID.
    """
    print(f"[CACHE MISS] Fetching fresh URLs from MangaDex for: {chapter_id}")
    if img_quality not in ("data", "dataSaver"):
        raise ValueError("img_quality must be 'data' or 'dataSaver'.")
    try:
        headers = {
            "User-Agent": "Manglify (Capstone project) - https://github.com/TonyLiu2004/Multimodal-Manga-Translator"
        }
        # 1. Ask at-home/server which node serves this chapter's images
        r = requests.get(f"{BASE_URL}/at-home/server/{chapter_id}", headers=headers, timeout=10)
        r.raise_for_status()
        data = r.json()

        # 2. Grab the base URL and the chapter-specific hash
        base_url = data["baseUrl"]
        chapter_hash = data["chapter"]["hash"]
        file_names = data["chapter"][img_quality]  # "data" is high quality, "dataSaver" is compressed

        # 3. Construct the full URL for every page
        #    Format: {baseUrl}/{data|data-saver}/{hash}/{filename}
        url_point = "data-saver" if img_quality == "dataSaver" else "data"
        page_urls = [f"{base_url}/{url_point}/{chapter_hash}/{name}" for name in file_names]
        return page_urls
    except requests.exceptions.RequestException as err:
        print(f"HTTP error occurred: {err}")
        return None


### testing
# mangas = await search_manga("", 15, 0, order_by="followedCount")
# print(mangas)
# first_manga_id = mangas["data"][0]["id"]
# chapters = await get_manga_chapters(first_manga_id)
# first_chapter_id = chapters["data"][0]["id"]
# first_chapter_panels = get_chapter_panel_urls(first_chapter_id, "dataSaver")
# print(first_manga_id)
# print(first_chapter_id)
# print(first_chapter_panels)