Files
wiki_crawler/backend/utils/common.py
2026-01-20 01:51:39 +08:00

29 lines
1.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from urllib.parse import urlparse, urlunparse
def make_response(code: int, msg: str = "Success", data: object = None) -> dict:
    """Wrap a result in the uniform API response envelope.

    Args:
        code: Status code placed under the ``"code"`` key.
        msg: Human-readable message placed under the ``"msg"`` key.
        data: Arbitrary payload placed under the ``"data"`` key; it is
            stored as-is (no copy, no validation).

    Returns:
        A dict with exactly the keys ``"code"``, ``"msg"`` and ``"data"``.
    """
    # NOTE: the payload was previously annotated with the builtin function
    # ``any``, which is not a type; ``object`` expresses "any value".
    return {"code": code, "msg": msg, "data": data}
def normalize_url(url: str) -> str:
    """Return a canonical form of *url* for comparison/deduplication.

    Steps applied:
      1. Strip leading and trailing whitespace.
      2. Drop the fragment (everything after ``#``).
      3. Drop the query string (URLs differing only by query are treated
         as the same page) and the path parameters (``;...``).
      4. Strip trailing slashes from the path.

    Examples:
        "https://www.example.com/path/"        -> "https://www.example.com/path"
        "https://www.example.com/path?query=1" -> "https://www.example.com/path"

    A falsy *url* (empty string or ``None``) yields ``""``.
    """
    if not url:
        return ""

    parts = urlparse(url.strip())
    trimmed_path = parts.path.rstrip('/')

    # urlunparse takes (scheme, netloc, path, params, query, fragment);
    # only the first three are kept, the rest are blanked out.
    return urlunparse((parts.scheme, parts.netloc, trimmed_path, '', '', ''))