# wiki_crawler/backend/utils/common.py
from typing import Any
from urllib.parse import urlparse, urlunparse
def make_response(code: int, msg: str = "Success", data: Any = None) -> dict:
    """Wrap an API result in the unified response envelope.

    Args:
        code: Business/status code for the response.
        msg: Human-readable message; defaults to "Success".
        data: Payload of any type; ``None`` when there is nothing to return.

    Returns:
        A dict of the form ``{"code": ..., "msg": ..., "data": ...}``.
    """
    # NOTE: the original annotated ``data: any`` — that is the builtin
    # ``any()`` function, not a type; ``typing.Any`` is the intended hint.
    return {"code": code, "msg": msg, "data": data}
def normalize_url(url: str) -> str:
    """Normalize a URL for deduplication.

    Steps:
        1. Strip leading/trailing whitespace.
        2. Drop the fragment (everything after ``#``).
        3. Drop the query string (business assumption: different queries
           map to the same page — TODO confirm this holds for all crawled sites).
        4. Drop trailing slashes from the path.

    Examples:
        "https://www.example.com/path/"        -> "https://www.example.com/path"
        "https://www.example.com/path?query=1" -> "https://www.example.com/path"

    Args:
        url: The raw URL; may be empty or ``None``-like (falsy).

    Returns:
        The normalized URL, or ``""`` for falsy input.
    """
    if not url:
        return ""
    parsed = urlparse(url.strip())
    # Keep only scheme, netloc and path; params/query/fragment are blanked
    # so that variants of the same page collapse to one canonical form.
    clean_path = parsed.path.rstrip('/')
    new_parsed = parsed._replace(path=clean_path, params='', query='', fragment='')
    return urlunparse(new_parsed)