Angular 在对 TransferState(https://angular.io/api/platform-browser/TransferState)进行编码时,使用了一组特殊的转义函数,其源码位于此处:https://github.com/angular/angular/blob/61bfa3d9dfc7c9daecde098aca595b731c3312a0/packages/platform-browser/src/browser/transfer_state.ts#L12-L32
/**
 * Replaces the five characters `&`, `"`, `'`, `<` and `>` with the short
 * sequences `&a;`, `&q;`, `&s;`, `&l;` and `&g;` respectively.
 *
 * Every other character is copied through unchanged.
 *
 * @param text The raw string to encode.
 * @returns The encoded string.
 */
export function escapeHtml(text: string): string {
  return text.replace(/[&"'<>]/g, (ch: string): string => {
    switch (ch) {
      case '&':
        return '&a;';
      case '"':
        return '&q;';
      case '\'':
        return '&s;';
      case '<':
        return '&l;';
      case '>':
        return '&g;';
      default:
        // Not reachable: the pattern only matches the five characters above.
        return ch;
    }
  });
}
/**
 * Reverses `escapeHtml`: turns the sequences `&a;`, `&q;`, `&s;`, `&l;` and
 * `&g;` back into `&`, `"`, `'`, `<` and `>`.
 *
 * Only those five sequences are decoded. Any other text, including a stray
 * `&` or a standard HTML entity such as `&amp;`, is returned unchanged.
 *
 * @param text The encoded string.
 * @returns The decoded string.
 */
export function unescapeHtml(text: string): string {
  const unescapedText: {[k: string]: string} = {
    '&a;': '&',
    '&q;': '"',
    '&s;': '\'',
    '&l;': '<',
    '&g;': '>',
  };
  // Match only the known sequences. A broad pattern such as /&[^;]+;/ also
  // captures unrelated `&...;` runs, whose table lookup yields `undefined`,
  // which `replace` then writes out as the literal text "undefined".
  return text.replace(/&[aqslg];/g, s => unescapedText[s] ?? s);
}
您可以在 Python 中重现 unescapeHtml 函数,并额外调用 html.unescape 来解析其余的 HTML 实体:
import json
import requests
from bs4 import BeautifulSoup
import html
# Short escape sequences and the character each one stands for.
unescapedText = {
    '&a;': '&',
    '&q;': '"',
    '&s;': '\'',
    '&l;': '<',
    '&g;': '>',
}


def unescape(str):
    """Decode the short escape sequences, then any remaining HTML entities.

    The sequences listed in ``unescapedText`` are replaced in a single pass
    over the input, so text produced by one replacement is never rescanned.
    With chained ``str.replace`` calls, ``"&a;q;"`` (the encoding of the
    literal text ``&q;``) would first become ``"&q;"`` and then wrongly ``'"'``.

    The result is then passed through ``html.unescape`` so that standard HTML
    entities are decoded as well.

    Args:
        str: The encoded text. The name shadows the builtin; it is kept so
            existing keyword callers keep working.

    Returns:
        The decoded text.
    """
    import re  # local import: the file's import header does not provide it

    pattern = "|".join(re.escape(token) for token in unescapedText)
    decoded = re.sub(pattern, lambda match: unescapedText[match.group(0)], str)
    return html.unescape(decoded)
# Download the article page. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were the article.
url = "https://www.londonstockexchange.com/news-article/ESNT/date-for-fy-2020-results-announcement/14850033"
response = requests.get(url, timeout=30)
response.raise_for_status()

# The encoded state is read from the <script id="ng-lseg-state"> element.
script = BeautifulSoup(response.text, "lxml").find("script", {
    "id": "ng-lseg-state"
})
if script is None or script.string is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError("script tag with id 'ng-lseg-state' not found or empty")

# Decode the escaped text and parse it as JSON.
payload = json.loads(unescape(script.string))

# Key under which the page data for this article is stored in the payload.
main_key = "G.{{api_endpoint}}/api/v1/pages?parameters=newsId%3D14850033&path=news-article"
article_body = payload[main_key]["body"]["components"][1]["content"]["newsArticle"]["value"]

# The article body is itself HTML; print its paragraph elements.
print(BeautifulSoup(article_body, "lxml").find_all("p"))
您之前遗漏了 &s; 和 &a; 这两个转义序列。
可在 repl.it 上在线运行:https://replit.com/@bertrandmartel/AngularTransferStateDecode