某查查爬取统一社会信用代码
0x01 First Glance


0x02 Debug











0x03 Robot Build
Last updated













Last updated
/**
 * Substitution table used by `r`. A character's UTF-16 code unit, taken
 * modulo the table length, selects one entry. Duplicate entries ("i", "k")
 * are intentional: the table must stay byte-for-byte as given.
 */
const codes = Object.freeze([
  "W", "l", "k", "B", "Q", "g", "f", "i", "i", "r",
  "v", "6", "A", "K", "N", "k", "4", "L", "1", "8",
]);

/**
 * Derive the key string for a request path.
 *
 * The path is lower-cased and concatenated with itself; every UTF-16 code
 * unit of that doubled string is then replaced by
 * `codes[codeUnit % codes.length]`. The result is therefore always twice as
 * long as the input.
 *
 * @param {string} [path="/"] - Request path; `undefined` falls back to "/".
 * @returns {string} The substituted key string.
 * @throws {TypeError} If `path` is neither a string nor `undefined`.
 */
function r(path = "/") {
  if (typeof path !== "string") {
    const actual = path === null ? "null" : typeof path;
    throw new TypeError(`r: path must be a string, got ${actual}`);
  }

  const doubled = path.toLowerCase().repeat(2);
  let key = "";
  // Index by UTF-16 code unit (not code point) so the mapping matches the
  // original `t[i].charCodeAt()` behaviour exactly.
  for (let i = 0; i < doubled.length; i += 1) {
    key += codes[doubled.charCodeAt(i) % codes.length];
  }
  return key;
}
import datetime
import hashlib
import hmac
import json
import sys

import execjs
import requests

# Crawl script: for each day going backwards from 15 days ago, query the
# search API for the first two result pages and append every returned
# `CreditCode` to credit.txt. Each request carries one extra header whose
# name and value are both HMAC-SHA512 digests derived from the request path
# and body; the HMAC key comes from the JavaScript function `r` in foo.js.

url = 'https://xxx/api/search/searchMulti'
today = datetime.datetime.now()
# Requests are routed through a local proxy on port 8080.
proxy = {
    'https': '127.0.0.1:8080'
}
suffix = "6554a513cb0931121c07fa9e8da5968d"
path = "/api/search/searchmulti"
# Seconds to wait for the server before giving up on a single request, so
# one stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 30


def hmac_sha512_hex(salt, data):
    """Return the hex HMAC-SHA512 digest of ``data`` keyed with ``salt``.

    Args:
        salt: HMAC key as bytes.
        data: Message as str; it is UTF-8 encoded before hashing.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    return hmac.new(salt, data.encode(), digestmod=hashlib.sha512).hexdigest()


def build_signature_header(salt, path, body, suffix):
    """Compute the dynamic signature header for one request.

    Args:
        salt: HMAC key as bytes.
        path: Request path used in the signature.
        body: Compact JSON request body as str.
        suffix: Constant string appended to the signed value.

    Returns:
        A ``(header_name, header_value)`` tuple. The name is characters
        8..27 of the digest of the lower-cased ``path + body``; the value is
        the full digest of ``path + "pathString" + body.lower() + suffix``.
    """
    header_name = hmac_sha512_hex(salt, (path + body).lower())[8: 28]
    header_value = hmac_sha512_hex(salt, path + "pathString" + body.lower() + suffix)
    return header_name, header_value


with open('foo.js', 'r') as f:
    context = execjs.compile(f.read())
salt = context.call('r', path).encode()

# The context manager guarantees credit.txt is flushed and closed even when
# the crawl is interrupted.
with open('credit.txt', 'w') as credit_code:
    for i in range(10000):
        # The site does not support querying companies established within
        # the last 15 days.
        day = (today + datetime.timedelta(days=-15 - i)).strftime("%Y%m%d")
        param = {
            'd': [{'start': day, 'end': day, 'value': f'{day}-{day}', 'x': True}],
            'r': [{'pr': 'GD', 'cc': [440100]}]
        }
        # json.dumps inserts extra spaces by default; separators removes them.
        filter_json = json.dumps(param, separators=(',', ':'))

        for page in range(2):
            # Rebuilt per request: the signature header name changes with the
            # body, so reusing the dict would accumulate stale headers.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
                'Cookie': 'xxxx',
                'Content-Type': 'application/json'
            }
            obj = {
                "searchKey": "餐饮",
                "pageIndex": page + 1,
                "pageSize": 20,
                "filter": filter_json
            }
            # ensure_ascii=False keeps Chinese text from being \u-escaped.
            obj_str = json.dumps(obj, separators=(',', ':'), ensure_ascii=False)
            _key, _val = build_signature_header(salt, path, obj_str, suffix)
            headers[_key] = _val

            try:
                # NOTE(review): the body sent uses json.dumps' default
                # separators while the signature is computed over the compact
                # form; kept exactly as in the original - confirm the server
                # accepts this.
                # NOTE(review): verify=False disables TLS certificate checks,
                # presumably because of the local proxy - confirm before
                # using outside a debugging setup.
                res = requests.post(url=url, headers=headers,
                                    data=json.dumps(obj, ensure_ascii=False).encode('utf-8'),
                                    proxies=proxy,
                                    verify=False,
                                    timeout=REQUEST_TIMEOUT).text
                res_text = json.loads(res)
            except (requests.RequestException, ValueError) as exc:
                # Best effort: report the failed page and move on. Unlike a
                # bare `except`, this lets Ctrl-C stop the crawl.
                print(f'request failed for {day} page {page + 1}: {exc}', file=sys.stderr)
                continue

            results = res_text.get('Result') if isinstance(res_text, dict) else None
            for item in results or []:
                code = item.get('CreditCode') if isinstance(item, dict) else None
                # Skip entries without a usable code instead of dropping the
                # rest of the page.
                if not isinstance(code, str) or not code:
                    continue
                print(code)
                credit_code.write(code + '\n')