파이썬 Flask와 BeautifulSoup를 이용한 네이버 인기 검색어 파싱(수정)


네이버 개편으로 인한 내용 변경

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
from functools import wraps

import requests
from bs4 import BeautifulSoup
from flask import Flask, Response
from urllib3 import request

# Flask application instance; the @app.route decorators in this file register views on it.
app = Flask(__name__)

@app.route('/')
def hello_world():
    """Serve a fixed plain-text greeting at the site root."""
    greeting = 'Hello World!'
    return greeting


def as_json(f):
    """Wrap a view so that its return value is sent back as a JSON response.

    The wrapped function is called with the original arguments; whatever it
    returns is serialized with ``json.dumps``, encoded as UTF-8 bytes, and
    placed in a ``Response`` whose content type is
    ``application/json; charset=utf-8``.

    Args:
        f: The view function whose return value must be JSON-serializable.

    Returns:
        A function with the same call signature as ``f`` (metadata copied
        via ``functools.wraps``) that returns a ``Response``.
    """
    @wraps(f)
    def json_view(*args, **kwargs):
        payload = f(*args, **kwargs)
        # ensure_ascii=False emits non-ASCII characters as-is rather than
        # as \uXXXX escapes, so the body is encoded to UTF-8 explicitly.
        serialized = json.dumps(payload, ensure_ascii=False)
        body = serialized.encode('utf8')
        return Response(body, content_type='application/json; charset=utf-8')

    return json_view

@app.route('/hotkeyword', methods=['GET', 'POST'])
@as_json
def keyword_json():
    """Scrape the Naver front page and return its rolling keyword ranking.

    The page at ``http://www.naver.com`` is fetched and parsed with the
    ``lxml`` parser. Every ``<span class="ah_k">`` inside the first
    ``<div class="PM_CL_realtimeKeyword_rolling">`` becomes one entry.

    Returns:
        A list of ``{"rank_id": int, "rank_text": ...}`` dicts numbered from
        1 in document order, where ``rank_text`` is the span's ``.string``.
        The list is empty when the container div is not found in the page.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout below expires).
    """
    url = 'http://www.naver.com'
    # requests has no default timeout; without one a stalled connection
    # would block this request handler indefinitely.
    page = requests.get(url, timeout=10)

    soup = BeautifulSoup(page.text, 'lxml')
    container = soup.find("div", {"class": "PM_CL_realtimeKeyword_rolling"})
    if container is None:
        # The expected markup is absent (e.g. the site layout changed);
        # report an empty ranking instead of failing with IndexError.
        return []

    keyword_spans = container.find_all("span", {"class": "ah_k"})
    return [
        {"rank_id": position, "rank_text": span.string}
        for position, span in enumerate(keyword_spans, start=1)
    ]


if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE(review): host 0.0.0.0 accepts connections on every network
    # interface — confirm that exposure is intended.
    app.run(host='0.0.0.0')

이전 코드

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
from functools import wraps

import requests
from bs4 import BeautifulSoup
from flask import Flask, Response
from urllib3 import request

# Flask application instance; the @app.route decorators in this file register views on it.
app = Flask(__name__)


@app.route('/')
def hello_world():
    """Return the fixed greeting shown at the root URL."""
    message = 'Hello World!'
    return message


def as_json(f):
    """Decorate a view so its result is returned as a UTF-8 JSON response.

    Args:
        f: View function returning a JSON-serializable object.

    Returns:
        A wrapper (carrying ``f``'s metadata via ``functools.wraps``) that
        calls ``f``, serializes the result with ``json.dumps``, and returns
        it in a ``Response`` with content type
        ``application/json; charset=utf-8``.
    """
    @wraps(f)
    def to_json_response(*args, **kwargs):
        result = f(*args, **kwargs)
        # Non-ASCII characters are kept literal (ensure_ascii=False) and the
        # text is then encoded to UTF-8 bytes for the response body.
        encoded = json.dumps(result, ensure_ascii=False).encode('utf8')
        return Response(encoded, content_type='application/json; charset=utf-8')

    return to_json_response


@app.route('/hotkeyword', methods=['GET', 'POST'])
@as_json
def keyword_json():
    """Scrape the Naver front page and return up to ten ranked keywords.

    The page at ``http://www.naver.com`` is fetched and parsed with the
    ``lxml`` parser. The ``title`` attribute of each of the first ten
    ``<a>`` elements under the element with ``id="realrank"`` becomes one
    entry.

    Returns:
        A list of ``{"rank_id": int, "rank_text": str}`` dicts numbered from
        1 in document order. It holds fewer than ten entries when the page
        has fewer anchors, and is empty when the ``realrank`` element is not
        found.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when the timeout below expires).
        KeyError: If one of the selected anchors has no ``title`` attribute.
    """
    max_keywords = 10
    url = 'http://www.naver.com'
    # requests has no default timeout; without one a stalled connection
    # would block this request handler indefinitely.
    page = requests.get(url, timeout=10)

    soup = BeautifulSoup(page.text, 'lxml')
    realrank = soup.find(id="realrank")
    if realrank is None:
        # The expected markup is absent (e.g. the site layout changed);
        # report an empty ranking instead of failing with AttributeError.
        return []

    # Slicing tolerates pages that list fewer than max_keywords anchors,
    # where indexing by a fixed range would raise IndexError.
    top_links = realrank.find_all('a')[:max_keywords]
    return [
        {"rank_id": position, "rank_text": link.attrs['title']}
        for position, link in enumerate(top_links, start=1)
    ]


if __name__ == "__main__":
    # Run Flask's built-in server only when the file is executed as a script.
    # NOTE(review): host 0.0.0.0 accepts connections on every network
    # interface — confirm that exposure is intended.
    app.run(host='0.0.0.0')