Concurrent Programming Study Notes, Based on Python (Part 3)


P10 Speeding up a Flask service with a process pool
import flask
import math
import json
from concurrent.futures import ProcessPoolExecutor

app = flask.Flask(__name__)

# PRIMES = [112272535095293] * 100


def is_prime(n):
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    sqrt_n = int(math.floor(math.sqrt(n)))
    for i in range(3, sqrt_n + 1, 2):
        if n % i == 0:
            return False
    return True


# API endpoint, e.g. /is_prime/101,102,103
@app.route("/is_prime/<numbers>")
def api_is_prime(numbers):
    number_list = [int(x) for x in numbers.split(",")]
    # Dispatch the CPU-bound checks to the worker processes in parallel
    results = process_pool.map(is_prime, number_list)
    return json.dumps(dict(zip(number_list, results)))


if __name__ == "__main__":
    # The pool must be created here, after is_prime is defined, so that
    # the worker processes can re-import the module and find the function
    process_pool = ProcessPoolExecutor()
    app.run()
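Once the service is running, the endpoint takes a comma-separated list of numbers in the URL. A minimal client sketch, assuming the app is serving on Flask's default 127.0.0.1:5000 (the sample numbers are illustrative):

import requests

# Ask the service whether each number is prime; keys come back as
# strings because json.dumps stringifies dict keys
resp = requests.get("http://127.0.0.1:5000/is_prime/101,102,112272535095293")
print(resp.json())  # e.g. {'101': True, '102': False, '112272535095293': True}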
P11 Speeding up a crawler with async IO (asyncio + aiohttp)
import asyncio
import aiohttp
import blog_spider
import time


async def async_craw(url):
    # Coroutine: the awaits below yield control while IO is in flight,
    # so a single thread can keep many downloads going at once
    print("craw url:", url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            result = await resp.text()
            print(f"craw url:{url}, {len(result)}")


loop = asyncio.get_event_loop()
tasks = [
    loop.create_task(async_craw(url))
    for url in blog_spider.urls
]

start = time.time()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print("spend time:", end - start, "sec")
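Both this script and the next one import a blog_spider module that is not shown in these notes; all they need from it is a urls list. A minimal stand-in sketch (the target site and page range are illustrative assumptions, not from the original notes):

# blog_spider.py -- minimal stand-in for the module imported above.
# Only the urls list is required by the crawler scripts.
urls = [
    f"https://www.cnblogs.com/#p{page}"
    for page in range(1, 51)
]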
P12 Using a semaphore to control crawler concurrency in async IO
import asyncio
import aiohttp
import blog_spider
import time

# At most this many coroutines may crawl at the same time
# (1 means the requests run strictly one after another)
semaphore = asyncio.Semaphore(1)


async def async_craw(url):
    async with semaphore:  # acquire before crawling, release on exit
        print("craw url:", url)
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                result = await resp.text()
                print(f"craw url:{url}, {len(result)}")


loop = asyncio.get_event_loop()
tasks = [
    loop.create_task(async_craw(url))
    for url in blog_spider.urls
]

start = time.time()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print("spend time:", end - start, "sec")
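As a side note, newer Python releases deprecate fetching the event loop by hand, and asyncio.run() is the preferred entry point. A sketch of the same timing code in that style (main is a name introduced here, not from the original notes):

async def main():
    # One task per URL; the module-level semaphore inside async_craw
    # still caps how many requests are in flight at once
    tasks = [asyncio.create_task(async_craw(url)) for url in blog_spider.urls]
    await asyncio.gather(*tasks)

start = time.time()
asyncio.run(main())
end = time.time()
print("spend time:", end - start, "sec")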