"Python 重试模块 retrying 工作中经常碰到的问题就是，某个方法出现了异常，重试几次。循环重复一个方法是很常见的。比如爬虫中的获取代理，对获取失败的情况进行重试。刚开始搜的几个博客讲的有点问题，建议看官方文档，还有自己动手实验。参考： [链接] [链接] 最初的版本 Copyimpo ...."

someone1
AI专业者开发社区 1 号成员
AI开发者社区 • 0 回帖 • 37 浏览 • 5 个月前

Python重试模块retrying

Python 重试模块 retrying

工作中经常碰到的问题就是，某个方法出现了异常，重试几次。循环重复一个方法是很常见的。比如爬虫中的获取代理，对获取失败的情况进行重试。
刚开始搜的几个博客讲的有点问题，建议看官方文档，还有自己动手实验。

参考：
https://segmentfault.com/a/1190000004085023
https://pypi.org/project/retrying/

最初的版本

Copy
import requests
class ProxyUtil:
    """Fetch a proxy address over HTTP, retrying by hand when the fetch fails."""

    def __init__(self):
        # Number of failed attempts so far in the current retry sequence.
        self._get_proxy_count = 0

    def get_proxies(self):
        """Return a proxies mapping with ``'http'`` and ``'https'`` entries.

        Any exception raised while fetching triggers another attempt. After
        the initial attempt plus five retries have all failed, an empty dict
        is returned and the failure counter is reset.
        """
        try:
            r = requests.get('代理服务器地址')
            # Uncomment the lines below to simulate a failure / trace progress:
            # print('正在获取')
            # raise Exception("异常")
            # print('获取到最新代理 = %s' % r.text)
            params = dict()
            if r and r.status_code == 200:
                proxy = str(r.content, encoding='utf-8')
                params['http'] = 'http://' + proxy
                params['https'] = 'https://' + proxy
            else:
                raise Exception("获取代理失败,状态码%s" % (r.status_code))

            # Success: start the next call with a fresh retry budget.
            self._get_proxy_count = 0
            return params
        except Exception:
            if self._get_proxy_count < 5:
                print('第%d次获取代理失败，准备重试' % self._get_proxy_count)
                self._get_proxy_count += 1
                # Propagate the retry's result; without ``return`` the caller
                # would always receive None after the first failure.
                return self.get_proxies()
            else:
                print('第%d次获取代理失败，退出' % self._get_proxy_count)
                self._get_proxy_count = 0
                return dict()

# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    proxy = ProxyUtil()
    proxy.get_proxies()

以上代码通过try...except...捕获异常，并通过一个计数器判断获取代理的次数，获取失败递归调用自己，直到达到最大次数为止。
为了模拟失败，可以解开抛出异常的注释

下面来试试 retrying 模块
安装
pip install retrying

retrying 提供一个装饰器函数 retry，被装饰的函数会在运行失败的情况下重新执行，默认一直报错就一直重试。

Copy
import requests
from retrying import retry
class ProxyUtil:
    """Demo of the bare ``@retry`` decorator applied to a method that always fails."""

    def __init__(self):
        self._get_proxy_count = 0

    @retry
    def get_proxies(self):
        """Fetch a proxy address; this demo version always raises.

        The unconditional ``raise`` is deliberate: it makes the decorator
        re-invoke the method so the retry behaviour can be observed.
        """
        r = requests.get('代理地址')
        print('正在获取')
        raise Exception("异常")
        # Everything below is unreachable until the demo ``raise`` is removed.
        print('获取到最新代理 = %s' % r.text)
        params = dict()
        if r and r.status_code == 200:
            proxy = str(r.content, encoding='utf-8')
            params['http'] = 'http://' + proxy
            params['https'] = 'https://' + proxy
        return params

# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    proxy = ProxyUtil()
    proxy.get_proxies()

结果：

正在获取
正在获取
正在获取
...
正在获取 (一直重复下去)
没有添加任何参数，默认情况下会一直重试，没有等待时间

Copy
# Cap the number of attempts.
@retry(stop_max_attempt_number=5)
def get_proxies(self):
    """Demo fetch that always raises, so the attempt limit can be observed."""
    response = requests.get('代理地址')
    print('正在获取')
    raise Exception("异常")
    # Unreachable while the demo ``raise`` above is in place.
    print('获取到最新代理 = %s' % response.text)
    proxies = dict()
    if response and response.status_code == 200:
        address = str(response.content, encoding='utf-8')
        proxies.update({
            'http': 'http://' + address,
            'https': 'https://' + address,
        })

Copy
# 设置方法的最大延迟时间，默认为 100 毫秒 (是执行这个方法重试的总时间)
@retry(stop_max_attempt_number=5,stop_max_delay=50)
# 通过设置为 50，我们会发现，任务并没有执行 5 次才结束！

Copy
# 添加每次方法执行之间的等待时间
@retry(stop_max_attempt_number=5,wait_fixed=2000)
# 随机的等待时间
@retry(stop_max_attempt_number=5,wait_random_min=100,wait_random_max=2000)
# 每调用一次增加固定时长
@retry(stop_max_attempt_number=5,wait_incrementing_increment=1000)

Copy
# Retrying based on the exception type: a simple example first.
def retry_if_io_error(exception):
    """Report whether *exception* is an ``IOError`` instance."""
    is_io_failure = isinstance(exception, IOError)
    return is_io_failure
@retry(retry_on_exception=retry_if_io_error)
def read_a_file():
    """Return the full contents of the file named ``file``.

    Whether a raised exception leads to a retry is decided by
    ``retry_if_io_error``.
    """
    with open("file", "r") as f:
        return f.read()

read_a_file函数如果抛出了异常，会把该异常交给retry_on_exception指向的函数，由它返回True还是False来决定是否重试：返回True则按指定的重试次数进行重试，次数用尽后抛出异常；返回False则不重试，直接抛出异常。
当时自己测试的时候网上一大堆抄来抄去的，意思是retry_on_exception指定一个函数，函数返回指定异常，会重试，不是异常会退出。真坑人啊！
来看看获取代理的应用 (仅仅是为了测试 retrying 模块)

Copy
# Predicate that checks whether the value it receives is an IOError.
def wraper(args):
    """Report whether *args* is an ``IOError`` instance."""
    matched = isinstance(args, IOError)
    return matched
class ProxyUtil:
    """Demo of ``retry_on_exception``: retry only when ``wraper`` accepts the error."""

    def get_proxies(self):
        """Fetch a proxy address; this demo version always raises ``IOError``."""
        r = requests.get('http://47.98.163.40:17000/get?country=local')
        print('正在获取')
        raise IOError
        # raise IndexError
        # Everything below is unreachable until the demo ``raise`` is removed.
        print('获取到最新代理 = %s' % r.text)
        params = dict()
        if r and r.status_code == 200:
            proxy = str(r.content, encoding='utf-8')
            params['http'] = 'http://' + proxy
            params['https'] = 'https://' + proxy
        return params

    # @retry_handler(retry_time=2, retry_interval=5, retry_on_exception=[IOError,IndexError])
    @retry(stop_max_attempt_number=5, retry_on_exception=wraper)
    def retry_test(self):
        """Call ``get_proxies`` under the retry policy, then print a marker."""
        self.get_proxies()
        print('io')

这种方法只能判断单一的异常，而且扩展性不够高

Copy
# Decide whether to retry by inspecting the return value.
def retry_if_result_none(result):
    """Return True if we should retry (here: when result equals "111"), False otherwise."""
    # return result is None
    # Return an explicit bool instead of falling through to None.
    # NOTE(review): retrying appears to combine this value into a boolean
    # flag, so a real True/False is the safe contract - confirm against the
    # library source.
    return result == "111"


@retry(stop_max_attempt_number=5, retry_on_result=retry_if_result_none)
def might_return_none():
    """Always return "111", which the predicate above treats as "retry"."""
    print("Retry forever ignoring Exceptions with no wait if return value is None")
    return "111"


# Demo call: every attempt returns "111", so the attempts are used up.
might_return_none()

might_return_none函数的返回值传递给retry_if_result_none的result，通过判断 result, 返回True或者None表示需要重试，重试结束后抛出RetryError，返回False表示不重试。
扩展默认的 retry 装饰器：

Copy
def retry_handler(retry_time: int, retry_interval: float, retry_on_exception: [BaseException], *args, **kwargs):
    """Build a ``retry`` decorator that retries on any of several exception types.

    Args:
        retry_time: Maximum number of attempts (passed as
            ``stop_max_attempt_number``).
        retry_interval: Pause between attempts, in seconds.
        retry_on_exception: Iterable of exception classes that should
            trigger a retry; any other exception is not retried.
        *args, **kwargs: Accepted for call compatibility; unused.

    Returns:
        The decorator produced by ``retry``.
    """
    # ``isinstance`` accepts a tuple of classes but not a list, so convert
    # once up front instead of looping on every failure.
    retryable = tuple(retry_on_exception)

    def is_exception(exception):
        return isinstance(exception, retryable)

    return retry(
        # ``wait_fixed`` is in milliseconds. Passing it directly replaces the
        # former per-call ``Retrying(...).fixed_sleep`` wrapper, which relied
        # on a ``Retrying`` name that the visible imports do not provide.
        wait_fixed=retry_interval * 1000,
        stop_max_attempt_number=retry_time,
        retry_on_exception=is_exception,
    )

class ProxyUtil:
    """Demo of ``retry_handler``: retry on either ``IOError`` or ``IndexError``."""

    def get_proxies(self):
        """Fetch a proxy address; this demo version always raises ``IOError``."""
        r = requests.get('代理地址')
        print('正在获取')
        raise IOError
        # raise IndexError
        # Everything below is unreachable until the demo ``raise`` is removed.
        print('获取到最新代理 = %s' % r.text)
        params = dict()
        if r and r.status_code == 200:
            proxy = str(r.content, encoding='utf-8')
            params['http'] = 'http://' + proxy
            params['https'] = 'https://' + proxy
        return params

    @retry_handler(retry_time=2, retry_interval=5, retry_on_exception=[IOError, IndexError])
    # @retry(stop_max_attempt_number=5,retry_on_exception=wraper)
    def retry_test(self):
        """Call ``get_proxies`` under the retry policy, then print a marker."""
        self.get_proxies()
        print('io')
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    proxy = ProxyUtil()
    proxy.retry_test()

Python重试模块retrying

Python 重试模块 retrying

相关帖子

随便看看

Python重试模块retrying