From 6d9fa0e718e455792ecaa5728f2817be290e3321 Mon Sep 17 00:00:00 2001
From: Oleksandr Kozachuk
Date: Tue, 11 Jul 2023 14:14:38 +0200
Subject: [PATCH] Added exponential backoff on ratelimit error, also print the
 token usage in output.

---
 fjerkroa_bot/ai_responder.py | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/fjerkroa_bot/ai_responder.py b/fjerkroa_bot/ai_responder.py
index 5e22aa4..faed325 100644
--- a/fjerkroa_bot/ai_responder.py
+++ b/fjerkroa_bot/ai_responder.py
@@ -1,4 +1,6 @@
 import json
+import asyncio
+import random
 import multiline
 import openai
 import aiohttp
@@ -31,6 +33,37 @@ def parse_json(content: str) -> Dict:
             raise err
 
 
+def exponential_backoff(base=2, max_delay=60, factor=1, jitter=0.1):
+    """Generate sleep intervals for exponential backoff with jitter.
+
+    The nominal delay for attempt ``n`` is ``factor * base ** n``, capped
+    at ``max_delay``; a random offset of up to ``jitter`` times the nominal
+    delay is then added or subtracted.  The generator never terminates, so
+    create a fresh one to restart from the shortest delay.
+
+    Args:
+        base: Base of the exponentiation operation
+        max_delay: Maximum delay, never exceeded even after jitter
+        factor: Multiplier applied to the exponential term
+        jitter: Fraction of the delay used as the randomness range, to
+            prevent the thundering herd problem
+
+    Yields:
+        Delay for backoff as a floating point number.
+    """
+    attempt = 0
+    while True:
+        delay = factor * base ** attempt
+        if delay < max_delay:
+            attempt += 1
+        else:
+            # Cap reached: stop growing the exponent so the power neither
+            # becomes a huge integer nor overflows for a float base.
+            delay = max_delay
+        spread = jitter * delay
+        yield min(float(max_delay), delay + random.uniform(-spread, spread))
+
+
 def parse_maybe_json(json_string):
     if json_string is None:
         return None
@@ -94,6 +127,7 @@ class AIResponder(object):
         self.history: List[Dict[str, Any]] = []
         self.channel = channel if channel is not None else 'system'
         openai.api_key = self.config['openai-token']
+        self.rate_limit_backoff = exponential_backoff()
         self.history_file: Optional[Path] = None
         if 'history-directory' in self.config:
             self.history_file = Path(self.config['history-directory']).expanduser() / f'{self.channel}.dat'
@@ -182,7 +216,8 @@ class AIResponder(object):
             answer = result['choices'][0]['message']
             if type(answer) != dict:
                 answer = answer.to_dict()
-            logging.info(f"generated response: {repr(answer)}")
+            self.rate_limit_backoff = exponential_backoff()
+            logging.info(f"generated response {result.get('usage')}: {repr(answer)}")
             return answer, limit
         except openai.error.InvalidRequestError as err:
             if 'maximum context length is' in str(err) and limit > 4:
@@ -190,6 +225,10 @@ class AIResponder(object):
                 limit -= 1
                 return None, limit
             raise err
+        except openai.error.RateLimitError as err:
+            rate_limit_sleep = next(self.rate_limit_backoff)
+            logging.warning(f"got an rate limit error, sleep for {rate_limit_sleep} seconds: {str(err)}")
+            await asyncio.sleep(rate_limit_sleep)
         except Exception as err:
             logging.warning(f"failed to generate response: {repr(err)}")
         return None, limit