Use fallback BPE if the language model doesn't have one (#6848)

Release Notes:

- Added a fallback BPE if the language model doesn't have one.

---------

Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>
Co-authored-by: Marshall Bowers <elliott.codes@gmail.com>
This commit is contained in:
Todsaporn Banjerdkit 2024-01-30 10:42:03 +07:00 committed by GitHub
parent 9cb5a84b8d
commit 1ab49fdbe6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 6 additions and 4 deletions

View file

@@ -30,7 +30,7 @@ use crate::providers::open_ai::OpenAiLanguageModel;
use crate::providers::open_ai::OPEN_AI_API_URL;
// Shared tokenizer built from `cl100k_base()` and wrapped in `lazy_static!`.
// The `.unwrap()` means a failure to build the tokenizer panics rather than
// being reported to the caller.
// NOTE(review): this is a diff hunk shown without +/- markers; the two
// declarations below appear to be the before/after forms of one line.
lazy_static! {
// Presumably the removed line: the static has no visibility modifier.
static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
// Presumably the added line: identical apart from the `pub(crate)` visibility,
// which lets other modules in the crate name this tokenizer.
pub(crate) static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
}
#[derive(Clone)]

View file

@@ -1,9 +1,10 @@
use anyhow::anyhow;
use tiktoken_rs::CoreBPE;
use util::ResultExt;
use crate::models::{LanguageModel, TruncationDirection};
use super::OPEN_AI_BPE_TOKENIZER;
#[derive(Clone)]
pub struct OpenAiLanguageModel {
name: String,
@@ -12,10 +13,11 @@ pub struct OpenAiLanguageModel {
// NOTE(review): this is a diff hunk shown without +/- markers; where two
// adjacent statements bind or set `bpe`, they appear to be the before/after
// forms of the same line rather than code that runs together.
impl OpenAiLanguageModel {
// Builds an `OpenAiLanguageModel` for `model_name`, looking up its BPE
// tokenizer with `tiktoken_rs::get_bpe_from_model`.
pub fn load(model_name: &str) -> Self {
// Presumably the removed line: the lookup result goes through `.log_err()`
// (from `util::ResultExt`) and is stored as-is in the `bpe` field below.
let bpe = tiktoken_rs::get_bpe_from_model(model_name).log_err();
// Presumably the added lines: when the lookup fails, fall back to an owned
// copy of the shared `OPEN_AI_BPE_TOKENIZER`.
// NOTE(review): `unwrap_or` evaluates its argument eagerly, so `to_owned()`
// runs even when the lookup succeeds; `unwrap_or_else(|| ...)` would defer it.
// NOTE(review): the lookup error is discarded here; no `.log_err()` call remains.
let bpe =
tiktoken_rs::get_bpe_from_model(model_name).unwrap_or(OPEN_AI_BPE_TOKENIZER.to_owned());
OpenAiLanguageModel {
name: model_name.to_string(),
// Presumably the removed line: field set directly from the `.log_err()` result.
bpe,
// Presumably the added line: the tokenizer is now always present, so it is
// wrapped in `Some` to fit the field's existing type.
bpe: Some(bpe),
}
}
}