Stop sending data to Clickhouse (#21763)

Release Notes:

- N/A
This commit is contained in:
Conrad Irwin 2024-12-10 08:47:29 -07:00 committed by GitHub
parent 43ba0c9fa6
commit 03efd0d1d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 18 additions and 1280 deletions

88
Cargo.lock generated
View file

@ -2475,49 +2475,6 @@ dependencies = [
"util",
]
[[package]]
name = "clickhouse"
version = "0.11.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0875e527e299fc5f4faba42870bf199a39ab0bb2dbba1b8aef0a2151451130f"
dependencies = [
"bstr",
"bytes 1.8.0",
"clickhouse-derive",
"clickhouse-rs-cityhash-sys",
"futures 0.3.31",
"hyper 0.14.31",
"hyper-tls",
"lz4",
"sealed",
"serde",
"static_assertions",
"thiserror 1.0.69",
"tokio",
"url",
]
[[package]]
name = "clickhouse-derive"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18af5425854858c507eec70f7deb4d5d8cec4216fcb086283a78872387281ea5"
dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals 0.26.0",
"syn 1.0.109",
]
[[package]]
name = "clickhouse-rs-cityhash-sys"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4baf9d4700a28d6cb600e17ed6ae2b43298a5245f1f76b4eab63027ebfd592b9"
dependencies = [
"cc",
]
[[package]]
name = "client"
version = "0.1.0"
@ -2668,7 +2625,6 @@ dependencies = [
"call",
"channel",
"chrono",
"clickhouse",
"client",
"clock",
"collab_ui",
@ -7336,25 +7292,6 @@ dependencies = [
"url",
]
[[package]]
name = "lz4"
version = "1.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725"
dependencies = [
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.11.1+lz4-1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "mac"
version = "0.1.1"
@ -11034,7 +10971,7 @@ checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e"
dependencies = [
"proc-macro2",
"quote",
"serde_derive_internals 0.29.1",
"serde_derive_internals",
"syn 2.0.87",
]
@ -11171,18 +11108,6 @@ version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "sealed"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b5e421024b5e5edfbaa8e60ecf90bda9dbffc602dbb230e6028763f85f0c68c"
dependencies = [
"heck 0.3.3",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "search"
version = "0.1.0"
@ -11330,17 +11255,6 @@ dependencies = [
"syn 2.0.87",
]
[[package]]
name = "serde_derive_internals"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bf8229e7920a9f636479437026331ce11aa132b4dde37d121944a44d6e5f3c"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "serde_derive_internals"
version = "0.29.1"

View file

@ -360,7 +360,6 @@ cargo_metadata = "0.19"
cargo_toml = "0.20"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
clickhouse = "0.11.6"
cocoa = "0.26"
cocoa-foundation = "0.2.0"
convert_case = "0.6.0"

View file

@ -19,11 +19,6 @@ LLM_DATABASE_URL = "postgres://postgres@localhost/zed_llm"
LLM_DATABASE_MAX_CONNECTIONS = 5
LLM_API_SECRET = "llm-secret"
# CLICKHOUSE_URL = ""
# CLICKHOUSE_USER = "default"
# CLICKHOUSE_PASSWORD = ""
# CLICKHOUSE_DATABASE = "default"
# SLACK_PANICS_WEBHOOK = ""
# RUST_LOG=info

View file

@ -29,7 +29,6 @@ axum = { version = "0.6", features = ["json", "headers", "ws"] }
axum-extra = { version = "0.4", features = ["erased-json"] }
base64.workspace = true
chrono.workspace = true
clickhouse.workspace = true
clock.workspace = true
collections.workspace = true
dashmap.workspace = true

View file

@ -214,26 +214,6 @@ spec:
secretKeyRef:
name: blob-store
key: bucket
- name: CLICKHOUSE_URL
valueFrom:
secretKeyRef:
name: clickhouse
key: url
- name: CLICKHOUSE_USER
valueFrom:
secretKeyRef:
name: clickhouse
key: user
- name: CLICKHOUSE_PASSWORD
valueFrom:
secretKeyRef:
name: clickhouse
key: password
- name: CLICKHOUSE_DATABASE
valueFrom:
secretKeyRef:
name: clickhouse
key: database
- name: SLACK_PANICS_WEBHOOK
valueFrom:
secretKeyRef:

File diff suppressed because it is too large Load diff

View file

@ -1,28 +0,0 @@
use serde::Serialize;
/// Inserts every element of `rows` into the Clickhouse table named `table`.
///
/// An empty `rows` slice is a no-op that returns `Ok(())` without opening an
/// insert. Otherwise each row is written in order, the insert is finalized,
/// and the number of rows written is logged at `info` level.
///
/// # Errors
///
/// Propagates any error from opening the insert, writing a row, or ending
/// the insert.
pub async fn write_to_table<T: clickhouse::Row + Serialize + std::fmt::Debug>(
    table: &str,
    rows: &[T],
    clickhouse_client: &clickhouse::Client,
) -> anyhow::Result<()> {
    let event_count = rows.len();
    if event_count == 0 {
        return Ok(());
    }

    let mut insert = clickhouse_client.insert(table)?;
    for row in rows {
        insert.write(row).await?;
    }
    insert.end().await?;

    // Singular/plural noun for the log line only.
    let event_specifier = match event_count {
        1 => "event",
        _ => "events",
    };
    log::info!("wrote {event_count} {event_specifier} to '{table}'");

    Ok(())
}

View file

@ -1,7 +1,6 @@
pub mod api;
pub mod auth;
mod cents;
pub mod clickhouse;
pub mod db;
pub mod env;
pub mod executor;
@ -151,10 +150,6 @@ pub struct Config {
pub seed_path: Option<PathBuf>,
pub database_max_connections: u32,
pub api_token: String,
pub clickhouse_url: Option<String>,
pub clickhouse_user: Option<String>,
pub clickhouse_password: Option<String>,
pub clickhouse_database: Option<String>,
pub invite_link_prefix: String,
pub livekit_server: Option<String>,
pub livekit_key: Option<String>,
@ -236,10 +231,6 @@ impl Config {
prediction_api_url: None,
prediction_api_key: None,
prediction_model: None,
clickhouse_url: None,
clickhouse_user: None,
clickhouse_password: None,
clickhouse_database: None,
zed_client_checksum_seed: None,
slack_panics_webhook: None,
auto_join_channel_id: None,
@ -289,7 +280,6 @@ pub struct AppState {
pub stripe_billing: Option<Arc<StripeBilling>>,
pub rate_limiter: Arc<RateLimiter>,
pub executor: Executor,
pub clickhouse_client: Option<::clickhouse::Client>,
pub kinesis_client: Option<::aws_sdk_kinesis::Client>,
pub config: Config,
}
@ -343,10 +333,6 @@ impl AppState {
stripe_client,
rate_limiter: Arc::new(RateLimiter::new(db)),
executor,
clickhouse_client: config
.clickhouse_url
.as_ref()
.and_then(|_| build_clickhouse_client(&config).log_err()),
kinesis_client: if config.kinesis_access_key.is_some() {
build_kinesis_client(&config).await.log_err()
} else {
@ -429,31 +415,3 @@ async fn build_kinesis_client(config: &Config) -> anyhow::Result<aws_sdk_kinesis
Ok(aws_sdk_kinesis::Client::new(&kinesis_config))
}
/// Builds a Clickhouse client from the connection settings stored in `config`.
///
/// # Errors
///
/// Returns an error naming the first missing setting, checked in this order:
/// `clickhouse_url`, `clickhouse_user`, `clickhouse_password`,
/// `clickhouse_database`.
fn build_clickhouse_client(config: &Config) -> anyhow::Result<::clickhouse::Client> {
    let url = config
        .clickhouse_url
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_url"))?;
    let user = config
        .clickhouse_user
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_user"))?;
    let password = config
        .clickhouse_password
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_password"))?;
    let database = config
        .clickhouse_database
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_database"))?;

    let client = ::clickhouse::Client::default()
        .with_url(url)
        .with_user(user)
        .with_password(password)
        .with_database(database);
    Ok(client)
}

View file

@ -1,14 +1,11 @@
mod authorization;
pub mod db;
mod telemetry;
mod token;
use crate::api::events::SnowflakeRow;
use crate::api::CloudflareIpCountryHeader;
use crate::build_kinesis_client;
use crate::{
build_clickhouse_client, db::UserId, executor::Executor, Cents, Config, Error, Result,
};
use crate::{db::UserId, executor::Executor, Cents, Config, Error, Result};
use anyhow::{anyhow, Context as _};
use authorization::authorize_access_to_language_model;
use axum::routing::get;
@ -40,7 +37,6 @@ use std::{
task::{Context, Poll},
};
use strum::IntoEnumIterator;
use telemetry::{report_llm_rate_limit, report_llm_usage, LlmRateLimitEventRow, LlmUsageEventRow};
use tokio::sync::RwLock;
use util::ResultExt;
@ -52,7 +48,6 @@ pub struct LlmState {
pub db: Arc<LlmDatabase>,
pub http_client: ReqwestClient,
pub kinesis_client: Option<aws_sdk_kinesis::Client>,
pub clickhouse_client: Option<clickhouse::Client>,
active_user_count_by_model:
RwLock<HashMap<(LanguageModelProvider, String), (DateTime<Utc>, ActiveUserCount)>>,
}
@ -89,10 +84,6 @@ impl LlmState {
} else {
None
},
clickhouse_client: config
.clickhouse_url
.as_ref()
.and_then(|_| build_clickhouse_client(&config).log_err()),
active_user_count_by_model: RwLock::new(HashMap::default()),
config,
};
@ -630,34 +621,6 @@ async fn check_usage_limit(
.await
.log_err();
if let Some(client) = state.clickhouse_client.as_ref() {
report_llm_rate_limit(
client,
LlmRateLimitEventRow {
time: Utc::now().timestamp_millis(),
user_id: claims.user_id as i32,
is_staff: claims.is_staff,
plan: match claims.plan {
Plan::Free => "free".to_string(),
Plan::ZedPro => "zed_pro".to_string(),
},
model: model.name.clone(),
provider: provider.to_string(),
usage_measure: resource.to_string(),
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
users_in_recent_minutes: users_in_recent_minutes as u64,
users_in_recent_days: users_in_recent_days as u64,
max_requests_per_minute: per_user_max_requests_per_minute as u64,
max_tokens_per_minute: per_user_max_tokens_per_minute as u64,
max_tokens_per_day: per_user_max_tokens_per_day as u64,
},
)
.await
.log_err();
}
return Err(Error::http(
StatusCode::TOO_MANY_REQUESTS,
format!("Rate limit exceeded. Maximum {} reached.", resource),
@ -765,44 +728,6 @@ impl<S> Drop for TokenCountingStream<S> {
.write(&state.kinesis_client, &state.config.kinesis_stream)
.await
.log_err();
if let Some(clickhouse_client) = state.clickhouse_client.as_ref() {
report_llm_usage(
clickhouse_client,
LlmUsageEventRow {
time: Utc::now().timestamp_millis(),
user_id: claims.user_id as i32,
is_staff: claims.is_staff,
plan: match claims.plan {
Plan::Free => "free".to_string(),
Plan::ZedPro => "zed_pro".to_string(),
},
model,
provider: provider.to_string(),
input_token_count: tokens.input as u64,
cache_creation_input_token_count: tokens.input_cache_creation as u64,
cache_read_input_token_count: tokens.input_cache_read as u64,
output_token_count: tokens.output as u64,
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
input_tokens_this_month: usage.tokens_this_month.input as u64,
cache_creation_input_tokens_this_month: usage
.tokens_this_month
.input_cache_creation
as u64,
cache_read_input_tokens_this_month: usage
.tokens_this_month
.input_cache_read
as u64,
output_tokens_this_month: usage.tokens_this_month.output as u64,
spending_this_month: usage.spending_this_month.0 as u64,
lifetime_spending: usage.lifetime_spending.0 as u64,
},
)
.await
.log_err();
}
}
})
}

View file

@ -1,65 +0,0 @@
use anyhow::{Context, Result};
use serde::Serialize;
use crate::clickhouse::write_to_table;
/// A single row describing one LLM usage event, in the shape uploaded by
/// `report_llm_usage`.
///
/// NOTE(review): field order is presumably significant for the derived
/// `clickhouse::Row` serialization — confirm before reordering.
#[derive(Serialize, Debug, clickhouse::Row)]
pub struct LlmUsageEventRow {
/// Event timestamp; presumably Unix epoch milliseconds — confirm against the caller.
pub time: i64,
/// Id of the user the usage is attributed to.
pub user_id: i32,
/// Whether the user is flagged as staff.
pub is_staff: bool,
/// Name of the user's plan, as a string.
pub plan: String,
/// Name of the language model that was used.
pub model: String,
/// Name of the provider serving the model.
pub provider: String,
// Token counts for this event.
pub input_token_count: u64,
pub cache_creation_input_token_count: u64,
pub cache_read_input_token_count: u64,
pub output_token_count: u64,
// Usage totals over the named window (minute / day / month).
pub requests_this_minute: u64,
pub tokens_this_minute: u64,
pub tokens_this_day: u64,
pub input_tokens_this_month: u64,
pub cache_creation_input_tokens_this_month: u64,
pub cache_read_input_tokens_this_month: u64,
pub output_tokens_this_month: u64,
// Spending totals; the unit is not visible here (presumably cents — TODO confirm).
pub spending_this_month: u64,
pub lifetime_spending: u64,
}
/// A single row describing one LLM rate-limit event, in the shape uploaded by
/// `report_llm_rate_limit`.
///
/// NOTE(review): field order is presumably significant for the derived
/// `clickhouse::Row` serialization — confirm before reordering.
#[derive(Serialize, Debug, clickhouse::Row)]
pub struct LlmRateLimitEventRow {
/// Event timestamp; presumably Unix epoch milliseconds — confirm against the caller.
pub time: i64,
/// Id of the user who hit the limit.
pub user_id: i32,
/// Whether the user is flagged as staff.
pub is_staff: bool,
/// Name of the user's plan, as a string.
pub plan: String,
/// Name of the language model that was requested.
pub model: String,
/// Name of the provider serving the model.
pub provider: String,
/// Which usage measure was exceeded, as a string.
pub usage_measure: String,
// The user's usage over the named window.
pub requests_this_minute: u64,
pub tokens_this_minute: u64,
pub tokens_this_day: u64,
// Active-user counts; the exact window lengths are not visible here.
pub users_in_recent_minutes: u64,
pub users_in_recent_days: u64,
// The limits that were in effect.
pub max_requests_per_minute: u64,
pub max_tokens_per_minute: u64,
pub max_tokens_per_day: u64,
}
/// Uploads one usage row to the `llm_usage_events` Clickhouse table.
///
/// # Errors
///
/// Returns the underlying write error wrapped with a message naming the
/// table that could not be written.
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {
    const LLM_USAGE_EVENTS_TABLE: &str = "llm_usage_events";

    let rows = [row];
    write_to_table(LLM_USAGE_EVENTS_TABLE, &rows, client)
        .await
        .with_context(|| format!("failed to upload to table '{LLM_USAGE_EVENTS_TABLE}'"))
}
/// Uploads one rate-limit row to the `llm_rate_limit_events` Clickhouse table.
///
/// # Errors
///
/// Returns the underlying write error wrapped with a message naming the
/// table that could not be written.
pub async fn report_llm_rate_limit(
    client: &clickhouse::Client,
    row: LlmRateLimitEventRow,
) -> Result<()> {
    const LLM_RATE_LIMIT_EVENTS_TABLE: &str = "llm_rate_limit_events";

    let rows = [row];
    write_to_table(LLM_RATE_LIMIT_EVENTS_TABLE, &rows, client)
        .await
        .with_context(|| format!("failed to upload to table '{LLM_RATE_LIMIT_EVENTS_TABLE}'"))
}

View file

@ -518,7 +518,6 @@ impl TestServer {
stripe_billing: None,
rate_limiter: Arc::new(RateLimiter::new(test_db.db().clone())),
executor,
clickhouse_client: None,
kinesis_client: None,
config: Config {
http_port: 0,
@ -549,10 +548,6 @@ impl TestServer {
prediction_api_url: None,
prediction_api_key: None,
prediction_model: None,
clickhouse_url: None,
clickhouse_user: None,
clickhouse_password: None,
clickhouse_database: None,
zed_client_checksum_seed: None,
slack_panics_webhook: None,
auto_join_channel_id: None,

View file

@ -22,8 +22,8 @@ The telemetry settings can also be configured via the welcome screen, which can
Telemetry is sent from the application to our servers. Data is proxied through our servers to enable us to easily switch analytics services. We currently use:
- [Axiom](https://axiom.co): Cloud-monitoring service - stores diagnostic events
- [Clickhouse](https://clickhouse.com): Business Intelligence platform - stores both diagnostic and metric events
- [Metabase](https://www.metabase.com): Dashboards - dashboards built around data pulled from Clickhouse
- [Snowflake](https://snowflake.com): Business Intelligence platform - stores both diagnostic and metric events
- [Metabase](https://www.metabase.com): Dashboards - dashboards built around data pulled from Snowflake
## Types of Telemetry

View file

@ -10,13 +10,14 @@ This page provides information about the Subprocessors Zed has engaged to provid
| Cloudflare | Cloud Infrastructure | Worldwide |
| Vercel | Cloud Infrastructure | United States |
| DigitalOcean | Cloud Infrastructure | United States |
| AWS | Cloud Infrastructure | United States |
| ConvertKit | Email Marketing | United States |
| Axiom | Analytics | United States |
| ClickHouse | Analytics | United States |
| Snowflake | Analytics | United States |
| Metabase | Analytics | United States |
| GitHub | Authentication | United States |
| LiveKit | Audio Conferencing | United States |
| Anthropic | AI Services | United States |
| OpenAI | AI Services | United States |
**DATE: August 19, 2024**
**DATE: December 9, 2024**

View file

@ -54,9 +54,6 @@ extend-ignore-re = [
'"ba"',
# :/ crates/collab/migrations/20231009181554_add_release_channel_to_rooms.sql
"COLUMN enviroment",
# Typo in ClickHouse column name.
# crates/collab/src/api/events.rs
"rename = \"sesssion_id\"",
"doas",
# ProtoLS crate with tree-sitter Protobuf grammar.
"protols",