Implement panic reporting saving and uploads (#19932)

TODO: 
- [x] check that the app version is well formatted for zed.dev

Release Notes:

- N/A

---------

Co-authored-by: Trace <violet.white.batt@gmail.com>
This commit is contained in:
Mikayla Maki 2024-10-29 23:54:00 -07:00 committed by GitHub
parent a5f52f0f04
commit b3f0ba1430
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 278 additions and 45 deletions

5
Cargo.lock generated
View file

@ -9553,6 +9553,7 @@ dependencies = [
"async-watch",
"backtrace",
"cargo_toml",
"chrono",
"clap",
"client",
"clock",
@ -9572,6 +9573,8 @@ dependencies = [
"node_runtime",
"paths",
"project",
"proto",
"release_channel",
"remote",
"reqwest_client",
"rpc",
@ -9581,6 +9584,7 @@ dependencies = [
"settings",
"shellexpand 2.1.2",
"smol",
"telemetry_events",
"toml 0.8.19",
"util",
"worktree",
@ -15092,6 +15096,7 @@ dependencies = [
"project",
"project_panel",
"project_symbols",
"proto",
"quick_action_bar",
"recent_projects",
"release_channel",

View file

@ -217,6 +217,7 @@ pub(crate) type KeystrokeObserver =
type QuitHandler = Box<dyn FnOnce(&mut AppContext) -> LocalBoxFuture<'static, ()> + 'static>;
type ReleaseListener = Box<dyn FnOnce(&mut dyn Any, &mut AppContext) + 'static>;
type NewViewListener = Box<dyn FnMut(AnyView, &mut WindowContext) + 'static>;
type NewModelListener = Box<dyn FnMut(AnyModel, &mut AppContext) + 'static>;
/// Contains the state of the full application, and passed as a reference to a variety of callbacks.
/// Other contexts such as [ModelContext], [WindowContext], and [ViewContext] deref to this type, making it the most general context type.
@ -237,6 +238,7 @@ pub struct AppContext {
http_client: Arc<dyn HttpClient>,
pub(crate) globals_by_type: FxHashMap<TypeId, Box<dyn Any>>,
pub(crate) entities: EntityMap,
pub(crate) new_model_observers: SubscriberSet<TypeId, NewModelListener>,
pub(crate) new_view_observers: SubscriberSet<TypeId, NewViewListener>,
pub(crate) windows: SlotMap<WindowId, Option<Window>>,
pub(crate) window_handles: FxHashMap<WindowId, AnyWindowHandle>,
@ -296,6 +298,7 @@ impl AppContext {
globals_by_type: FxHashMap::default(),
entities,
new_view_observers: SubscriberSet::new(),
new_model_observers: SubscriberSet::new(),
window_handles: FxHashMap::default(),
windows: SlotMap::with_key(),
keymap: Rc::new(RefCell::new(Keymap::default())),
@ -1016,6 +1019,7 @@ impl AppContext {
activate();
subscription
}
/// Arrange for the given function to be invoked whenever a view of the specified type is created.
/// The function will be passed a mutable reference to the view along with an appropriate context.
pub fn observe_new_views<V: 'static>(
@ -1035,6 +1039,31 @@ impl AppContext {
)
}
pub(crate) fn new_model_observer(&self, key: TypeId, value: NewModelListener) -> Subscription {
let (subscription, activate) = self.new_model_observers.insert(key, value);
activate();
subscription
}
/// Arrange for the given function to be invoked whenever a view of the specified type is created.
/// The function will be passed a mutable reference to the view along with an appropriate context.
pub fn observe_new_models<T: 'static>(
&self,
on_new: impl 'static + Fn(&mut T, &mut ModelContext<T>),
) -> Subscription {
self.new_model_observer(
TypeId::of::<T>(),
Box::new(move |any_model: AnyModel, cx: &mut AppContext| {
any_model
.downcast::<T>()
.unwrap()
.update(cx, |model_state, cx| {
on_new(model_state, cx);
})
}),
)
}
/// Observe the release of a model or view. The callback is invoked after the model or view
/// has no more strong references but before it has been dropped.
pub fn observe_release<E, T>(
@ -1346,8 +1375,21 @@ impl Context for AppContext {
) -> Model<T> {
self.update(|cx| {
let slot = cx.entities.reserve();
let model = slot.clone();
let entity = build_model(&mut ModelContext::new(cx, slot.downgrade()));
cx.entities.insert(slot, entity)
cx.entities.insert(slot, entity);
// Non-generic part to avoid leaking SubscriberSet to invokers of `new_view`.
fn notify_observers(cx: &mut AppContext, tid: TypeId, model: AnyModel) {
cx.new_model_observers.clone().retain(&tid, |observer| {
let any_model = model.clone();
(observer)(any_model, cx);
true
});
}
notify_observers(cx, TypeId::of::<T>(), AnyModel::from(model.clone()));
model
})
}

View file

@ -289,7 +289,10 @@ message Envelope {
ActiveToolchainResponse active_toolchain_response = 277;
GetPathMetadata get_path_metadata = 278;
GetPathMetadataResponse get_path_metadata_response = 279; // current max
GetPathMetadataResponse get_path_metadata_response = 279;
GetPanicFiles get_panic_files = 280;
GetPanicFilesResponse get_panic_files_response = 281; // current max
}
reserved 87 to 88;
@ -2489,5 +2492,11 @@ message UpdateGitBranch {
uint64 project_id = 1;
string branch_name = 2;
ProjectPath repository = 3;
}
message GetPanicFiles {
}
message GetPanicFilesResponse {
repeated string file_contents = 2;
}

View file

@ -363,7 +363,9 @@ messages!(
(ActiveToolchain, Foreground),
(ActiveToolchainResponse, Foreground),
(GetPathMetadata, Background),
(GetPathMetadataResponse, Background)
(GetPathMetadataResponse, Background),
(GetPanicFiles, Background),
(GetPanicFilesResponse, Background),
);
request_messages!(
@ -483,7 +485,8 @@ request_messages!(
(ListToolchains, ListToolchainsResponse),
(ActivateToolchain, Ack),
(ActiveToolchain, ActiveToolchainResponse),
(GetPathMetadata, GetPathMetadataResponse)
(GetPathMetadata, GetPathMetadataResponse),
(GetPanicFiles, GetPanicFilesResponse)
);
entity_messages!(
@ -566,7 +569,7 @@ entity_messages!(
ListToolchains,
ActivateToolchain,
ActiveToolchain,
GetPathMetadata
GetPathMetadata,
);
entity_messages!(

View file

@ -22,9 +22,10 @@ debug-embed = ["dep:rust-embed"]
test-support = ["fs/test-support"]
[dependencies]
async-watch.workspace = true
anyhow.workspace = true
async-watch.workspace = true
backtrace = "0.3"
chrono.workspace = true
clap.workspace = true
client.workspace = true
env_logger.workspace = true
@ -39,8 +40,10 @@ languages.workspace = true
log.workspace = true
lsp.workspace = true
node_runtime.workspace = true
project.workspace = true
paths = { workspace = true }
project.workspace = true
proto.workspace = true
release_channel.workspace = true
remote.workspace = true
reqwest_client.workspace = true
rpc.workspace = true
@ -50,6 +53,7 @@ serde_json.workspace = true
settings.workspace = true
shellexpand.workspace = true
smol.workspace = true
telemetry_events.workspace = true
util.workspace = true
worktree.workspace = true

View file

@ -1,12 +1,13 @@
use crate::headless_project::HeadlessAppState;
use crate::HeadlessProject;
use anyhow::{anyhow, Context, Result};
use client::ProxySettings;
use chrono::Utc;
use client::{telemetry, ProxySettings};
use fs::{Fs, RealFs};
use futures::channel::mpsc;
use futures::{select, select_biased, AsyncRead, AsyncWrite, AsyncWriteExt, FutureExt, SinkExt};
use git::GitHostingProviderRegistry;
use gpui::{AppContext, Context as _, ModelContext, UpdateGlobal as _};
use gpui::{AppContext, Context as _, Model, ModelContext, UpdateGlobal as _};
use http_client::{read_proxy_from_env, Uri};
use language::LanguageRegistry;
use node_runtime::{NodeBinaryOptions, NodeRuntime};
@ -21,19 +22,23 @@ use remote::{
};
use reqwest_client::ReqwestClient;
use rpc::proto::{self, Envelope, SSH_PROJECT_ID};
use rpc::{AnyProtoClient, TypedEnvelope};
use settings::{watch_config_file, Settings, SettingsStore};
use smol::channel::{Receiver, Sender};
use smol::io::AsyncReadExt;
use smol::Async;
use smol::{net::unix::UnixListener, stream::StreamExt as _};
use std::ffi::OsStr;
use std::ops::ControlFlow;
use std::{env, thread};
use std::{
io::Write,
mem,
path::{Path, PathBuf},
sync::Arc,
};
use telemetry_events::LocationData;
use util::ResultExt;
fn init_logging_proxy() {
@ -131,16 +136,97 @@ fn init_panic_hook() {
backtrace.drain(0..=ix);
}
let thread = thread::current();
let thread_name = thread.name().unwrap_or("<unnamed>");
log::error!(
"panic occurred: {}\nBacktrace:\n{}",
payload,
backtrace.join("\n")
&payload,
(&backtrace).join("\n")
);
let panic_data = telemetry_events::Panic {
thread: thread_name.into(),
payload: payload.clone(),
location_data: info.location().map(|location| LocationData {
file: location.file().into(),
line: location.line(),
}),
app_version: format!(
"remote-server-{}",
option_env!("ZED_COMMIT_SHA").unwrap_or(&env!("ZED_PKG_VERSION"))
),
release_channel: release_channel::RELEASE_CHANNEL.display_name().into(),
os_name: telemetry::os_name(),
os_version: Some(telemetry::os_version()),
architecture: env::consts::ARCH.into(),
panicked_on: Utc::now().timestamp_millis(),
backtrace,
system_id: None, // Set on SSH client
installation_id: None, // Set on SSH client
session_id: "".to_string(), // Set on SSH client
};
if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
let panic_file = std::fs::OpenOptions::new()
.append(true)
.create(true)
.open(&panic_file_path)
.log_err();
if let Some(mut panic_file) = panic_file {
writeln!(&mut panic_file, "{panic_data_json}").log_err();
panic_file.flush().log_err();
}
}
std::process::abort();
}));
}
fn handle_panic_requests(project: &Model<HeadlessProject>, client: &Arc<ChannelClient>) {
let client: AnyProtoClient = client.clone().into();
client.add_request_handler(
project.downgrade(),
|_, _: TypedEnvelope<proto::GetPanicFiles>, _cx| async move {
let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
let mut panic_files = Vec::new();
while let Some(child) = children.next().await {
let child = child?;
let child_path = child.path();
if child_path.extension() != Some(OsStr::new("panic")) {
continue;
}
let filename = if let Some(filename) = child_path.file_name() {
filename.to_string_lossy()
} else {
continue;
};
if !filename.starts_with("zed") {
continue;
}
let file_contents = smol::fs::read_to_string(&child_path)
.await
.context("error reading panic file")?;
panic_files.push(file_contents);
// We've done what we can, delete the file
std::fs::remove_file(child_path)
.context("error removing panic")
.log_err();
}
anyhow::Ok(proto::GetPanicFilesResponse {
file_contents: panic_files,
})
},
);
}
struct ServerListeners {
stdin: UnixListener,
stdout: UnixListener,
@ -368,7 +454,7 @@ pub fn execute_run(
HeadlessProject::new(
HeadlessAppState {
session,
session: session.clone(),
fs,
http_client,
node_runtime,
@ -378,6 +464,8 @@ pub fn execute_run(
)
});
handle_panic_requests(&project, &session);
mem::forget(project);
});
log::info!("gpui app is shut down. quitting.");

View file

@ -222,13 +222,13 @@ pub struct HangReport {
pub installation_id: Option<String>,
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct LocationData {
pub file: String,
pub line: u32,
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Panic {
/// The name of the thread that panicked
pub thread: String,

View file

@ -77,6 +77,7 @@ profiling.workspace = true
project.workspace = true
project_panel.workspace = true
project_symbols.workspace = true
proto.workspace = true
quick_action_bar.workspace = true
recent_projects.workspace = true
release_channel.workspace = true

View file

@ -329,7 +329,7 @@ fn main() {
telemetry.start(
system_id.as_ref().map(|id| id.to_string()),
installation_id.as_ref().map(|id| id.to_string()),
session_id,
session_id.clone(),
cx,
);
@ -365,7 +365,9 @@ fn main() {
auto_update::init(client.http_client(), cx);
reliability::init(
client.http_client(),
system_id.as_ref().map(|id| id.to_string()),
installation_id.clone().map(|id| id.to_string()),
session_id.clone(),
cx,
);

View file

@ -1,13 +1,14 @@
use anyhow::{Context, Result};
use backtrace::{self, Backtrace};
use chrono::Utc;
use client::telemetry;
use client::{telemetry, TelemetrySettings};
use db::kvp::KEY_VALUE_STORE;
use gpui::{AppContext, SemanticVersion};
use http_client::{HttpRequestExt, Method};
use http_client::{self, HttpClient, HttpClientWithUrl};
use paths::{crashes_dir, crashes_retired_dir};
use project::Project;
use release_channel::ReleaseChannel;
use release_channel::RELEASE_CHANNEL;
use settings::Settings;
@ -21,6 +22,7 @@ use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
use telemetry_events::LocationData;
use telemetry_events::Panic;
use telemetry_events::PanicRequest;
use url::Url;
use util::ResultExt;
use crate::stdout_is_a_pty;
@ -133,13 +135,73 @@ pub fn init_panic_hook(
pub fn init(
http_client: Arc<HttpClientWithUrl>,
system_id: Option<String>,
installation_id: Option<String>,
session_id: String,
cx: &mut AppContext,
) {
#[cfg(target_os = "macos")]
monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
upload_panics_and_crashes(http_client, installation_id, cx)
let Some(panic_report_url) = http_client
.build_zed_api_url("/telemetry/panics", &[])
.log_err()
else {
return;
};
upload_panics_and_crashes(
http_client.clone(),
panic_report_url.clone(),
installation_id.clone(),
cx,
);
cx.observe_new_models(move |project: &mut Project, cx| {
let http_client = http_client.clone();
let panic_report_url = panic_report_url.clone();
let session_id = session_id.clone();
let installation_id = installation_id.clone();
let system_id = system_id.clone();
if let Some(ssh_client) = project.ssh_client() {
ssh_client.update(cx, |client, cx| {
if TelemetrySettings::get_global(cx).diagnostics {
let request = client.proto_client().request(proto::GetPanicFiles {});
cx.background_executor()
.spawn(async move {
let panic_files = request.await?;
for file in panic_files.file_contents {
let panic: Option<Panic> = serde_json::from_str(&file)
.log_err()
.or_else(|| {
file.lines()
.next()
.and_then(|line| serde_json::from_str(line).ok())
})
.unwrap_or_else(|| {
log::error!("failed to deserialize panic file {:?}", file);
None
});
if let Some(mut panic) = panic {
panic.session_id = session_id.clone();
panic.system_id = system_id.clone();
panic.installation_id = installation_id.clone();
upload_panic(&http_client, &panic_report_url, panic, &mut None)
.await?;
}
}
anyhow::Ok(())
})
.detach_and_log_err(cx);
}
})
}
})
.detach();
}
#[cfg(target_os = "macos")]
@ -346,16 +408,18 @@ pub fn monitor_main_thread_hangs(
fn upload_panics_and_crashes(
http: Arc<HttpClientWithUrl>,
panic_report_url: Url,
installation_id: Option<String>,
cx: &AppContext,
) {
let telemetry_settings = *client::TelemetrySettings::get_global(cx);
cx.background_executor()
.spawn(async move {
let most_recent_panic = upload_previous_panics(http.clone(), telemetry_settings)
.await
.log_err()
.flatten();
let most_recent_panic =
upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings)
.await
.log_err()
.flatten();
upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
.await
.log_err()
@ -366,9 +430,9 @@ fn upload_panics_and_crashes(
/// Uploads panics via `zed.dev`.
async fn upload_previous_panics(
http: Arc<HttpClientWithUrl>,
panic_report_url: &Url,
telemetry_settings: client::TelemetrySettings,
) -> Result<Option<(i64, String)>> {
let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?;
) -> anyhow::Result<Option<(i64, String)>> {
let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
let mut most_recent_panic = None;
@ -396,7 +460,7 @@ async fn upload_previous_panics(
.context("error reading panic file")?;
let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
.ok()
.log_err()
.or_else(|| {
panic_file_content
.lines()
@ -409,26 +473,8 @@ async fn upload_previous_panics(
});
if let Some(panic) = panic {
most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? {
continue;
};
let Ok(request) = http_client::Request::builder()
.method(Method::POST)
.uri(panic_report_url.as_ref())
.header("x-zed-checksum", checksum)
.body(json_bytes.into())
else {
continue;
};
let response = http.send(request).await.context("error sending panic")?;
if !response.status().is_success() {
log::error!("Error uploading panic to server: {}", response.status());
}
}
}
@ -438,9 +484,42 @@ async fn upload_previous_panics(
.context("error removing panic")
.log_err();
}
Ok::<_, anyhow::Error>(most_recent_panic)
Ok(most_recent_panic)
}
async fn upload_panic(
http: &Arc<HttpClientWithUrl>,
panic_report_url: &Url,
panic: telemetry_events::Panic,
most_recent_panic: &mut Option<(i64, String)>,
) -> Result<bool> {
*most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
let json_bytes = serde_json::to_vec(&PanicRequest {
panic: panic.clone(),
})
.unwrap();
let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
return Ok(false);
};
let Ok(request) = http_client::Request::builder()
.method(Method::POST)
.uri(panic_report_url.as_ref())
.header("x-zed-checksum", checksum)
.body(json_bytes.into())
else {
return Ok(false);
};
let response = http.send(request).await.context("error sending panic")?;
if !response.status().is_success() {
log::error!("Error uploading panic to server: {}", response.status());
}
Ok(true)
}
const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
/// upload crashes from apple's diagnostic reports to our server.