diff --git a/crates/semantic_index/eval/gpt-engineer.json b/crates/semantic_index/eval/gpt-engineer.json index d7c08cd505..64322e8384 100644 --- a/crates/semantic_index/eval/gpt-engineer.json +++ b/crates/semantic_index/eval/gpt-engineer.json @@ -12,7 +12,7 @@ { "query": "What version of the openai package is active?", "matches": [ - "pyproject.toml:14" + "pyproject.toml:14" ] }, { diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json index 4f2edfb063..52d1e9df16 100644 --- a/crates/semantic_index/eval/tree-sitter.json +++ b/crates/semantic_index/eval/tree-sitter.json @@ -48,7 +48,7 @@ "query": "Handle conflict when numerous actions occur on the same symbol", "matches": [ "cli/src/generate/build_tables/build_parse_table.rs:363", - "cli/src/generate/build_tables/build_parse_table.rs:442", + "cli/src/generate/build_tables/build_parse_table.rs:442" ] }, { diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs index c3950757ce..f666f5c281 100644 --- a/crates/semantic_index/examples/eval.rs +++ b/crates/semantic_index/examples/eval.rs @@ -1,19 +1,36 @@ use git2::{Object, Oid, Repository}; +use semantic_index::SearchResult; use serde::Deserialize; use std::path::{Path, PathBuf}; use std::{env, fs}; #[derive(Deserialize, Clone)] -struct QueryMatches { +struct EvaluationQuery { query: String, matches: Vec, } +impl EvaluationQuery { + fn match_pairs(&self) -> Vec<(PathBuf, usize)> { + let mut pairs = Vec::new(); + for match_identifier in self.matches { + let match_parts = match_identifier.split(":"); + + if let Some(file_path) = match_parts.next() { + if let Some(row_number) = match_parts.next() { + pairs.push((PathBuf::from(file_path), from_str::(row_number))); + } + } + + pairs + } +} + #[derive(Deserialize, Clone)] struct RepoEval { repo: String, commit: String, - assertions: Vec, + assertions: Vec, } const TMP_REPO_PATH: &str = "./target/eval_repos"; @@ -77,7 +94,60 @@ fn 
clone_repo(repo_eval: RepoEval) -> anyhow::Result { Ok(clone_path) } +fn dcg(hits: Vec) -> f32 { + let mut result = 0.0; + for (idx, hit) in hits.iter().enumerate() { + result += *hit as f32 / (2.0 + idx as f32).log2(); + } + + println!("DCG: {:?}", result); + result +} + +fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec, k: usize) -> f32 { + + // NDCG, or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of + // items returned by the search engine relative to the hypothetical ideal. + // Relevance is represented as a series of booleans, in which each search result returned + // is identified as being inside the test set of matches (1) or not (0). + + // For example, if results 1, 3 and 5 match the 3 relevant results provided, + // actual dcg is calculated against a vector of [1, 0, 1, 0, 1] + // whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0] + // as this ideal vector assumes the 3 relevant results provided were returned first. + // Normalized dcg is then calculated as actual dcg / ideal dcg. + + // NDCG ranges from 0 to 1, with higher values indicating better performance. + // Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated + // including only the top k values returned. + // The @k metrics can help you identify at what point the relevant results start to fall off. + // E.g. an NDCG@1 of 0.9 and an NDCG@3 of 0.5 may indicate that the first result returned is usually + // very high quality, whereas the quality of lower-ranked results quickly drops off after the first result. 
+ + let ideal = vec![1; cmp::min(eval_query.matches.len(), k)]; + + return dcg(hits) / dcg(ideal); +} + +fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec, k: usize) -> f32 { + +} + +fn evaluate_repo(repo_eval: RepoEval, clone_path: PathBuf) { + + // Launch new repo as a new Zed workspace/project + // Index the project + // Search each eval_query + // Calculate Statistics + +} + fn main() { + + // zed/main.rs + // creating an app and running it, gives you the context. + // create a project, find_or_create_local_worktree. + if let Ok(repo_evals) = parse_eval() { for repo in repo_evals { let cloned = clone_repo(repo.clone()); @@ -85,8 +155,12 @@ fn main() { Ok(clone_path) => { println!( "Cloned {:?} @ {:?} into {:?}", - repo.repo, repo.commit, clone_path + repo.repo, repo.commit, &clone_path ); + + // Evaluate Repo + evaluate_repo(repo, clone_path); + } Err(err) => { println!("Error Cloning: {:?}", err);