Coverage for apps/inners/use_cases/utilities/search_statistic.py: 12%
40 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-22 19:03 +0000
from typing import List, Dict
class SearchStatistic:
    """Aggregate scores of windowed search results per source-document index.

    A "windowed document" here is any object exposing ``score`` and a ``meta``
    mapping with the keys ``"index_window"`` and ``"window_size"``.
    NOTE(review): presumably each window spans ``window_size`` consecutive
    source documents starting at ``index_window`` -- confirm against the caller.
    """

    def get_document_indexes_with_overlapped_scores(self, result_windowed_documents: List) -> Dict[int, Dict]:
        """Return the running mean score for every source index covered by a window.

        Args:
            result_windowed_documents: Objects with a ``score`` attribute and a
                ``meta`` mapping holding ``"index_window"`` and ``"window_size"``.

        Returns:
            A dict keyed by source index. Each value is a dict with ``"count"``
            (how many windows covered that index) and ``"score_mean"`` (the mean
            of those windows' scores). Empty input yields an empty dict.
        """
        overlapped_scores: Dict[int, Dict] = {}
        for windowed_document in result_windowed_documents:
            source_indexes: List[int] = [
                windowed_document.meta["index_window"] + offset
                for offset in range(windowed_document.meta["window_size"])
            ]
            for source_index in source_indexes:
                # True division by 1 promotes integer scores to float, so that
                # "score_mean" has a consistent numeric type.
                score = windowed_document.score / 1
                stats = overlapped_scores.get(source_index)
                if stats is None:
                    overlapped_scores[source_index] = {
                        "count": 1,
                        "score_mean": score,
                    }
                else:
                    # Incremental mean: avoids storing every score per index.
                    stats["count"] += 1
                    stats["score_mean"] += (score - stats["score_mean"]) / stats["count"]

        return overlapped_scores

    @staticmethod
    def _top_ranked_items(document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float) -> List:
        """Return up to ``top_k`` ``(index, stats)`` pairs, highest ``score_mean`` first.

        Ties keep their original dict order because ``sorted`` is stable. The
        limit is checked with ``rank >= top_k``, so a non-positive ``top_k``
        selects nothing and a fractional ``top_k`` is effectively rounded up.
        """
        ranked_items = sorted(
            document_indexes_with_overlapped_scores.items(),
            key=lambda item: item[1]["score_mean"],
            reverse=True,
        )
        selected_items = []
        for rank, item in enumerate(ranked_items):
            if rank >= top_k:
                break
            selected_items.append(item)
        return selected_items

    def get_selected_labels(self, document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float) -> List[str]:
        """Return formatted mean scores of the ``top_k`` best-scoring indexes.

        Args:
            document_indexes_with_overlapped_scores: Mapping of source index to a
                dict containing at least ``"score_mean"``.
            top_k: Maximum number of labels to return.

        Returns:
            Mean scores in descending order, each formatted with four decimals.
            The ``" "`` sign flag in the format spec puts a leading space in
            front of non-negative values (and a ``-`` in front of negatives).
        """
        return [
            f"{stats['score_mean']: .4f}"
            for _, stats in self._top_ranked_items(document_indexes_with_overlapped_scores, top_k)
        ]

    def get_selected_documents(self, document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float,
                               source_documents: List[str]) -> List[str]:
        """Return the source documents of the ``top_k`` best-scoring indexes.

        Args:
            document_indexes_with_overlapped_scores: Mapping of source index to a
                dict containing at least ``"score_mean"``.
            top_k: Maximum number of documents to return.
            source_documents: Sequence indexed by the mapping's keys.

        Returns:
            Entries of ``source_documents`` ordered by descending mean score.

        Raises:
            IndexError: If a selected index is outside ``source_documents``.
        """
        return [
            source_documents[index]
            for index, _ in self._top_ranked_items(document_indexes_with_overlapped_scores, top_k)
        ]