Coverage for apps/inners/use_cases/utilities/search_statistic.py: 12%

40 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-22 19:03 +0000

1from typing import List, Dict 

2 

3 

class SearchStatistic:
    """Aggregate the scores of overlapping document windows and select the best-scoring entries."""

    def get_document_indexes_with_overlapped_scores(self, result_windowed_documents: List) -> Dict[int, Dict]:
        """Compute, per source index, how many windows cover it and the mean score of those windows.

        Each windowed document must expose ``meta["index_window"]``, ``meta["window_size"]`` and ``score``.
        A window covers the source indexes ``index_window + 0`` up to ``index_window + window_size - 1``.

        :param result_windowed_documents: Scored windowed documents (presumably search results; verify
            against the caller).
        :return: Mapping of source index to ``{"count": <covering windows>, "score_mean": <mean score>}``.
        """
        overlapped_scores: Dict[int, Dict] = {}
        for windowed_document in result_windowed_documents:
            meta = windowed_document.meta
            for offset in range(meta["window_size"]):
                source_index = meta["index_window"] + offset
                # True division by 1 keeps the original int -> float promotion of the score.
                score = windowed_document.score / 1
                stats = overlapped_scores.get(source_index)
                if stats is None:
                    overlapped_scores[source_index] = {"count": 1, "score_mean": score}
                    continue
                # Incremental mean update: mean_n = mean_(n-1) + (x_n - mean_(n-1)) / n.
                stats["count"] += 1
                previous_mean = stats["score_mean"]
                stats["score_mean"] = previous_mean + (score - previous_mean) / stats["count"]

        return overlapped_scores

    @staticmethod
    def _take_top_ranked(document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float) -> List:
        """Return the leading ``(index, stats)`` pairs ordered by descending ``score_mean``."""
        ranked = sorted(
            document_indexes_with_overlapped_scores.items(),
            key=lambda item: item[1]["score_mean"],
            reverse=True,
        )
        selected = []
        for position, entry in enumerate(ranked):
            # Compare against top_k instead of slicing so that a fractional top_k keeps working
            # (e.g. 1.5 selects two entries) and a non-positive top_k selects nothing.
            if position >= top_k:
                break
            selected.append(entry)
        return selected

    def get_selected_labels(self, document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float) -> List[str]:
        """Return the formatted mean scores of the best-scoring source indexes.

        :param document_indexes_with_overlapped_scores: Mapping of source index to a stats dict that
            contains ``"score_mean"``.
        :param top_k: Upper bound on the number of labels returned.
        :return: Mean scores in descending order, each formatted with four decimals.
        """
        # The space in ": .4f" is the sign flag: non-negative scores are rendered with a leading
        # space (" 0.5000") and negative ones with a minus sign ("-0.5000").
        return [
            f"{stats['score_mean']: .4f}"
            for _, stats in self._take_top_ranked(document_indexes_with_overlapped_scores, top_k)
        ]

    def get_selected_documents(self, document_indexes_with_overlapped_scores: Dict[int, Dict], top_k: float,
                               source_documents: List[str]) -> List[str]:
        """Return the source documents belonging to the best-scoring source indexes.

        :param document_indexes_with_overlapped_scores: Mapping of source index to a stats dict that
            contains ``"score_mean"``.
        :param top_k: Upper bound on the number of documents returned.
        :param source_documents: Documents addressed by the source indexes used as mapping keys.
        :return: Entries of ``source_documents`` in descending order of mean score.
        :raises IndexError: If a selected index is outside ``source_documents``.
        """
        return [
            source_documents[index]
            for index, _ in self._take_top_ranked(document_indexes_with_overlapped_scores, top_k)
        ]