Coverage for apps/inners/use_cases/document_converters/libre_office_document_converter.py: 44%

62 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-22 19:03 +0000

1import subprocess 

2from pathlib import Path 

3from uuid import UUID 

4 

5import pdfkit 

6from starlette.datastructures import State 

7 

8from apps.inners.exceptions import use_case_exception 

9from apps.inners.models.daos.document import Document 

10from apps.inners.models.daos.file_document import FileDocument 

11from apps.inners.models.daos.text_document import TextDocument 

12from apps.inners.models.daos.web_document import WebDocument 

13from apps.inners.models.dtos.constants.document_type_constant import DocumentTypeConstant 

14from apps.outers.repositories.document_repository import DocumentRepository 

15from apps.outers.repositories.file_document_repository import FileDocumentRepository 

16from apps.outers.repositories.text_document_repository import TextDocumentRepository 

17from apps.outers.repositories.web_document_repository import WebDocumentRepository 

18 

19 

class LibreOfficeDocumentConverter:
    """Convert documents between formats with the headless ``libreoffice`` CLI.

    Inputs are staged in a scratch directory named
    ``libre_office_converted_documents`` under the file-document repository's
    ``file_path``, converted there by LibreOffice, read back through the
    repository, and removed again.

    NOTE: the conversion runs via the synchronous ``subprocess.run``, so the
    ``async`` methods below do not yield to the event loop while LibreOffice
    is running.
    """

    def __init__(
            self,
            document_repository: DocumentRepository,
            file_document_repository: FileDocumentRepository,
            text_document_repository: TextDocumentRepository,
            web_document_repository: WebDocumentRepository,
    ):
        self.document_repository = document_repository
        self.file_document_repository = file_document_repository
        self.text_document_repository = text_document_repository
        self.web_document_repository = web_document_repository
        # Scratch directory shared by every conversion; created on construction.
        self.file_path: Path = self.file_document_repository.file_path / "libre_office_converted_documents"
        self.file_path.mkdir(exist_ok=True)
        # Page layout passed to pdfkit when a web document is rendered to PDF.
        self.pdf_options = {
            'page-size': 'Letter',
            'margin-top': '0.25in',
            'margin-right': '1.00in',
            'margin-bottom': '0.25in',
            'margin-left': '1.00in',
        }

    def _relative(self, file_name: str) -> Path:
        """Return the repository-relative path of *file_name* inside the scratch directory."""
        return Path(self.file_path.name) / file_name

    def _convert_staged_file(self, input_file_path: Path, output_format: str) -> bytes:
        """Convert an already-staged input file and return the converted bytes.

        Args:
            input_file_path: Repository-relative path of the staged input file.
            output_format: Value handed to ``--convert-to``; either a bare
                extension or ``extension:FilterName``.

        Returns:
            The content of the file LibreOffice produced.

        Raises:
            subprocess.CalledProcessError: If ``libreoffice`` exits with a
                non-zero status.
        """
        # LibreOffice names its output "<input stem>.<extension>"; a filter
        # name appended after ':' is not part of the file name.
        output_extension: str = output_format.split(":", 1)[0]
        output_file_path: Path = self._relative(f"{input_file_path.stem}.{output_extension}")
        try:
            # An argument list (no shell) keeps formats and stored file names
            # from being interpreted as shell syntax and tolerates spaces.
            subprocess.run(
                [
                    "libreoffice",
                    "--headless",
                    "--convert-to",
                    output_format,
                    "--outdir",
                    str(self.file_path),
                    str(self.file_path / input_file_path.name),
                ],
                check=True,
            )
            return self.file_document_repository.read_file_data(
                relative_file_path=output_file_path
            )
        finally:
            # Always clean the scratch directory, even when conversion fails.
            self.file_document_repository.remove_file(
                relative_file_path=input_file_path
            )
            # The output only exists if LibreOffice got far enough to write it.
            if (self.file_path / output_file_path.name).exists():
                self.file_document_repository.remove_file(
                    relative_file_path=output_file_path
                )

    async def convert_from_data(self, input_file_data: bytes, input_format: str, output_format: str) -> bytes:
        """Convert raw file content from one format to another.

        Args:
            input_file_data: Content of the file to convert.
            input_format: Extension (without the dot) used for the staged input file.
            output_format: Target format passed to LibreOffice's ``--convert-to``.

        Returns:
            The converted file content.

        Raises:
            subprocess.CalledProcessError: If LibreOffice exits with a non-zero status.
        """
        from uuid import uuid4

        # A fresh name per call keeps simultaneous conversions (e.g. from
        # several worker processes) from overwriting each other's scratch files.
        input_file_path: Path = self._relative(f"{uuid4()}.{input_format}")
        self.file_document_repository.save_file(
            relative_file_path=input_file_path,
            file_data=input_file_data
        )

        return self._convert_staged_file(input_file_path, output_format)

    async def convert_from_document_id(self, state: State, document_id: UUID, output_format: str) -> bytes:
        """Convert a stored document of the authorized account to *output_format*.

        File documents are fetched via ``get_object_data``, text documents are
        staged as ``.txt``, and web documents are first rendered to PDF with
        pdfkit. When the staged file already has the requested extension its
        bytes are returned without invoking LibreOffice.

        Args:
            state: Request state providing ``session`` and ``authorized_session``.
            document_id: Identifier of the document to convert.
            output_format: Target format passed to LibreOffice's ``--convert-to``.

        Returns:
            The converted (or already matching) file content.

        Raises:
            use_case_exception.DocumentTypeNotSupported: If the document type is
                not file, text, or web.
            subprocess.CalledProcessError: If LibreOffice exits with a non-zero status.
        """
        session = state.session
        account_id = state.authorized_session.account_id

        # "accound" is the spelling this code has always called on the repository.
        found_document: Document = await self.document_repository.find_one_by_id_and_accound_id(
            session=session,
            id=document_id,
            account_id=account_id
        )

        if found_document.document_type_id == DocumentTypeConstant.FILE:
            found_file_document: FileDocument = await self.file_document_repository.find_one_by_id_and_account_id(
                session=session,
                id=document_id,
                account_id=account_id
            )
            input_file_path: Path = self._relative(found_file_document.file_name)
            file_data: bytes = self.file_document_repository.get_object_data(
                object_name=found_file_document.file_name
            )
        elif found_document.document_type_id == DocumentTypeConstant.TEXT:
            found_text_document: TextDocument = await self.text_document_repository.find_one_by_id_and_account_id(
                session=session,
                id=document_id,
                account_id=account_id
            )
            input_file_path: Path = self._relative(f"{document_id}.txt")
            file_data: bytes = found_text_document.text_content.encode()
        elif found_document.document_type_id == DocumentTypeConstant.WEB:
            found_web_document: WebDocument = await self.web_document_repository.find_one_by_id_and_account_id(
                session=session,
                id=document_id,
                account_id=account_id
            )
            input_file_path: Path = self._relative(f"{document_id}.pdf")
            # Passing False as the output path makes pdfkit return the PDF bytes.
            file_data: bytes = pdfkit.from_url(found_web_document.web_url, False, options=self.pdf_options)
        else:
            raise use_case_exception.DocumentTypeNotSupported()

        self.file_document_repository.save_file(
            relative_file_path=input_file_path,
            file_data=file_data
        )

        if input_file_path.suffix[1:] == output_format:
            # Already in the requested format: hand back the staged bytes as-is.
            try:
                return self.file_document_repository.read_file_data(
                    relative_file_path=input_file_path
                )
            finally:
                self.file_document_repository.remove_file(
                    relative_file_path=input_file_path
                )

        return self._convert_staged_file(input_file_path, output_format)
64 async def convert_from_document_id(self, state: State, document_id: UUID, output_format: str) -> bytes: 

65 found_document: Document = await self.document_repository.find_one_by_id_and_accound_id( 

66 session=state.session, 

67 id=document_id, 

68 account_id=state.authorized_session.account_id 

69 ) 

70 if found_document.document_type_id == DocumentTypeConstant.FILE: 

71 found_file_document: FileDocument = await self.file_document_repository.find_one_by_id_and_account_id( 

72 session=state.session, 

73 id=document_id, 

74 account_id=state.authorized_session.account_id 

75 ) 

76 input_file_path: Path = Path(self.file_path.name) / found_file_document.file_name 

77 file_data: bytes = self.file_document_repository.get_object_data( 

78 object_name=found_file_document.file_name 

79 ) 

80 self.file_document_repository.save_file( 

81 relative_file_path=input_file_path, 

82 file_data=file_data 

83 ) 

84 elif found_document.document_type_id == DocumentTypeConstant.TEXT: 

85 found_text_document: TextDocument = await self.text_document_repository.find_one_by_id_and_account_id( 

86 session=state.session, 

87 id=document_id, 

88 account_id=state.authorized_session.account_id 

89 ) 

90 input_file_path: Path = Path(self.file_path.name) / f"{document_id}.txt" 

91 self.file_document_repository.save_file( 

92 relative_file_path=input_file_path, 

93 file_data=found_text_document.text_content.encode() 

94 ) 

95 elif found_document.document_type_id == DocumentTypeConstant.WEB: 

96 found_web_document: WebDocument = await self.web_document_repository.find_one_by_id_and_account_id( 

97 session=state.session, 

98 id=document_id, 

99 account_id=state.authorized_session.account_id 

100 ) 

101 input_file_path: Path = Path(self.file_path.name) / f"{document_id}.pdf" 

102 self.file_document_repository.save_file( 

103 relative_file_path=input_file_path, 

104 file_data=pdfkit.from_url(found_web_document.web_url, False, options=self.pdf_options) 

105 ) 

106 else: 

107 raise use_case_exception.DocumentTypeNotSupported() 

108 

109 if input_file_path.suffix[1:] == output_format: 

110 output_file_data: bytes = self.file_document_repository.read_file_data( 

111 relative_file_path=input_file_path 

112 ) 

113 self.file_document_repository.remove_file( 

114 relative_file_path=input_file_path 

115 ) 

116 

117 return output_file_data 

118 

119 command: str = f"libreoffice --headless --convert-to {output_format} --outdir {self.file_path} {self.file_path / input_file_path.name}" 

120 subprocess.run(command, shell=True) 

121 output_file_path: Path = Path(self.file_path.name) / f"{input_file_path.stem}.{output_format}" 

122 output_file_data: bytes = self.file_document_repository.read_file_data( 

123 relative_file_path=output_file_path 

124 ) 

125 self.file_document_repository.remove_file( 

126 relative_file_path=input_file_path 

127 ) 

128 self.file_document_repository.remove_file( 

129 relative_file_path=output_file_path 

130 ) 

131 

132 return output_file_data