Coverage for apps/inners/use_cases/document_converters/libre_office_document_converter.py

1import subprocess

2from pathlib import Path

3from uuid import UUID

5import pdfkit

6from starlette.datastructures import State

8from apps.inners.exceptions import use_case_exception

9from apps.inners.models.daos.document import Document

10from apps.inners.models.daos.file_document import FileDocument

11from apps.inners.models.daos.text_document import TextDocument

12from apps.inners.models.daos.web_document import WebDocument

13from apps.inners.models.dtos.constants.document_type_constant import DocumentTypeConstant

14from apps.outers.repositories.document_repository import DocumentRepository

15from apps.outers.repositories.file_document_repository import FileDocumentRepository

16from apps.outers.repositories.text_document_repository import TextDocumentRepository

17from apps.outers.repositories.web_document_repository import WebDocumentRepository

class LibreOfficeDocumentConverter:
    """Convert documents between formats by invoking headless LibreOffice.

    Files are staged in a ``libre_office_converted_documents`` directory
    located under the file document repository's ``file_path``. Staged input
    and output files are removed once the converted bytes have been read.
    """

    def __init__(
            self,
            document_repository: DocumentRepository,
            file_document_repository: FileDocumentRepository,
            text_document_repository: TextDocumentRepository,
            web_document_repository: WebDocumentRepository,
    ):
        """Store the repositories and make sure the staging directory exists."""
        self.document_repository = document_repository
        self.file_document_repository = file_document_repository
        self.text_document_repository = text_document_repository
        self.web_document_repository = web_document_repository
        self.file_path: Path = self.file_document_repository.file_path / "libre_office_converted_documents"
        # parents=True: do not fail when the repository root has not been created yet.
        self.file_path.mkdir(parents=True, exist_ok=True)
        # Options handed to pdfkit when a web document is rendered to PDF.
        self.pdf_options = {
            'page-size': 'Letter',
            'margin-top': '0.25in',
            'margin-right': '1.00in',
            'margin-bottom': '0.25in',
            'margin-left': '1.00in',
        }

    def _relative_path(self, file_name: str) -> Path:
        """Return the staging-directory-relative path used with the file repository."""
        return Path(self.file_path.name) / file_name

    def _build_command(self, output_format: str, input_file_path: Path) -> list:
        """Return the LibreOffice argv that converts ``input_file_path``.

        An argument list (instead of a shell string) keeps file names and
        formats containing spaces or shell metacharacters from being
        interpreted by a shell.
        """
        return [
            "libreoffice",
            "--headless",
            "--convert-to",
            output_format,
            "--outdir",
            str(self.file_path),
            str(input_file_path),
        ]

    def _convert_staged_file(self, input_relative_path: Path, output_relative_path: Path, output_format: str) -> bytes:
        """Run LibreOffice on a staged input file and return the converted bytes.

        The staged input is always removed, even when conversion or reading
        fails. The output file is removed after it has been read.
        """
        try:
            # The exit status is not inspected (unchanged from the previous
            # behaviour); a failed conversion surfaces when the output is read.
            # NOTE(review): this call blocks the event loop of the async callers.
            subprocess.run(
                self._build_command(output_format, self.file_path / input_relative_path.name)
            )
            output_file_data: bytes = self.file_document_repository.read_file_data(
                relative_file_path=output_relative_path
            )
        finally:
            self.file_document_repository.remove_file(
                relative_file_path=input_relative_path
            )
        self.file_document_repository.remove_file(
            relative_file_path=output_relative_path
        )

        return output_file_data

    async def convert_from_data(self, input_file_data: bytes, input_format: str, output_format: str) -> bytes:
        """Convert raw file bytes from ``input_format`` to ``output_format``.

        :param input_file_data: Content of the file to convert.
        :param input_format: Extension (without dot) used for the staged input file.
        :param output_format: Value passed to LibreOffice ``--convert-to`` and
            used as the extension of the expected output file.
        :return: Bytes of the converted file.
        """
        from uuid import uuid4

        # A fresh identifier per call keeps conversions that share the staging
        # directory (e.g. several worker processes) from overwriting each other.
        conversion_id: UUID = uuid4()
        input_relative_path: Path = self._relative_path(f"{conversion_id}.{input_format}")
        output_relative_path: Path = self._relative_path(f"{conversion_id}.{output_format}")
        self.file_document_repository.save_file(
            relative_file_path=input_relative_path,
            file_data=input_file_data
        )

        return self._convert_staged_file(
            input_relative_path=input_relative_path,
            output_relative_path=output_relative_path,
            output_format=output_format
        )

    async def convert_from_document_id(self, state: State, document_id: UUID, output_format: str) -> bytes:
        """Convert the stored document ``document_id`` to ``output_format``.

        The document is looked up for the account of ``state.authorized_session``
        and staged according to its type: file documents are staged under their
        stored file name, text documents as ``<document_id>.txt`` and web
        documents as ``<document_id>.pdf`` rendered with pdfkit.

        :param state: Request state providing ``session`` and ``authorized_session``.
        :param document_id: Identifier of the document to convert.
        :param output_format: Value passed to LibreOffice ``--convert-to``.
        :return: Bytes of the converted file, or of the staged file itself when
            its extension already equals ``output_format``.
        :raises use_case_exception.DocumentTypeNotSupported: When the document
            type is not FILE, TEXT or WEB.
        """
        # NOTE(review): "accound" is the spelling used by the existing call;
        # confirm against DocumentRepository before renaming.
        found_document: Document = await self.document_repository.find_one_by_id_and_accound_id(
            session=state.session,
            id=document_id,
            account_id=state.authorized_session.account_id
        )
        if found_document.document_type_id == DocumentTypeConstant.FILE:
            found_file_document: FileDocument = await self.file_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path: Path = self._relative_path(found_file_document.file_name)
            file_data: bytes = self.file_document_repository.get_object_data(
                object_name=found_file_document.file_name
            )
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                file_data=file_data
            )
        elif found_document.document_type_id == DocumentTypeConstant.TEXT:
            found_text_document: TextDocument = await self.text_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path: Path = self._relative_path(f"{document_id}.txt")
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                file_data=found_text_document.text_content.encode()
            )
        elif found_document.document_type_id == DocumentTypeConstant.WEB:
            found_web_document: WebDocument = await self.web_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path: Path = self._relative_path(f"{document_id}.pdf")
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                # Passing False as the output path makes pdfkit return the PDF bytes.
                file_data=pdfkit.from_url(found_web_document.web_url, False, options=self.pdf_options)
            )
        else:
            raise use_case_exception.DocumentTypeNotSupported()

        # Already in the requested format: return the staged bytes unconverted.
        if input_file_path.suffix[1:] == output_format:
            try:
                output_file_data: bytes = self.file_document_repository.read_file_data(
                    relative_file_path=input_file_path
                )
            finally:
                self.file_document_repository.remove_file(
                    relative_file_path=input_file_path
                )

            return output_file_data

        # LibreOffice names its output after the input stem plus the new extension.
        output_file_path: Path = self._relative_path(f"{input_file_path.stem}.{output_format}")

        return self._convert_staged_file(
            input_relative_path=input_file_path,
            output_relative_path=output_file_path,
            output_format=output_format
        )

Coverage for apps/inners/use_cases/document_converters/libre_office_document_converter.py: 44%

62 statements