Coverage for apps/inners/use_cases/document_converters/libre_office_document_converter.py: 44%
62 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-22 19:03 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-22 19:03 +0000
1import subprocess
2from pathlib import Path
3from uuid import UUID
5import pdfkit
6from starlette.datastructures import State
8from apps.inners.exceptions import use_case_exception
9from apps.inners.models.daos.document import Document
10from apps.inners.models.daos.file_document import FileDocument
11from apps.inners.models.daos.text_document import TextDocument
12from apps.inners.models.daos.web_document import WebDocument
13from apps.inners.models.dtos.constants.document_type_constant import DocumentTypeConstant
14from apps.outers.repositories.document_repository import DocumentRepository
15from apps.outers.repositories.file_document_repository import FileDocumentRepository
16from apps.outers.repositories.text_document_repository import TextDocumentRepository
17from apps.outers.repositories.web_document_repository import WebDocumentRepository
class LibreOfficeDocumentConverter:
    """Convert documents to another format by invoking headless LibreOffice.

    Input files are staged through ``file_document_repository`` into a
    ``libre_office_converted_documents`` working directory, converted by a
    ``libreoffice --headless --convert-to`` subprocess, read back, and then
    removed from the working directory.
    """

    def __init__(
            self,
            document_repository: DocumentRepository,
            file_document_repository: FileDocumentRepository,
            text_document_repository: TextDocumentRepository,
            web_document_repository: WebDocumentRepository,
    ):
        self.document_repository = document_repository
        self.file_document_repository = file_document_repository
        self.text_document_repository = text_document_repository
        self.web_document_repository = web_document_repository
        # Working directory for staged inputs and LibreOffice outputs.
        self.file_path: Path = self.file_document_repository.file_path / "libre_office_converted_documents"
        # parents=True so a missing repository root does not break construction.
        self.file_path.mkdir(parents=True, exist_ok=True)
        # Options passed to pdfkit when rendering a web document to PDF.
        self.pdf_options = {
            'page-size': 'Letter',
            'margin-top': '0.25in',
            'margin-right': '1.00in',
            'margin-bottom': '0.25in',
            'margin-left': '1.00in',
        }

    def _convert_staged_file(self, input_file_path: Path, output_format: str) -> bytes:
        """Convert an already staged file and return the converted bytes.

        Args:
            input_file_path: Path of the staged input, relative to the
                repository root (``<working dir name>/<file name>``).
            output_format: Value handed to LibreOffice's ``--convert-to``.

        Returns:
            The content of ``<input stem>.<output_format>`` read from the
            working directory.

        Raises:
            subprocess.CalledProcessError: If LibreOffice exits with a
                non-zero status.
            OSError: If the ``libreoffice`` executable cannot be started.
        """
        # The converted file is expected next to the input, named after its stem.
        output_file_path: Path = Path(self.file_path.name) / f"{input_file_path.stem}.{output_format}"
        try:
            # Argument list without a shell: file names and formats are passed
            # verbatim, so spaces or shell metacharacters cannot alter the command.
            # NOTE(review): this call blocks the event loop while LibreOffice runs.
            subprocess.run(
                [
                    "libreoffice",
                    "--headless",
                    "--convert-to",
                    output_format,
                    "--outdir",
                    str(self.file_path),
                    str(self.file_path / input_file_path.name),
                ],
                check=True,
            )
            output_file_data: bytes = self.file_document_repository.read_file_data(
                relative_file_path=output_file_path
            )
        finally:
            # Always drop the staged input, even when conversion or reading failed.
            self.file_document_repository.remove_file(
                relative_file_path=input_file_path
            )
        # Only reached after a successful read, so the output file exists.
        self.file_document_repository.remove_file(
            relative_file_path=output_file_path
        )

        return output_file_data

    async def convert_from_data(self, input_file_data: bytes, input_format: str, output_format: str) -> bytes:
        """Convert raw file bytes from ``input_format`` to ``output_format``.

        Args:
            input_file_data: Content of the file to convert.
            input_format: File extension (without dot) used for the staged input.
            output_format: Value handed to LibreOffice's ``--convert-to``.

        Returns:
            The converted file content.

        Raises:
            subprocess.CalledProcessError: If LibreOffice exits with a
                non-zero status.
            OSError: If the ``libreoffice`` executable cannot be started.
        """
        # Function-scope import: the file's import block only provides ``UUID``.
        from uuid import uuid4

        # A fresh id per call keeps overlapping conversions from sharing
        # (and deleting) each other's scratch files.
        conversion_id: UUID = uuid4()
        input_file_path: Path = Path(self.file_path.name) / f"{conversion_id}.{input_format}"
        self.file_document_repository.save_file(
            relative_file_path=input_file_path,
            file_data=input_file_data
        )

        return self._convert_staged_file(
            input_file_path=input_file_path,
            output_format=output_format
        )

    async def convert_from_document_id(self, state: State, document_id: UUID, output_format: str) -> bytes:
        """Convert a stored document, looked up by id, to ``output_format``.

        File documents are fetched via ``get_object_data``, text documents are
        staged as ``<document_id>.txt`` and web documents are rendered to
        ``<document_id>.pdf`` with pdfkit. When the staged file already has the
        requested extension its content is returned without running LibreOffice.

        Args:
            state: Request state providing ``session`` and
                ``authorized_session.account_id``.
            document_id: Id of the document to convert.
            output_format: Value handed to LibreOffice's ``--convert-to``.

        Returns:
            The converted (or already matching) file content.

        Raises:
            use_case_exception.DocumentTypeNotSupported: If the document type
                is not file, text or web.
            subprocess.CalledProcessError: If LibreOffice exits with a
                non-zero status.
            OSError: If the ``libreoffice`` executable cannot be started.
        """
        found_document: Document = await self.document_repository.find_one_by_id_and_accound_id(
            session=state.session,
            id=document_id,
            account_id=state.authorized_session.account_id
        )

        input_file_path: Path
        if found_document.document_type_id == DocumentTypeConstant.FILE:
            found_file_document: FileDocument = await self.file_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path = Path(self.file_path.name) / found_file_document.file_name
            file_data: bytes = self.file_document_repository.get_object_data(
                object_name=found_file_document.file_name
            )
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                file_data=file_data
            )
        elif found_document.document_type_id == DocumentTypeConstant.TEXT:
            found_text_document: TextDocument = await self.text_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path = Path(self.file_path.name) / f"{document_id}.txt"
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                file_data=found_text_document.text_content.encode()
            )
        elif found_document.document_type_id == DocumentTypeConstant.WEB:
            found_web_document: WebDocument = await self.web_document_repository.find_one_by_id_and_account_id(
                session=state.session,
                id=document_id,
                account_id=state.authorized_session.account_id
            )
            input_file_path = Path(self.file_path.name) / f"{document_id}.pdf"
            # Passing False as the output path makes pdfkit return the PDF data
            # instead of writing it to a file.
            self.file_document_repository.save_file(
                relative_file_path=input_file_path,
                file_data=pdfkit.from_url(found_web_document.web_url, False, options=self.pdf_options)
            )
        else:
            raise use_case_exception.DocumentTypeNotSupported()

        # Already in the requested format: return the staged bytes as they are.
        if input_file_path.suffix[1:] == output_format:
            try:
                return self.file_document_repository.read_file_data(
                    relative_file_path=input_file_path
                )
            finally:
                self.file_document_repository.remove_file(
                    relative_file_path=input_file_path
                )

        return self._convert_staged_file(
            input_file_path=input_file_path,
            output_format=output_format
        )