Alex Lyashok
01/17/2024, 2:01 AM
Jay Ganbat
01/17/2024, 3:33 AM
Alex Lyashok
01/17/2024, 4:02 AM
@workflow()
def crack_documents(filters: List[str], limit: Optional[int] = None) -> str:
    """Run every per-document extraction step over the filtered documents.

    ``filter_documents`` selects the document ids. Each step is then mapped
    over those ids with ``map_task`` and the steps are ordered with ``>>``::

        page images -> thumbnails -> OCR (Google and MSFT) -> text embeddings
        page images -> insets -> captions -> text embeddings

    Args:
        filters: Passed through unchanged to ``filter_documents``.
        limit: Passed through unchanged to ``filter_documents``.

    Returns:
        The constant string ``"SUCCESS"``.
    """
    doc_ids = filter_documents(filters=filters, limit=limit)

    def per_document(step):
        # Every step is mapped over the same ids with the same settings.
        # NOTE(review): min_success_ratio=0 presumably lets a mapped step
        # count as successful even when individual documents fail --
        # confirm against the map_task documentation.
        return map_task(step, min_success_ratio=0)(doc_id=doc_ids)

    # Instantiate the steps first; ordering is declared separately below.
    pages = per_document(extract_page_images)
    thumbnails = per_document(extract_thumbnails)
    inset_images = per_document(extract_insets)
    google_ocr = per_document(extract_ocr_google)
    msft_ocr = per_document(extract_ocr_msft)
    image_captions = per_document(caption_images)
    embeddings = per_document(embed_text)

    # (upstream, downstream) pairs; ``>>`` is applied to each pair in turn.
    run_order = (
        (pages, thumbnails),
        (pages, inset_images),
        (thumbnails, google_ocr),
        (thumbnails, msft_ocr),
        (msft_ocr, embeddings),
        (google_ocr, embeddings),
        (inset_images, image_captions),
        (image_captions, embeddings),
    )
    for upstream, downstream in run_order:
        upstream >> downstream

    return "SUCCESS"
Alex Lyashok
01/17/2024, 4:03 AM
Jay Ganbat
01/17/2024, 4:56 AM
Jay Ganbat
01/17/2024, 4:57 AM
Jay Ganbat
01/17/2024, 4:58 AM
Alex Lyashok
01/17/2024, 12:14 PM
Alex Lyashok
01/17/2024, 12:15 PM
Dan Rammer (hamersaw)
01/17/2024, 3:18 PM