numerous-sunset-21589
01/17/2024, 2:01 AMnumerous-actor-35946
01/17/2024, 3:33 AMnumerous-sunset-21589
01/17/2024, 4:02 AM
@workflow()
def crack_documents(filters: List[str], limit: Optional[int] = None) -> str:
    """Fan the per-document processing stages out over a filtered document set.

    The ids returned by ``filter_documents`` are fed to seven mapped tasks
    (page images, thumbnails, insets, two OCR variants, captions, text
    embeddings). No stage consumes another stage's output here; every stage
    receives the same id list, and execution order is imposed purely through
    explicit ``>>`` edges.

    Args:
        filters: Passed through unchanged to ``filter_documents``.
        limit: Passed through unchanged to ``filter_documents``; ``None`` by
            default.

    Returns:
        The literal string ``"SUCCESS"``.
    """
    selected_ids = filter_documents(filters=filters, limit=limit)

    # One mapped node per stage, created in a fixed order.
    # NOTE(review): min_success_ratio=0 presumably lets a stage finish even
    # when individual documents fail -- confirm against the map_task docs.
    pages = map_task(extract_page_images, min_success_ratio=0)(doc_id=selected_ids)
    thumbnails = map_task(extract_thumbnails, min_success_ratio=0)(doc_id=selected_ids)
    inset_images = map_task(extract_insets, min_success_ratio=0)(doc_id=selected_ids)
    google_ocr = map_task(extract_ocr_google, min_success_ratio=0)(doc_id=selected_ids)
    msft_ocr = map_task(extract_ocr_msft, min_success_ratio=0)(doc_id=selected_ids)
    image_captions = map_task(caption_images, min_success_ratio=0)(doc_id=selected_ids)
    embeddings = map_task(embed_text, min_success_ratio=0)(doc_id=selected_ids)

    # (upstream, downstream) ordering constraints, applied in listed order.
    ordering = (
        (pages, thumbnails),
        (pages, inset_images),
        (thumbnails, google_ocr),
        (thumbnails, msft_ocr),
        (msft_ocr, embeddings),
        (google_ocr, embeddings),
        (inset_images, image_captions),
        (image_captions, embeddings),
    )
    for upstream, downstream in ordering:
        upstream >> downstream

    return "SUCCESS"
numerous-sunset-21589
01/17/2024, 4:03 AMnumerous-actor-35946
01/17/2024, 4:56 AMnumerous-actor-35946
01/17/2024, 4:57 AMnumerous-actor-35946
01/17/2024, 4:58 AMnumerous-sunset-21589
01/17/2024, 12:14 PMnumerous-sunset-21589
01/17/2024, 12:15 PMhallowed-mouse-14616
01/17/2024, 3:18 PM