Obtain your API token from the User Center. If task_name is not provided, PdfParserDemo is used as the default task_name.
from undatasio.undatasio import UnDatasIO

# Placeholders: replace these with your own API token and task name.
token = 'Your API token'
task_name = 'your task name'

# 1. Initialize the UnDatasIO client
client = UnDatasIO(token=token, task_name=task_name)
# 2. Upload files
# file_dir_path is the local directory passed to the upload call.
upload_response = client.upload(file_dir_path='./example_files')
# The response carries a status code and, on failure, a message.
if upload_response.code == 200:
    print("File upload successful!")
else:
    print(f"File upload failed: {upload_response.msg}")
# 3. View all uploaded files
upload_filename_response = client.show_upload()
if upload_filename_response.code == 200:
    print(upload_filename_response.data)
else:
    # This step lists files; it does not upload, so report it as such.
    print(f"Failed to list uploaded files: {upload_filename_response.msg}")
# 4. Parse files
# Supported values for `parameter`: ['fast', 'accurate']
# Supported values for `lang` (display name -> code to pass):
# {'English': 'en', '한국어': 'korean', '日本語': 'japan', '中文': 'ch',
#  '繁體中文': 'chinese_cht', 'Deutsch': 'de', 'Español': 'es', 'Bahasa Indonesia': 'id',
#  'Français': 'fr', 'Português': 'pt', 'Italiano': 'it', 'Türkçe': 'tr',
#  'Polski': 'pl', 'Čeština': 'cs', 'Dansk': 'da', 'हिंदी': 'hi', 'தமிழ்': 'ta',
#  'తెలుగు': 'te', 'ಕನ್ನಡ': 'ka', 'Afrikaans': 'af', 'Azərbaycan': 'az',
#  'Bosanski': 'bs', 'Cymraeg': 'cy', 'Eesti': 'et', 'Gaeilge': 'ga',
#  'Hrvatski': 'hr', 'Magyar': 'hu', 'Íslenska': 'is', 'Kurdî': 'ku', 'Latina': 'la',
#  'Lietuvių': 'lt', 'Latviešu': 'lv', 'Māori': 'mi', 'Bahasa Melayu': 'ms',
#  'Malti': 'mt', 'Nederlands': 'nl', 'Norsk': 'no', 'Occitan': 'oc', 'पालि': 'pi',
#  'Română': 'ro', 'Srpski': 'rs_latin', 'Slovenčina': 'sk', 'Slovenščina': 'sl',
#  'Shqip': 'sq', 'Svenska': 'sv', 'Kiswahili': 'sw', 'Tagalog': 'tl', "O'zbek": 'uz',
#  'Tiếng Việt': 'vi'}
parse_response = client.parser(
    file_name_list=['example_file1.pdf', 'example_file2.pdf'],
    lang='en',
    parameter='fast',
)
if parse_response.code == 200:
    print("File parsing successful")
else:
    print(f"File parsing request failed: {parse_response.msg}")
# 5. View historical parsing results
parse_filename_response = client.show_version()
if parse_filename_response.code == 200:
    print(parse_filename_response.data)
else:
    # This step queries parsing history; it does not upload, so say so.
    print(f"Failed to get parsing history: {parse_filename_response.msg}")
# 6. View parsing results
# This example assumes the version number is 'v1' and requests only the
# title and table information from the parsing results.
# Supported values for `type_info`:
# ['title', 'table', 'text', 'image', 'interline_equation']
# NOTE(review): file_name is a placeholder; presumably it should name a file
# that has already been parsed — confirm against the SDK documentation.
results = client.get_result_type(
    type_info=['title', 'table'],
    file_name='example_file.pdf',
    version='v1',
)
if results.code == 200:
    print(f"Parsing results: {results.data}")
else:
    print(f"Failed to get parsing results: {results.msg}")