xcode 在哪里可以找到 tesseract->setvariable 函数的第一个参数的可用属性名称列表?
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/13087252/
Warning: this content is provided under the CC BY-SA 4.0 license. You are free to use/share it, but you must attribute it to the original authors (not me):
StackOverFlow
Where can I find the list of available property names for the tesseract->SetVariable function's first parameter?
提问by The iOSDev
After a lot of googling I was only able to find a few of them, such as the examples below for tesseract's SetVariable(1st param, 2nd param)
经过大量的谷歌搜索,我只找到了其中的几个,例如下面这些 tesseract 的 SetVariable(1st param, 2nd param) 示例
// Example parameter settings: the first argument is the parameter name,
// the second is its value, always passed as a string.
// Whitelist of characters to recognize: digits and upper-case letters.
tesseract->SetVariable("tessedit_char_whitelist", "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
// NOTE(review): presumably zeroes the language-model penalties applied to
// infrequent / non-dictionary words — confirm against the Tesseract docs.
tesseract->SetVariable("language_model_penalty_non_freq_dict_word", "0");
tesseract->SetVariable("language_model_penalty_non_dict_word", "0");
// Blacklist of characters not to recognize: x, y and z.
tesseract->SetVariable("tessedit_char_blacklist", "xyz");
// NOTE(review): presumably switches the classifier to a numeric mode — confirm.
tesseract->SetVariable("classify_bln_numeric_mode", "1");
I would like to know how many more of them there are.
我想知道还有多少个这样的变量。
Is there any list available? If any please give me the link to it.
有可用的列表吗?如果有请给我链接。
采纳答案by kineticfocus
This code should do the trick for iOS... might need a bit of tweaking.
这段代码应该对 iOS 有用……可能需要一些调整。
// Write every current Tesseract setting to the console.
// PrintVariables() is handed a FILE*, so the listing is staged in a
// temporary file, rewound, and then copied to stdout line by line.
bool verboseOut = true;
if (verboseOut) {
    FILE *pFile = tmpfile();
    if (!pFile) {
        // tmpfile() yields a null pointer when no temporary file can be
        // created; report it instead of handing a null stream to the
        // calls below.
        perror("tmpfile");
    } else {
        char buffer[256];
        tesseract->PrintVariables(pFile);
        rewind(pFile);
        // Loop on fgets() itself: it returns null both at end-of-file and
        // on a read error, so the loop always terminates. Testing feof()
        // alone never ends if a read error occurs before EOF is reached.
        while (fgets(buffer, sizeof buffer, pFile)) {
            fputs(buffer, stdout);
        }
        fclose(pFile);
    }
}
回答by roocell
For all you frustrated iOS coders out there: I found the list in the header tesseractclass.h. Here's the list:
致所有感到沮丧的 iOS 开发者:我在头文件 tesseractclass.h 中找到了这个列表,列表如下:
BOOL_VAR_H(tessedit_resegment_from_boxes, false,
"Take segmentation and labeling from box file");
BOOL_VAR_H(tessedit_resegment_from_line_boxes, false,
"Conversion of word/line box file to char box file");
BOOL_VAR_H(tessedit_train_from_boxes, false,
"Generate training data from boxed chars");
BOOL_VAR_H(tessedit_make_boxes_from_boxes, false,
"Generate more boxes from boxed chars");
BOOL_VAR_H(tessedit_dump_pageseg_images, false,
"Dump intermediate images made during page segmentation");
INT_VAR_H(tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
" 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)");
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"
" to loading and running only Tesseract (no Cube, no combiner)."
" (Values from OcrEngineMode enum in tesseractclass.h)");
STRING_VAR_H(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize");
STRING_VAR_H(tessedit_char_whitelist, "",
"Whitelist of chars to recognize");
BOOL_VAR_H(tessedit_ambigs_training, false,
"Perform training for ambiguities");
INT_VAR_H(pageseg_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing page-segmentation.");
INT_VAR_H(ocr_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing ocr.");
STRING_VAR_H(tessedit_write_params_to_file, "",
"Write all parameters to the given file.");
BOOL_VAR_H(tessedit_adapt_to_char_fragments, true,
"Adapt to words that contain "
" a character composed form fragments");
BOOL_VAR_H(tessedit_adaption_debug, false,
"Generate and print debug information for adaption");
INT_VAR_H(bidi_debug, 0, "Debug level for BiDi");
INT_VAR_H(applybox_debug, 1, "Debug level");
INT_VAR_H(applybox_page, 0, "Page number to apply boxes from");
STRING_VAR_H(applybox_exposure_pattern, ".exp",
"Exposure value follows this pattern in the image"
" filename. The name of the image files are expected"
" to be in the form [lang].[fontname].exp[num].tif");
BOOL_VAR_H(applybox_learn_chars_and_char_frags_mode, false,
"Learn both character fragments (as is done in the"
" special low exposure mode) as well as unfragmented"
" characters.");
BOOL_VAR_H(applybox_learn_ngrams_mode, false,
"Each bounding box is assumed to contain ngrams. Only"
" learn the ngrams whose outlines overlap horizontally.");
BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words");
BOOL_VAR_H(tessedit_training_tess, false, "Call Tess to learn blobs");
BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices");
BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
"Try to improve fuzzy spaces");
BOOL_VAR_H(tessedit_unrej_any_wd, false,
"Dont bother with word plausibility");
BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");
BOOL_VAR_H(tessedit_enable_doc_dict, true,
"Add words to the document dictionary");
BOOL_VAR_H(tessedit_debug_fonts, false, "Output font info per char");
BOOL_VAR_H(tessedit_debug_block_rejection, false, "Block and Row stats");
BOOL_VAR_H(tessedit_enable_bigram_correction, false,
"Enable correction based on the word bigram dictionary.");
INT_VAR_H(tessedit_bigram_debug, 0, "Amount of debug output for bigram "
"correction.");
INT_VAR_H(debug_x_ht_level, 0, "Reestimate debug");
BOOL_VAR_H(debug_acceptable_wds, false, "Dump word pass/fail chk");
STRING_VAR_H(chs_leading_punct, "('`\"", "Leading punctuation");
STRING_VAR_H(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");
STRING_VAR_H(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation");
double_VAR_H(quality_rej_pc, 0.08, "good_quality_doc lte rejection limit");
double_VAR_H(quality_blob_pc, 0.0, "good_quality_doc gte good blobs limit");
double_VAR_H(quality_outline_pc, 1.0,
"good_quality_doc lte outline error limit");
double_VAR_H(quality_char_pc, 0.95, "good_quality_doc gte good char limit");
INT_VAR_H(quality_min_initial_alphas_reqd, 2, "alphas in a good word");
BOOL_VAR_H(tessedit_tess_adapt_to_rejmap, false,
"Use reject map to control Tesseract adaption");
INT_VAR_H(tessedit_tess_adaption_mode, 0x27,
"Adaptation decision algorithm for tess");
BOOL_VAR_H(tessedit_minimal_rej_pass1, false,
"Do minimal rejection on pass 1 output");
BOOL_VAR_H(tessedit_test_adaption, false, "Test adaption criteria");
BOOL_VAR_H(tessedit_matcher_log, false, "Log matcher activity");
INT_VAR_H(tessedit_test_adaption_mode, 3,
"Adaptation decision algorithm for tess");
BOOL_VAR_H(save_blob_choices, false,
"Save the results of the recognition step"
" (blob_choices) within the corresponding WERD_CHOICE");
BOOL_VAR_H(test_pt, false, "Test for point");
double_VAR_H(test_pt_x, 99999.99, "xcoord");
double_VAR_H(test_pt_y, 99999.99, "ycoord");
INT_VAR_H(paragraph_debug_level, 0, "Print paragraph debug info.");
INT_VAR_H(cube_debug_level, 1, "Print cube debug info.");
STRING_VAR_H(outlines_odd, "%| ", "Non standard number of outlines");
STRING_VAR_H(outlines_2, "ij!?%\":;", "Non standard number of outlines");
BOOL_VAR_H(docqual_excuse_outline_errs, false,
"Allow outline errs in unrejection?");
BOOL_VAR_H(tessedit_good_quality_unrej, true,
"Reduce rejection on good docs");
BOOL_VAR_H(tessedit_use_reject_spaces, true, "Reject spaces?");
double_VAR_H(tessedit_reject_doc_percent, 65.00,
"%rej allowed before rej whole doc");
double_VAR_H(tessedit_reject_block_percent, 45.00,
"%rej allowed before rej whole block");
double_VAR_H(tessedit_reject_row_percent, 40.00,
"%rej allowed before rej whole row");
// Adjacent string literals are concatenated, so the second literal starts
// with a space to keep "rejects" and "which" from running together.
double_VAR_H(tessedit_whole_wd_rej_row_percent, 70.00,
"Number of row rejects in whole word rejects"
" which prevents whole row rejection");
BOOL_VAR_H(tessedit_preserve_blk_rej_perfect_wds, true,
"Only rej partially rejected words in block rejection");
BOOL_VAR_H(tessedit_preserve_row_rej_perfect_wds, true,
"Only rej partially rejected words in row rejection");
BOOL_VAR_H(tessedit_dont_blkrej_good_wds, false,
"Use word segmentation quality metric");
BOOL_VAR_H(tessedit_dont_rowrej_good_wds, false,
"Use word segmentation quality metric");
INT_VAR_H(tessedit_preserve_min_wd_len, 2,
"Only preserve wds longer than this");
BOOL_VAR_H(tessedit_row_rej_good_docs, true,
"Apply row rejection to good docs");
double_VAR_H(tessedit_good_doc_still_rowrej_wd, 1.1,
"rej good doc wd if more than this fraction rejected");
BOOL_VAR_H(tessedit_reject_bad_qual_wds, true,
"Reject all bad quality wds");
BOOL_VAR_H(tessedit_debug_doc_rejection, false, "Page stats");
BOOL_VAR_H(tessedit_debug_quality_metrics, false,
"Output data to debug file");
BOOL_VAR_H(bland_unrej, false, "unrej potential with no chekcs");
double_VAR_H(quality_rowrej_pc, 1.1,
"good_quality_doc gte good char limit");
BOOL_VAR_H(unlv_tilde_crunching, true,
"Mark v.bad words for tilde crunch");
BOOL_VAR_H(crunch_early_merge_tess_fails, true, "Before word crunch?");
BOOL_VAR_H(crunch_early_convert_bad_unlv_chs, false, "Take out ~^ early?");
double_VAR_H(crunch_terrible_rating, 80.0, "crunch rating lt this");
BOOL_VAR_H(crunch_terrible_garbage, true, "As it says");
double_VAR_H(crunch_poor_garbage_cert, -9.0,
"crunch garbage cert lt this");
double_VAR_H(crunch_poor_garbage_rate, 60, "crunch garbage rating lt this");
double_VAR_H(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this");
double_VAR_H(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this");
BOOL_VAR_H(crunch_pot_garbage, true, "POTENTIAL crunch garbage");
double_VAR_H(crunch_del_rating, 60, "POTENTIAL crunch rating lt this");
double_VAR_H(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this");
double_VAR_H(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this");
double_VAR_H(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this");
double_VAR_H(crunch_del_min_width, 3.0, "Del if word width lt xht x this");
double_VAR_H(crunch_del_high_word, 1.5,
"Del if word gt xht x this above bl");
double_VAR_H(crunch_del_low_word, 0.5, "Del if word gt xht x this below bl");
double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");
INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");
INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");
BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings");
BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");
BOOL_VAR_H(crunch_leave_accept_strings, false,
"Dont pot crunch sensible strings");
BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");
INT_VAR_H(crunch_leave_lc_strings, 4,
"Dont crunch words with long lower case strings");
// NOTE(review): this description is identical to the one given for
// crunch_leave_lc_strings and says "lower case", while the parameter name
// says "uc" — presumably it should read "upper case"; confirm upstream.
INT_VAR_H(crunch_leave_uc_strings, 4,
"Dont crunch words with long lower case strings");
INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");
INT_VAR_H(crunch_debug, 0, "As it says");
INT_VAR_H(fixsp_non_noise_limit, 1,
"How many non-noise blbs either side?");
double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctation joins");
INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing");
INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");
STRING_VAR_H(numeric_punctuation, ".,",
"Punct. chs expected WITHIN numbers");
INT_VAR_H(x_ht_acceptance_tolerance, 8,
"Max allowed deviation of blob top outside of font data");
INT_VAR_H(x_ht_min_change, 8, "Min change in xht before actually trying it");
BOOL_VAR_H(tessedit_write_block_separators, false,
"Write block separators in output");
BOOL_VAR_H(tessedit_write_rep_codes, false,
"Write repetition char code");
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level");
INT_VAR_H(suspect_space_level, 100,
"Min suspect level for rejecting spaces");
INT_VAR_H(suspect_short_words, 2,
"Dont Suspect dict wds longer than this");
BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit");
double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");
BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING");
BOOL_VAR_H(tessedit_word_for_word, false,
"Make output have exactly one word per WERD");
BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,
"Dont reject ANYTHING AT ALL");
BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");
INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");
INT_VAR_H(tessedit_ok_mode, 5, "Acceptance decision algorithm");
BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");
BOOL_VAR_H(tessedit_flip_0O, true, "Contextual 0O O0 flips");
double_VAR_H(tessedit_lower_flip_hyphen, 1.5,
"Aspect ratio dot/hyphen test");
double_VAR_H(tessedit_upper_flip_hyphen, 1.8,
"Aspect ratio dot/hyphen test");
BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");
BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");
BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check");
BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");
BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");
BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");
BOOL_VAR_H(rej_use_sensible_wd, false, "Extend permuter check");
BOOL_VAR_H(rej_alphas_in_number_perm, false, "Extend permuter check");
double_VAR_H(rej_whole_of_mostly_reject_word_fract, 0.85, "if >this fract");
INT_VAR_H(tessedit_image_border, 2, "Rej blbs near image edge limit");
STRING_VAR_H(ok_repeated_ch_non_alphanum_wds, "-?*5",
"Allow NN to unrej");
STRING_VAR_H(conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
INT_VAR_H(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");
BOOL_VAR_H(tessedit_create_boxfile, false, "Output text with boxes");
INT_VAR_H(tessedit_page_number, -1,
"-1 -> All pages, else specifc page to process");
BOOL_VAR_H(tessedit_write_images, false, "Capture the image from the IPE");
BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");
STRING_VAR_H(file_type, ".tif", "Filename extension");
BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");
INT_VAR_H(tessdata_manager_debug_level, 0,
"Debug level for TessdataManager functions.");
STRING_VAR_H(tessedit_load_sublangs, "",
"List of languages to load with this one");
// Min acceptable orientation margin (difference in scores between top and 2nd
// choice in OSResults::orientations) to believe the page orientation.
double_VAR_H(min_orientation_margin, 7.0,
"Min acceptable orientation margin");
BOOL_VAR_H(textord_tabfind_show_vlines, false, "Debug line finding");
BOOL_VAR_H(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model");
BOOL_VAR_H(tessedit_init_config_only, false,
"Only initialize with the config file. Useful if the instance is "
"not going to be used for OCR but say only for layout analysis.");
BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
回答by nguyenq
There are approximately 660 variables. You can obtain the complete list by executing the PrintVariablesToFile API function.
大约有 660 个变量。您可以通过执行 PrintVariablesToFile API 函数获取完整列表。
textord_debug_tabfind 0
textord_debug_bugs 0
textord_testregion_left -1
textord_testregion_top -1
textord_testregion_right 2147483647
textord_testregion_bottom 2147483647
textord_tabfind_show_partitions 0
....