Skip to content

Commit 634102f

Browse files
authored
🐛 Update "Use this model" snippets for PaddleOCR models (#1645)
Hi HuggingFace team, Thank you very much for supporting the PaddleOCR snippets. However, the code required for PaddleOCR models differs depending on the specific task. To address this, I have updated the model tags to include task information. The snippets will also need to be updated accordingly. Please review this PR, and feel free to let me know if any modifications are needed. Thank you for your attention and support.
1 parent 60e4f27 commit 634102f

File tree

1 file changed

+48
-5
lines changed

1 file changed

+48
-5
lines changed

packages/tasks/src/model-libraries-snippets.ts

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,16 +1018,59 @@ export const paddlenlp = (model: ModelData): string[] => {
10181018
}
10191019
};
10201020

1021-
export const paddleocr = (model: ModelData): string[] => [
1022-
`# pip install paddleocr
1023-
from paddleocr import TextDetection
1024-
model = TextDetection(model_name="${model.id}")
1021+
/**
 * Build the "Use this model" Python snippet for a PaddleOCR model.
 *
 * The PaddleOCR class to instantiate depends on the task, which is read from
 * the model's tags:
 * - a `doc_vlm` tag yields a `DocVLM` snippet whose input is an image plus a text query;
 * - otherwise the first tag that names a known task yields a snippet for that task's class;
 * - if no tag is recognised, the snippet is a comment pointing at the module usage docs.
 *
 * @param model - Model metadata; only `id` and `tags` are read.
 * @returns A single-element array containing the snippet.
 */
export const paddleocr = (model: ModelData): string[] => {
	// A Map (not a plain object queried with `in`) so that tags which happen to
	// equal inherited Object.prototype keys ("constructor", "toString", ...)
	// are never mistaken for a known task.
	const classNameByTag = new Map<string, string>([
		["textline_detection", "TextDetection"],
		["textline_recognition", "TextRecognition"],
		["seal_text_detection", "SealTextDetection"],
		["doc_img_unwarping", "TextImageUnwarping"],
		["doc_img_orientation_classification", "DocImgOrientationClassification"],
		["textline_orientation_classification", "TextLineOrientationClassification"],
		["chart_parsing", "ChartParsing"],
		["formula_recognition", "FormulaRecognition"],
		["layout_detection", "LayoutDetection"],
		["table_cells_detection", "TableCellsDetection"],
		["wired_table_classification", "TableClassification"],
		["table_structure_recognition", "TableStructureRecognition"],
	]);

	// doc_vlm is checked first: its predict() takes an image/query dict rather
	// than a bare image path, so it needs its own snippet.
	if (model.tags.includes("doc_vlm")) {
		return [
			`# pip install paddleocr
from paddleocr import DocVLM
model = DocVLM(model_name="${model.id}")
output = model.predict(
    input={"image": "path/to/image.png", "query": "Parsing this image and output the content in Markdown format."},
    batch_size=1
)
for res in output:
    res.print()
    res.save_to_img(save_path="./output/")
    res.save_to_json(save_path="./output/res.json")`,
		];
	}

	// The first tag (in tag order) that names a known task decides the class.
	for (const tag of model.tags) {
		const className = classNameByTag.get(tag);
		if (className !== undefined) {
			return [
				`# pip install paddleocr
from paddleocr import ${className}
model = ${className}(model_name="${model.id}")
output = model.predict(input="path/to/image.png", batch_size=1)
for res in output:
    res.print()
    res.save_to_img(save_path="./output/")
    res.save_to_json(save_path="./output/res.json")`,
			];
		}
	}

	// No recognised task tag: point the user at the documentation instead.
	return [
		`# Please refer to the document for information on how to use the model.
# https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/module_usage/module_overview.html`,
	];
};
10311074

10321075
export const perception_encoder = (model: ModelData): string[] => {
10331076
const clip_model = `# Use PE-Core models as CLIP models

0 commit comments

Comments
 (0)