Skip to content

Commit 0e3dfdc

Browse files
authored
[WIP] Port hardsubx classifier (#1446)
* add tesseract-sys in dependencies of rust modules * add appropriate feature flags and required packages to cargo toml * expose classifier * Redefine structs that are required for hardsubx Note: rust-bindgen isn't being used directly for this because it will also redefine structures of leptonica, tesseract, and ffmpeg and we don't want that. We want to use definitions of structs as in the rust interface libraries we are importing * write code to generate bindings for mprint * - write a function to convert rust strings to c strings - write a memory safe wrapper to mprint that uses above function * - add helper function to deal with tess strings in a memory safe manner - port get_ocr_text_simple - port get_ocr_text_wordwise * improve conversion of C string to Rust string by using built-in functions * replace mprint usage with warn! * port get_ocr_text_letterwise * remove redundant mprint function * improve readability of _tess_string_helper by using more general variable names inside * make get_ocr_text_simple call get_ocr_text_simple_threshold to remove redundant code * fix bugs * remove manual definition of cc_subtitle and use bindgen bindings * style changes to rust hardsubx classifier * add get_ocr_text_letterwise_threshold and make get_ocr_text_letterwise call it appropriately * move hardsubx context struct to mod.rs * add get_ocr_text_wordwise_threshold and make get_ocr_text_wordwise call it * use the ffmpeg-sys definition of Pix * hide ported functions under macros * use the AVPacket from bindings and not ffmpeg to make compatibility work for now. 
TODO: rewrite init_hardsubx and also deal with the ffmpeg stuff when that is done * improve _tess_string_helper by using appropriate built-in functions * linter recommended changes * clang style change * fix loop bug that didn't allow for re-evaluation of it on usage of continue statement * start porting of decoder with the _process_frame_color_basic function and related code * hide the C version of _process_frame_color_basic behind an #ifdef * add _process_frame_tickertext * hide the C version of _process_frame_tickertext behind ifdef and add #[no_mangle] to the rust version * check if word is empty as soon as word is detected * port _process_frame_white_basic * hide the C version _process_frame_white_basic behind compiler macros * stylistic changes * safety docs for hardsubx classifier * safety docs for decoder as of now * safety docs for utils.rs * style changes * format and style changes * modify safety docs * formatting fix
1 parent 4cb474c commit 0e3dfdc

File tree

10 files changed

+770
-15
lines changed

10 files changed

+770
-15
lines changed

src/lib_ccx/hardsubx_classifier.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include "lib_ccx.h"
22
#include "utility.h"
33

4-
#ifdef ENABLE_HARDSUBX
5-
//TODO: Correct FFMpeg integration
4+
#if defined(ENABLE_HARDSUBX) && defined(DISABLE_RUST)
5+
// TODO: Correct FFMpeg integration
66
#include <libavcodec/avcodec.h>
77
#include <libavformat/avformat.h>
88
#include <libavutil/imgutils.h>

src/lib_ccx/hardsubx_decoder.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <tesseract/capi.h>
1212
#include "hardsubx.h"
1313

14+
#ifdef DISABLE_RUST
1415
char *_process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
1516
{
1617
// printf("frame : %04d\n", index);
@@ -77,21 +78,21 @@ char *_process_frame_white_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
7778
{
7879
case HARDSUBX_OCRMODE_WORD:
7980
if (ctx->conf_thresh > 0)
80-
subtitle_text = get_ocr_text_wordwise_threshold(ctx, lum_im, ctx->conf_thresh);
81+
subtitle_text = get_ocr_text_wordwise_threshold(ctx, feat_im, ctx->conf_thresh);
8182
else
82-
subtitle_text = get_ocr_text_wordwise(ctx, lum_im);
83+
subtitle_text = get_ocr_text_wordwise(ctx, feat_im);
8384
break;
8485
case HARDSUBX_OCRMODE_LETTER:
8586
if (ctx->conf_thresh > 0)
86-
subtitle_text = get_ocr_text_letterwise_threshold(ctx, lum_im, ctx->conf_thresh);
87+
subtitle_text = get_ocr_text_letterwise_threshold(ctx, feat_im, ctx->conf_thresh);
8788
else
88-
subtitle_text = get_ocr_text_letterwise(ctx, lum_im);
89+
subtitle_text = get_ocr_text_letterwise(ctx, feat_im);
8990
break;
9091
case HARDSUBX_OCRMODE_FRAME:
9192
if (ctx->conf_thresh > 0)
92-
subtitle_text = get_ocr_text_simple_threshold(ctx, lum_im, ctx->conf_thresh);
93+
subtitle_text = get_ocr_text_simple_threshold(ctx, feat_im, ctx->conf_thresh);
9394
else
94-
subtitle_text = get_ocr_text_simple(ctx, lum_im);
95+
subtitle_text = get_ocr_text_simple(ctx, feat_im);
9596
break;
9697
default:
9798
fatal(EXIT_MALFORMED_PARAMETER, "Invalid OCR Mode");
@@ -214,6 +215,7 @@ char *_process_frame_color_basic(struct lib_hardsubx_ctx *ctx, AVFrame *frame, i
214215

215216
return subtitle_text;
216217
}
218+
#endif
217219

218220
void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int timestamp)
219221
{
@@ -294,6 +296,7 @@ void _display_frame(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int
294296
pixDestroy(&feat_im);
295297
}
296298

299+
#ifdef DISABLE_RUST
297300
char *_process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, int width, int height, int index)
298301
{
299302
PIX *im;
@@ -367,6 +370,7 @@ char *_process_frame_tickertext(struct lib_hardsubx_ctx *ctx, AVFrame *frame, in
367370

368371
return subtitle_text;
369372
}
373+
#endif
370374

371375
int hardsubx_process_frames_tickertext(struct lib_hardsubx_ctx *ctx, struct encoder_ctx *enc_ctx)
372376
{

src/rust/Cargo.lock

Lines changed: 56 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/rust/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@ log = "0.4.0"
1515
env_logger = "0.8.4"
1616
iconv = "0.1.1"
1717
palette = "0.6.0"
18-
ffmpeg-sys-next = { version = "5.0.1", optional = true, default-features = false, features = ["avcodec", "build"]}
18+
ffmpeg-sys-next = { version = "5.0.1", optional = true, default-features = false, features = ["avcodec", "avformat", "swscale", "build"]}
19+
tesseract-sys = { version = "0.5.12", optional = true, default-features = false}
20+
leptonica-sys = { version = "0.4.1", optional = true, default-features = false}
1921

2022
[build-dependencies]
2123
bindgen = "0.58.1"
2224

2325
[features]
24-
hardsubx_ocr = ["ffmpeg-sys-next"]
26+
hardsubx_ocr = ["ffmpeg-sys-next", "tesseract-sys", "leptonica-sys"]
27+

src/rust/build.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,23 @@ fn main() {
1111
];
1212

1313
#[cfg(feature = "hardsubx_ocr")]
14-
allowlist_functions.extend_from_slice(&["edit_distance", "convert_pts_to_.*", "av_rescale_q"]);
14+
allowlist_functions.extend_from_slice(&[
15+
"edit_distance",
16+
"convert_pts_to_.*",
17+
"av_rescale_q",
18+
"mprint",
19+
]);
1520

16-
let mut allowlist_types = vec![".*(?i)_?dtvcc_.*", "encoder_ctx", "lib_cc_decode"];
21+
let mut allowlist_types = vec![
22+
".*(?i)_?dtvcc_.*",
23+
"encoder_ctx",
24+
"lib_cc_decode",
25+
"cc_subtitle",
26+
"ccx_output_format",
27+
];
1728

1829
#[cfg(feature = "hardsubx_ocr")]
19-
allowlist_types.extend_from_slice(&["AVRational"]);
30+
allowlist_types.extend_from_slice(&["AVRational", "AVPacket", "AVFrame"]);
2031

2132
let mut builder = bindgen::Builder::default()
2233
// The input header we would like to generate

0 commit comments

Comments
 (0)