Description
I ran the code from the lmms-eval repository (https://github.com/EvolvingLMMs-Lab/lmms-eval) after making the following corrections:
1. Imported IGNORE_INDEX from llava.constants in llava_qwen.py.
2. Pasted the generate_moviechat() function from this repository into llava_qwen.py.
3. Changed the import path LLAVA_NeXT.llava to llava in llava_onevision_moviechat.py.
4. Added a generate_until_multi_round() function in llava_onevision_moviechat.py to resolve the error that this abstract method was not implemented. I only declared it; it is never actually called (see the sketch after this list).
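For reference, the step-4 stub was along these lines (a minimal sketch of a do-nothing override; the exact signature required by the lmms-eval base model class may differ slightly, and the requests argument name is an assumption):

# Inside the model class in llava_onevision_moviechat.py:
def generate_until_multi_round(self, requests) -> list[str]:
    # Declared only to satisfy the abstract method required by the lmms-eval
    # base model class; it is never called during this single-round evaluation.
    raise NotImplementedError("Multi-round generation is not supported here.")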
After these adjustments, I tested the llava_onevision_moviechat model using the following script:
python3 -m accelerate.commands.launch \
    --num_processes=8 \
    -m lmms_eval \
    --model llava_onevision_moviechat \
    --tasks moviechat_global \
    --batch_size 1 \
    --log_samples \
    --log_samples_suffix llava_onevision_moviechat \
    --output_path ./logs/
However, the accuracy I achieved was only about 39%, as shown in the results below. Could you kindly help me identify what might have gone wrong?
{
  "results": {
    "moviechat_global": {
      "alias": "moviechat_global",
      "gpt_eval_score,none": 2.7583781547372777,
      "gpt_eval_score_stderr,none": "N/A",
      "gpt_eval_acc,none": 0.39098055440628876,
      "gpt_eval_acc_stderr,none": "N/A"
    }
  },
  "group_subtasks": {
    "moviechat_global": []
  },
  "configs": {
    "moviechat_global": {
      "task": "moviechat_global",
      "dataset_path": "Enxin/lmms_MovieChat_test",
      "dataset_kwargs": {
        "token": true
      },
      "test_split": "test",
      "full_docs": false,
      "process_results_use_image": false,
      "doc_to_visual": "<function moviechat_doc_to_visual at 0x7f8ab8418af0>",
      "doc_to_text": "<function moviechat_doc_to_text at 0x7f8ab8431670>",
      "doc_to_target": "<function moviechat_doc_to_answer at 0x7f8ab8431f70>",
      "process_results": "<function moviechat_process_results_generic at 0x7f8ab843baf0>",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "gpt_eval_score",
          "aggregation": "<function moviechat_aggregate_score at 0x7f8ab8442430>",
          "higher_is_better": true
        },
        {
          "metric": "gpt_eval_acc",
          "aggregation": "<function moviechat_aggregate_acc at 0x7f8ab8442d30>",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n"
        ],
        "do_sample": false
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 0.0,
        "gpt_eval_model_name": "gpt-3.5-turbo-0125"
      },
      "lmms_eval_specific_kwargs": {
        "default": {
          "pre_prompt": "You are able to understand the visual content that the user provides.Follow the instructions carefully and explain your answers in detail.",
          "post_prompt": ""
        },
        "pre_prompt": "You are able to understand the visual content that the user provides.Follow the instructions carefully and explain your answers in detail.",
        "post_prompt": ""
      }
    }
  },
  "versions": {
    "moviechat_global": 0.0
  },
  "n-shot": {
    "moviechat_global": 0
  },
  "higher_is_better": {
    "moviechat_global": {
      "gpt_eval_score": true,
      "gpt_eval_acc": true
    }
  },
  "n-samples": {
    "moviechat_global": {
      "original": 2417,
      "effective": 2417
    }
  },
  "config": {
    "model": "llava_onevision_moviechat",
    "model_args": "",
    "batch_size": "1",
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": "",
    "random_seed": 0,
    "numpy_seed": 1234,
    "torch_seed": 1234,
    "fewshot_seed": 1234
  },
  "git_hash": "d2056e6",
  "date": "20241127_134436",
  "task_hashes": {
    "moviechat_global": "51d9d796ea5bc78838d989e9f7802a1cdf68efab207ef3ef66d59a9993836fef"
  },
  "model_source": "llava_onevision_moviechat",
  "model_name": "",
  "model_name_sanitized": "",
  "system_instruction": null,
  "system_instruction_sha": null,
  "fewshot_as_multiturn": false,
  "chat_template": null,
  "chat_template_sha": null,
  "start_time": 2483129.288830893,
  "end_time": 2503653.673547,
  "total_evaluation_time_seconds": "20524.384716107044"
}