Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2ce2399
docs(pypi): Improve README display and badge reliability
aksg87 Jul 22, 2025
4fe7580
feat: add trusted publishing workflow and prepare v1.0.0 release
aksg87 Jul 22, 2025
e696a48
Fix: Resolve libmagic ImportError (#6)
aksg87 Aug 1, 2025
5447637
docs: clarify output_dir behavior in medication_examples.md
kleeena Aug 1, 2025
9c47b34
Merge pull request #11 from google/fix/libmagic-dependency-issue
aksg87 Aug 1, 2025
175e075
Removed inline comment in medication example
kleeena Aug 2, 2025
9472099
Merge pull request #15 from kleeena/docs/update-medication_examples.md
aksg87 Aug 2, 2025
e6c3dcd
docs: add output_dir="." to all save_annotated_documents examples
aksg87 Aug 2, 2025
1fb1f1d
Merge pull request #17 from google/fix/output-dir-consistency
aksg87 Aug 2, 2025
13fbd2c
build: add formatting & linting pipeline with pre-commit integration
aksg87 Aug 3, 2025
c8d2027
style: apply pyink, isort, and pre-commit formatting
aksg87 Aug 3, 2025
146a095
ci: enable format and lint checks in tox
aksg87 Aug 3, 2025
aa6da18
Merge pull request #24 from google/feat/code-formatting-pipeline
aksg87 Aug 3, 2025
ed65bca
Add LangExtractError base exception for centralized error handling
aksg87 Aug 3, 2025
6c4508b
Merge pull request #26 from google/feat/exception-hierarchy
aksg87 Aug 3, 2025
8b85225
fix: Remove LangFun and pylibmagic dependencies (v1.0.2)
aksg87 Aug 3, 2025
88520cc
Merge pull request #28 from google/fix/remove-breaking-dep-langfun
aksg87 Aug 3, 2025
75a6f12
Fix save_annotated_documents to handle string paths
aksg87 Aug 3, 2025
a415b94
Merge pull request #29 from google/fix-save-annotated-documents-mkdir
aksg87 Aug 3, 2025
8289b3a
feat: Add OpenAI language model support
aksg87 Aug 3, 2025
c8ef723
Merge pull request #31 from google/feature/add-oai-inference
aksg87 Aug 3, 2025
dfe8188
fix(ui): prevent current highlight border from being obscured. Chan…
tonebeta Aug 4, 2025
87c511e
feat: Add live API integration tests (#39)
aksg87 Aug 4, 2025
dc61372
Add PR template validation workflow (#45)
aksg87 Aug 4, 2025
da771e6
fix: Change OllamaLanguageModel parameter from 'model' to 'model_id' …
aksg87 Aug 5, 2025
e83d5cf
feat: Add CITATION.cff file for proper software citation
aksg87 Aug 5, 2025
337beee
feat: Add Ollama integration with Docker examples and CI tests (#62)
aksg87 Aug 5, 2025
a7ef0bd
chore: Bump version to 1.0.4 for release
aksg87 Aug 5, 2025
87beb4f
build(deps): bump tj-actions/changed-files (#66)
dependabot[bot] Aug 5, 2025
db140d1
Add PR validation workflows and update contribution guidelines (#74)
aksg87 Aug 5, 2025
ed97f73
Fix custom comment in linked issue check (#77)
aksg87 Aug 5, 2025
ad1f27b
Add infrastructure file protection workflow (#76)
aksg87 Aug 5, 2025
41bc9ed
Allow maintainers to bypass community support requirement
aksg87 Aug 5, 2025
54e57db
Add manual trigger capability to validation workflows (#75)
aksg87 Aug 5, 2025
25ebc17
Fix fork PR labeling by using pull_request_target
aksg87 Aug 5, 2025
b0d7ebb
Add Gemini Vertex AI integration with thinking budget support
NewcomerAI Aug 6, 2025
8069650
Fix code formatting and linting issues
NewcomerAI Aug 6, 2025
1290d63
Add workflow_dispatch trigger to CI workflow
aksg87 Aug 6, 2025
dd1654c
Merge branch 'main' into feature/vertex-ai-integration
NewcomerAI Aug 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ contact_links:
url: https://g.co/vulnz
about: >
To report a security issue, please use https://g.co/vulnz. The Google Security Team will
respond within 5 working days of your report on https://g.co/vulnz.
respond within 5 working days of your report on https://g.co/vulnz.
100 changes: 100 additions & 0 deletions .github/workflows/check-infrastructure-changes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Blocks pull requests from non-maintainers that touch protected
# infrastructure files (everything under .github/, build configuration and
# repository-level documents). On a violation the job posts an explanatory
# comment on the PR and fails.
name: Protect Infrastructure Files

on:
  # pull_request_target runs with the base repository's token, which is what
  # allows commenting on PRs opened from forks. This workflow never checks out
  # or executes PR code; it only reads PR metadata through the API.
  pull_request_target:
    types: [opened, synchronize, reopened]
  workflow_dispatch:

permissions:
  contents: read
  pull-requests: write

jobs:
  protect-infrastructure:
    # Draft PRs are ignored; manual runs are always allowed to start.
    if: github.event_name == 'workflow_dispatch' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest

    steps:
      - name: Check for infrastructure file changes
        # The event name delivered by the trigger above is
        # 'pull_request_target' (not 'pull_request'). Manual runs carry no
        # pull_request payload, so the step is skipped for them.
        if: github.event_name == 'pull_request_target'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const MAINTAINER_ROLES = ['admin', 'maintain'];

            // Exact-match paths that are protected in addition to `.github/`.
            const PROTECTED_FILES = new Set([
              'pyproject.toml',
              'tox.ini',
              '.pre-commit-config.yaml',
              '.pylintrc',
              'Dockerfile',
              'autoformat.sh',
              '.gitignore',
              'CONTRIBUTING.md',
              'LICENSE',
              'CITATION.cff'
            ]);

            const isProtectedPath = path =>
              typeof path === 'string' &&
              (path.startsWith('.github/') || PROTECTED_FILES.has(path));

            // Get the PR author and check if they're a maintainer.
            const prAuthor = context.payload.pull_request.user.login;
            const { data: authorPermission } = await github.rest.repos.getCollaboratorPermissionLevel({
              owner: context.repo.owner,
              repo: context.repo.repo,
              username: prAuthor
            });

            // `permission` only carries the legacy base roles
            // (admin/write/read/none), where the maintain role is reported
            // as "write"; `role_name` carries the actual assigned role.
            const isMaintainer =
              MAINTAINER_ROLES.includes(authorPermission.permission) ||
              MAINTAINER_ROLES.includes(authorPermission.role_name);

            // Get every file changed in the PR. The endpoint is paged, so
            // paginate instead of inspecting only the first page.
            const files = await github.paginate(github.rest.pulls.listFiles, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.payload.pull_request.number,
              per_page: 100
            });

            // A rename is matched on both its new and its previous path so a
            // protected file cannot be moved out from under the check.
            const infrastructureFiles = files.filter(file =>
              isProtectedPath(file.filename) || isProtectedPath(file.previous_filename)
            );

            if (infrastructureFiles.length === 0) {
              core.info('No infrastructure files modified');
              return;
            }

            if (isMaintainer) {
              core.info(`PR modifies ${infrastructureFiles.length} infrastructure file(s) - allowed for maintainer ${prAuthor}`);
              return;
            }

            const fileList = infrastructureFiles.map(f => `  - ${f.filename} (${f.changes} changes)`).join('\n');

            // Post a comment explaining the issue.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.payload.pull_request.number,
              body: `❌ **Infrastructure File Protection**\n\n` +
                    `This PR modifies protected infrastructure files:\n\n${fileList}\n\n` +
                    `Only repository maintainers are allowed to modify infrastructure files (including \`.github/\`, build configuration, and repository documentation).\n\n` +
                    `**Note**: If these are only formatting changes, please:\n` +
                    `1. Revert changes to \`.github/\` files\n` +
                    `2. Use \`./autoformat.sh\` to format only source code directories\n` +
                    `3. Avoid running formatters on infrastructure files\n\n` +
                    `If structural changes are necessary:\n` +
                    `1. Open an issue describing the needed infrastructure changes\n` +
                    `2. A maintainer will review and implement the changes if approved\n\n` +
                    `For more information, see our [Contributing Guidelines](https://github.com/google/langextract/blob/main/CONTRIBUTING.md).`
            });

            core.setFailed(
              `This PR modifies ${infrastructureFiles.length} protected infrastructure file(s). ` +
              `Only maintainers can modify these files. ` +
              `Use ./autoformat.sh to format code without touching infrastructure.`
            );
90 changes: 90 additions & 0 deletions .github/workflows/check-linked-issue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Requires every non-draft pull request to reference an issue
# ("Fixes #123", "Closes #123" or "Resolves #123") and, unless the author is
# a maintainer, requires that issue to have at least 5 thumbs-up reactions.
name: Require linked issue with community support

on:
  # pull_request_target runs with the base repository's token so the job can
  # comment on PRs from forks. No PR code is checked out or executed here.
  pull_request_target:
    types: [opened, edited, synchronize, reopened]
  workflow_dispatch:

permissions:
  contents: read
  issues: read
  pull-requests: write

jobs:
  enforce:
    # Draft PRs are ignored; manual runs are always allowed to start.
    if: github.event_name == 'workflow_dispatch' || github.event.pull_request.draft == false
    runs-on: ubuntu-latest

    steps:
      - name: Verify linked issue
        # The event name delivered by the trigger above is
        # 'pull_request_target' (not 'pull_request'). Manual runs carry no
        # pull_request payload, so the step is skipped for them.
        if: github.event_name == 'pull_request_target'
        uses: nearform-actions/github-action-check-linked-issues@v1.2.7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          comment: true
          exclude-branches: main
          custom-body: |
            No linked issues found. Please add "Fixes #<issue-number>" to your pull request description.

            Per our [Contributing Guidelines](https://github.com/google/langextract/blob/main/CONTRIBUTING.md#pull-request-guidelines), all PRs must:
            - Reference an issue with "Fixes #123" or "Closes #123"
            - The linked issue should have 5+ 👍 reactions
            - Include discussion demonstrating the importance of the change

            Use GitHub automation to close the issue when this PR is merged.

      - name: Check community support
        if: github.event_name == 'pull_request_target'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const MAINTAINER_ROLES = ['admin', 'maintain'];
            const REQUIRED_THUMBS_UP = 5;

            // Check if PR author is a maintainer.
            const prAuthor = context.payload.pull_request.user.login;
            const { data: authorPermission } = await github.rest.repos.getCollaboratorPermissionLevel({
              owner: context.repo.owner,
              repo: context.repo.repo,
              username: prAuthor
            });

            // `permission` only carries the legacy base roles
            // (admin/write/read/none), where the maintain role is reported
            // as "write"; `role_name` carries the actual assigned role.
            const isMaintainer =
              MAINTAINER_ROLES.includes(authorPermission.permission) ||
              MAINTAINER_ROLES.includes(authorPermission.role_name);

            // Only the first closing keyword in the PR description is used.
            const body = context.payload.pull_request.body || '';
            const match = body.match(/(?:Fixes|Closes|Resolves)\s+#(\d+)/i);

            if (!match) {
              core.setFailed('No linked issue found');
              return;
            }

            const issueNumber = Number(match[1]);

            // The referenced number may not exist or may not be an issue;
            // report that clearly instead of crashing on a null dereference
            // or an unhandled GraphQL error.
            let issue;
            try {
              const { repository } = await github.graphql(`
                query($owner: String!, $repo: String!, $number: Int!) {
                  repository(owner: $owner, name: $repo) {
                    issue(number: $number) {
                      reactionGroups {
                        content
                        users {
                          totalCount
                        }
                      }
                    }
                  }
                }
              `, {
                owner: context.repo.owner,
                repo: context.repo.repo,
                number: issueNumber
              });
              issue = repository && repository.issue;
            } catch (error) {
              core.setFailed(`Could not load linked issue #${issueNumber}: ${error.message}`);
              return;
            }

            if (!issue) {
              core.setFailed(`Linked issue #${issueNumber} was not found in this repository`);
              return;
            }

            const reactions = issue.reactionGroups || [];
            const thumbsUp = reactions.find(g => g.content === 'THUMBS_UP')?.users.totalCount || 0;

            core.info(`Issue #${issueNumber} has ${thumbsUp} 👍 reactions`);

            if (thumbsUp < REQUIRED_THUMBS_UP && !isMaintainer) {
              core.setFailed(`Issue #${issueNumber} needs at least ${REQUIRED_THUMBS_UP} 👍 reactions (currently has ${thumbsUp})`);
            } else if (isMaintainer && thumbsUp < REQUIRED_THUMBS_UP) {
              core.info(`Maintainer ${prAuthor} bypassing community support requirement (issue has ${thumbsUp} 👍 reactions)`);
            }
44 changes: 44 additions & 0 deletions .github/workflows/check-pr-size.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Labels each pull request with a size bucket (size/XS .. size/XL) based on
# additions + deletions, and fails when the PR exceeds 1000 changed lines.
name: Check PR size

on:
  # pull_request_target runs with the base repository's token so the job can
  # label PRs from forks. No PR code is checked out or executed here.
  pull_request_target:
    types: [opened, synchronize, reopened]
  workflow_dispatch:

permissions:
  contents: read
  pull-requests: write

jobs:
  size:
    runs-on: ubuntu-latest
    steps:
      - name: Evaluate PR size
        # The event name delivered by the trigger above is
        # 'pull_request_target' (not 'pull_request'). Manual runs carry no
        # pull_request payload, so the step is skipped for them.
        if: github.event_name == 'pull_request_target'
        uses: actions/github-script@v7
        with:
          script: |
            const MAX_LINES = 1000;

            const pr = context.payload.pull_request;
            const totalChanges = pr.additions + pr.deletions;

            core.info(`PR contains ${pr.additions} additions and ${pr.deletions} deletions (${totalChanges} total)`);

            const sizeLabel =
              totalChanges < 50 ? 'size/XS' :
              totalChanges < 150 ? 'size/S' :
              totalChanges < 600 ? 'size/M' :
              totalChanges < 1000 ? 'size/L' : 'size/XL';

            // Remove size labels left by earlier runs so the PR carries
            // exactly one size/* label after it grows or shrinks.
            const staleLabels = (pr.labels || [])
              .map(label => label.name)
              .filter(name => name.startsWith('size/') && name !== sizeLabel);

            for (const name of staleLabels) {
              try {
                await github.rest.issues.removeLabel({
                  ...context.repo,
                  issue_number: pr.number,
                  name
                });
              } catch (error) {
                // The label may have been removed since the event fired.
                if (error.status !== 404) {
                  throw error;
                }
              }
            }

            await github.rest.issues.addLabels({
              ...context.repo,
              issue_number: pr.number,
              labels: [sizeLabel]
            });

            if (totalChanges > MAX_LINES) {
              core.setFailed(
                `This PR contains ${totalChanges} lines of changes, which exceeds the maximum of ${MAX_LINES} lines. ` +
                `Please split this into smaller, focused pull requests.`
              );
            }
92 changes: 89 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
name: CI

on:
workflow_dispatch:
push:
branches: ["main"]
pull_request:
Expand All @@ -28,7 +29,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4

Expand All @@ -42,6 +43,91 @@ jobs:
python -m pip install --upgrade pip
pip install -e ".[dev,test]"

- name: Run tox (lint + tests)
- name: Run unit tests and linting
run: |
tox
PY_VERSION=$(echo "${{ matrix.python-version }}" | tr -d '.')
tox -e py${PY_VERSION},format,lint-src,lint-tests

# Job (an entry under `jobs:`): runs the live provider tests once the `test`
# job has passed. It only starts for pushes and for pull requests whose head
# branch lives in this repository.
live-api-tests:
  runs-on: ubuntu-latest
  needs: test
  if: >-
    github.event_name == 'push' ||
    (github.event_name == 'pull_request' &&
    github.event.pull_request.head.repo.full_name == github.repository)

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        python-version: "3.11"

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev,test]"

    - name: Run live API tests
      env:
        GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        LANGEXTRACT_API_KEY: ${{ secrets.GEMINI_API_KEY }}  # For backward compatibility
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      # Run the suite when at least one provider key is present; otherwise
      # emit a notice and let the step succeed.
      run: |
        if [[ -n "$GEMINI_API_KEY" || -n "$OPENAI_API_KEY" ]]; then
          tox -e live-api
        else
          echo "::notice::Live API tests skipped - no provider secrets configured"
        fi

# Job (an entry under `jobs:`): runs the Ollama integration tests on pull
# requests, but only when Ollama-related files (or this workflow) changed.
ollama-integration-test:
  needs: test
  runs-on: ubuntu-latest
  if: github.event_name == 'pull_request'

  steps:
    - uses: actions/checkout@v4

    - name: Detect file changes
      id: changes
      uses: tj-actions/changed-files@v46
      with:
        files: |
          langextract/inference.py
          examples/ollama/**
          tests/test_ollama_integration.py
          .github/workflows/ci.yaml

    # Informational only: a successful `exit 0` ends this step, not the job.
    # The real skip is the `any_changed == 'true'` condition on every step
    # below.
    - name: Skip if no Ollama changes
      if: steps.changes.outputs.any_changed == 'false'
      run: |
        echo "No Ollama-related changes detected – skipping job."
        exit 0

    - name: Set up Python 3.11
      if: steps.changes.outputs.any_changed == 'true'
      uses: actions/setup-python@v4
      with:
        python-version: "3.11"

    - name: Launch Ollama container
      if: steps.changes.outputs.any_changed == 'true'
      run: |
        docker run -d --name ollama \
          -p 127.0.0.1:11434:11434 \
          -v ollama:/root/.ollama \
          ollama/ollama:0.5.4
        # Poll the version endpoint for up to ~60s and fail loudly if the
        # server never answers, instead of letting the tests fail later.
        for i in {1..20}; do
          if curl -fs http://localhost:11434/api/version; then
            exit 0
          fi
          sleep 3
        done
        echo "::error::Ollama did not become ready in time"
        exit 1

    # Best effort: a failed pull is tolerated here (`|| true`).
    - name: Pull gemma2 model
      if: steps.changes.outputs.any_changed == 'true'
      run: docker exec ollama ollama pull gemma2:2b || true

    - name: Install tox
      if: steps.changes.outputs.any_changed == 'true'
      run: |
        python -m pip install --upgrade pip
        pip install tox

    - name: Run Ollama integration tests
      if: steps.changes.outputs.any_changed == 'true'
      run: tox -e ollama-integration
Loading