Skip to content

修改自定义分词 #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
264 changes: 5 additions & 259 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,259 +1,5 @@

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/

# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUNIT
*.VisualState.xml
TestResult.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# DNX
project.lock.json
project.fragment.lock.json
artifacts/

*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# JustCode is a .NET coding add-in
.JustCode

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
#*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produces more ignoreable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# JetBrains Rider
.idea/
*.sln.iml

# CodeRush
.cr/

# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
/.vs
**/obj
**/bin
**/PublishProfiles
*.user
Binary file removed Analyser/.DS_Store
Binary file not shown.
17 changes: 8 additions & 9 deletions Analyser/Analyser.csproj
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<PackageId>Lucene.JIEba.Analyzer</PackageId>
<PackageVersion>1.0.0</PackageVersion>
<Authors>SilentCC</Authors>
<PackageId>Lucene.JIEba.Analyzer.NetCore</PackageId>
<PackageVersion>1.0.1</PackageVersion>
<Authors>shshshddy</Authors>
<Description>JIEba.Lucene.Net is an analyzer tool for Lucene.Net that works well with Chinese text</Description>
<PackageRequireLicenseAcceptance>false</PackageRequireLicenseAcceptance>
<PackageProjectUrl>https://github.com/SilentCC/JIEba-netcore2.0/</PackageProjectUrl>
<Copyright>Copyright 2019 (c) AgileLabs. All rights reserved.</Copyright>
<PackageProjectUrl>https://github.com/shshshdy/JIEba-netcore</PackageProjectUrl>
<Copyright>Copyright 2020 (c) AgileLabs. All rights reserved.</Copyright>
<PackageTags>Analyzer Segment JIEba.net core2.0</PackageTags>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Company>shshshddy</Company>
<Version>1.0.1</Version>
<PackageReleaseNotes>删除多余依赖</PackageReleaseNotes>
</PropertyGroup>

<ItemGroup>
Expand All @@ -24,10 +27,6 @@
</EmbeddedResource>
</ItemGroup>

<ItemGroup>
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" Version="1.0.0" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Segmenter\Segmenter.csproj" />
</ItemGroup>
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ ps: 修改了JIEba分词,导致的高亮bug
TokenStream = analyzer.GetTokenStream(str,indexReader);

```
>[具体demo参考](https://gitee.com/shshshdy/net-core-tool)

# 相关文档
[JIEba.net 集成到Lucene.Net的过程](https://www.cnblogs.com/dacc123/p/8431369.html)
# 感谢原作

在[原作](https://github.com/SilentCC/JIEba-netcore2.0/)的基础上,修复了加载用户分词的 BUG。
Binary file removed Segmenter/.DS_Store
Binary file not shown.
42 changes: 36 additions & 6 deletions Segmenter/Common/FileExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,24 @@ public static string ReadEmbeddedAllLine(string path,Encoding encoding)

public static List<string> ReadEmbeddedAllLines(string path, Encoding encoding)
{
var provider = new EmbeddedFileProvider(typeof(FileExtension).GetTypeInfo().Assembly);
var fileInfo = provider.GetFileInfo(path);
List<string> list = new List<string>();
using (StreamReader streamReader = new StreamReader(fileInfo.CreateReadStream(), encoding))
var assmbly = typeof(FileExtension).GetTypeInfo().Assembly;
return ReadEmbeddedAllLines(assmbly, path, encoding);
}

/// <summary>
/// Reads all lines of the embedded resource at <paramref name="path"/>,
/// decoding it as UTF-8.
/// </summary>
/// <param name="path">Path of the embedded resource, passed through to the encoding-aware overload.</param>
/// <returns>The lines returned by the <c>(path, encoding)</c> overload.</returns>
public static List<string> ReadEmbeddedAllLines(string path)
    => ReadEmbeddedAllLines(path, Encoding.UTF8);

/// <summary>
/// Reads all lines of the file at <paramref name="path"/>, decoding it as UTF-8.
/// </summary>
/// <param name="path">Path handed to the encoding-aware overload.</param>
/// <returns>The lines returned by the <c>(path, encoding)</c> overload.</returns>
public static List<string> ReadAllLines(string path)
    => ReadAllLines(path, Encoding.UTF8);

public static List<string> ReadAllLines(string path, Encoding encoding)
{
var list = new List<string>();
using (StreamReader streamReader = new StreamReader(path, encoding))
{
string item;
while ((item = streamReader.ReadLine()) != null)
Expand All @@ -40,9 +54,25 @@ public static List<string> ReadEmbeddedAllLines(string path, Encoding encoding)
return list;
}

public static List<string> ReadEmbeddedAllLines(string path)
public static List<string> ReadEmbeddedAllLines(Assembly assembly, string path)
{
return ReadEmbeddedAllLines(path, Encoding.UTF8);
return ReadEmbeddedAllLines(assembly, path, Encoding.UTF8);
}

/// <summary>
/// Reads every line of a resource embedded in <paramref name="assembly"/>.
/// </summary>
/// <param name="assembly">Assembly whose embedded files are exposed through an <c>EmbeddedFileProvider</c>.</param>
/// <param name="path">Path of the embedded file, as understood by the provider's <c>GetFileInfo</c>.</param>
/// <param name="encoding">Encoding used to decode the resource stream.</param>
/// <returns>A new list holding the lines in the order they were read.</returns>
public static List<string> ReadEmbeddedAllLines(Assembly assembly, string path, Encoding encoding)
{
    var embeddedFile = new EmbeddedFileProvider(assembly).GetFileInfo(path);
    var lines = new List<string>();

    // The reader owns the resource stream and closes it when disposed.
    using (var reader = new StreamReader(embeddedFile.CreateReadStream(), encoding))
    {
        // ReadLine returns null once the end of the stream is reached.
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            lines.Add(line);
        }
    }

    return lines;
}
}
}
Loading