Add detected file language to code search (#10256)
Move langauge detection to separate module to be more reusable Add option to disable vendored file exclusion from file search Allways show all language stats for searchrelease
parent
efbd7ca39b
commit
3c45cf8494
@ -0,0 +1,36 @@
|
||||
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package analyze
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/src-d/enry/v2"
|
||||
)
|
||||
|
||||
// GetCodeLanguageWithCallback detects code language based on file name and content using callback
|
||||
func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string {
|
||||
if language, ok := enry.GetLanguageByExtension(filename); ok {
|
||||
return language
|
||||
}
|
||||
|
||||
if language, ok := enry.GetLanguageByFilename(filename); ok {
|
||||
return language
|
||||
}
|
||||
|
||||
content, err := contentFunc()
|
||||
if err != nil {
|
||||
return enry.OtherLanguage
|
||||
}
|
||||
|
||||
return enry.GetLanguage(filepath.Base(filename), content)
|
||||
}
|
||||
|
||||
// GetCodeLanguage detects code language based on file name and content
|
||||
func GetCodeLanguage(filename string, content []byte) string {
|
||||
return GetCodeLanguageWithCallback(filename, func() ([]byte, error) {
|
||||
return content, nil
|
||||
})
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package keyword
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizer/single"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "keyword"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(single.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package single
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "single"
|
||||
|
||||
type SingleTokenTokenizer struct {
|
||||
}
|
||||
|
||||
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
|
||||
return &SingleTokenTokenizer{}
|
||||
}
|
||||
|
||||
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
return analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: input,
|
||||
Position: 1,
|
||||
Start: 0,
|
||||
End: len(input),
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return NewSingleTokenTokenizer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
|
||||
}
|
Loading…
Reference in New Issue