1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// Copyright (c) The Diem Core Contributors
// SPDX-License-Identifier: Apache-2.0

use crate::{prelude::*, LintContext};
use std::str;

/// Represents a linter that checks file contents.
///
/// Implementors must provide [`run`](ContentLinter::run);
/// [`pre_run`](ContentLinter::pre_run) has a default implementation that may be overridden.
pub trait ContentLinter: Linter {
    /// Pre-run step -- avoids loading the contents if possible.
    ///
    /// This hook only receives the file path context, not the file contents.
    ///
    /// The default implementation returns `Ok(RunStatus::Executed)`; individual lints may configure
    /// a more restricted set by overriding this method.
    fn pre_run<'l>(&self, _file_ctx: &FilePathContext<'l>) -> Result<RunStatus<'l>> {
        Ok(RunStatus::Executed)
    }

    /// Executes the lint against the given content context.
    ///
    /// `ctx` carries the file path context together with the file's contents. `out` is the
    /// [`LintFormatter`] handed to the lint (presumably used to report findings -- confirm
    /// against `LintFormatter`'s definition).
    fn run<'l>(
        &self,
        ctx: &ContentContext<'l>,
        out: &mut LintFormatter<'l, '_>,
    ) -> Result<RunStatus<'l>>;
}

/// Context handed to content lints: a file path context paired with that file's contents.
#[derive(Debug)]
pub struct ContentContext<'l> {
    /// The path-level context for the file whose contents are being linted.
    file_ctx: FilePathContext<'l>,
    /// The file's contents, stored as text when valid UTF-8 and as raw bytes otherwise.
    content: Content,
}

#[allow(dead_code)]
impl<'l> ContentContext<'l> {
    /// The number of bytes that will be searched for null bytes in a file to figure out if it is
    /// binary.
    ///
    /// The value is [the same as Git's](https://stackoverflow.com/a/6134127).
    pub const BINARY_FILE_CUTOFF: usize = 8000;

    /// Creates a new context from a file path context and the raw bytes of the file.
    ///
    /// The bytes are classified once, up front: valid UTF-8 is stored as text, anything else is
    /// kept as raw bytes.
    pub(super) fn new(file_ctx: FilePathContext<'l>, content: Vec<u8>) -> Self {
        Self {
            file_ctx,
            content: Content::new(content),
        }
    }

    /// Returns the file context.
    pub fn file_ctx(&self) -> &FilePathContext<'l> {
        &self.file_ctx
    }

    /// Returns the content, or `None` if this is a non-UTF-8 file.
    pub fn content(&self) -> Option<&str> {
        match &self.content {
            Content::Utf8(text) => Some(text.as_ref()),
            Content::NonUtf8(_) => None,
        }
    }

    /// Returns the raw bytes for the content.
    ///
    /// This works for both UTF-8 and non-UTF-8 files.
    pub fn content_bytes(&self) -> &[u8] {
        match &self.content {
            Content::Utf8(text) => text.as_bytes(),
            Content::NonUtf8(bin) => bin.as_ref(),
        }
    }

    /// Returns true if this is a binary file.
    ///
    /// A file is considered binary if it is not valid UTF-8 and a null byte occurs within its
    /// first [`BINARY_FILE_CUTOFF`](Self::BINARY_FILE_CUTOFF) bytes. Files shorter than the
    /// cutoff are searched in full.
    pub fn is_binary(&self) -> bool {
        match &self.content {
            Content::Utf8(_) => {
                // UTF-8 files are not binary by definition.
                false
            }
            Content::NonUtf8(bin) => {
                // Clamp the search window to the file length: slicing `..BINARY_FILE_CUTOFF`
                // directly would panic on files shorter than the cutoff.
                let window = bin.len().min(Self::BINARY_FILE_CUTOFF);
                bin[..window].contains(&0)
            }
        }
    }
}

impl<'l> LintContext<'l> for ContentContext<'l> {
    /// Identifies this context as a content lint on the underlying file's path.
    fn kind(&self) -> LintKind<'l> {
        let path = self.file_ctx.file_path();
        LintKind::Content(path)
    }
}

/// File contents, classified by whether the bytes are valid UTF-8.
#[derive(Debug)]
enum Content {
    /// Contents that passed UTF-8 validation, stored as text.
    Utf8(Box<str>),
    /// Contents that failed UTF-8 validation, stored as the original raw bytes.
    NonUtf8(Box<[u8]>),
}

impl Content {
    /// Classifies `bytes`, taking ownership of the buffer.
    ///
    /// Valid UTF-8 becomes [`Content::Utf8`]; otherwise the untouched bytes are recovered from
    /// the conversion error and stored as [`Content::NonUtf8`].
    fn new(bytes: Vec<u8>) -> Self {
        String::from_utf8(bytes).map_or_else(
            |invalid| Self::NonUtf8(Box::from(invalid.into_bytes())),
            |text| Self::Utf8(Box::from(text)),
        )
    }

    /// Returns the length of the contents in bytes, regardless of variant.
    #[allow(dead_code)]
    fn len(&self) -> usize {
        match self {
            Self::Utf8(text) => text.len(),
            Self::NonUtf8(raw) => raw.len(),
        }
    }
}