verilator/src/V3String.cpp
Geza Lore 60d5f0e86b
Emit model implementation as loose methods. (#3006)
This patch introduces the concept of 'loose' methods, which semantically
are methods, but are declared as global functions, and are passed an
explicit 'self' pointer. This enables these methods to be declared
outside the class, only when they are needed, therefore removing the
header dependency. The bulk of the emitted model implementation now uses
loose methods.
2021-06-13 14:33:11 +01:00

595 lines
22 KiB
C++

// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
// DESCRIPTION: Verilator: Options parsing
//
// Code available from: https://verilator.org
//
//*************************************************************************
//
// Copyright 2003-2021 by Wilson Snyder. This program is free software; you
// can redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
#include "config_build.h"
#include "verilatedos.h"
// Limited V3 headers here - this is a base class for Vlc etc
#include "V3String.h"
#include "V3Error.h"
#include <algorithm>
size_t VName::s_minLength = 32;
size_t VName::s_maxLength = 0; // Disabled
std::map<string, string> VName::s_dehashMap;
//######################################################################
// Wildcard
// Double procedures, inlined, unrolls loop much better
inline bool VString::wildmatchi(const char* s, const char* p) {
for (; *p; s++, p++) {
if (*p != '*') {
if (((*s) != (*p)) && *p != '?') return false;
} else {
// Trailing star matches everything.
if (!*++p) return true;
while (!wildmatch(s, p)) {
if (*++s == '\0') return false;
}
return true;
}
}
return (*s == '\0');
}
bool VString::wildmatch(const char* s, const char* p) {
for (; *p; s++, p++) {
if (*p != '*') {
if (((*s) != (*p)) && *p != '?') return false;
} else {
// Trailing star matches everything.
if (!*++p) return true;
while (!wildmatchi(s, p)) {
if (*++s == '\0') return false;
}
return true;
}
}
return (*s == '\0');
}
bool VString::wildmatch(const string& s, const string& p) {
return wildmatch(s.c_str(), p.c_str());
}
string VString::dot(const string& a, const string& dot, const string& b) {
if (b == "") return a;
if (a == "") return b;
return a + dot + b;
}
string VString::downcase(const string& str) {
string out = str;
for (auto& cr : out) cr = tolower(cr);
return out;
}
string VString::upcase(const string& str) {
string out = str;
for (auto& cr : out) cr = toupper(cr);
return out;
}
string VString::quoteAny(const string& str, char tgt, char esc) {
string out;
for (const char c : str) {
if (c == tgt) out += esc;
out += c;
}
return out;
}
string VString::quoteStringLiteralForShell(const string& str) {
string result;
const char dquote = '"';
const char escape = '\\';
result.push_back(dquote); // Start quoted string
result.push_back(escape);
result.push_back(dquote); // "
for (const char c : str) {
if (c == dquote || c == escape) result.push_back(escape);
result.push_back(c);
}
result.push_back(escape);
result.push_back(dquote); // "
result.push_back(dquote); // Terminate quoted string
return result;
}
string VString::spaceUnprintable(const string& str) {
string out;
for (const char c : str) {
if (isprint(c)) {
out += c;
} else {
out += ' ';
}
}
return out;
}
string VString::removeWhitespace(const string& str) {
string out;
out.reserve(str.size());
for (const char c : str) {
if (!isspace(c)) out += c;
}
return out;
}
bool VString::isWhitespace(const string& str) {
for (const char c : str) {
if (!isspace(c)) return false;
}
return true;
}
double VString::parseDouble(const string& str, bool* successp) {
char* strgp = new char[str.size() + 1];
char* dp = strgp;
if (successp) *successp = true;
for (const char* sp = str.c_str(); *sp; ++sp) {
if (*sp != '_') *dp++ = *sp;
}
*dp++ = '\0';
char* endp = strgp;
double d = strtod(strgp, &endp);
size_t parsed_len = endp - strgp;
if (parsed_len != strlen(strgp)) {
if (successp) *successp = false;
}
VL_DO_DANGLING(delete[] strgp, strgp);
return d;
}
static bool isWordChar(char c) { return isalnum(c) || c == '_'; }
string VString::replaceWord(const string& str, const string& from, const string& to) {
string result = str;
const size_t len = from.size();
UASSERT_STATIC(len > 0, "Cannot replace empty string");
for (size_t pos = 0; (pos = result.find(from, pos)) != string::npos; pos += len) {
// Only replace whole words
if (((pos > 0) && isWordChar(result[pos - 1])) || //
((pos + len < result.size()) && isWordChar(result[pos + len]))) {
continue;
}
result.replace(pos, len, to);
}
return result;
}
//######################################################################
// VHashSha256
static const uint32_t sha256K[]
= {0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
0xc67178f2};
static inline uint32_t shaRotr32(uint32_t lhs, uint32_t rhs) VL_ATTR_ALWINLINE;
static inline uint32_t shaRotr32(uint32_t lhs, uint32_t rhs) {
return lhs >> rhs | lhs << (32 - rhs);
}
static inline void sha256Block(uint32_t* h, const uint32_t* chunk) VL_ATTR_ALWINLINE;
static inline void sha256Block(uint32_t* h, const uint32_t* chunk) {
uint32_t ah[8];
const uint32_t* p = chunk;
// Initialize working variables to current hash value
for (unsigned i = 0; i < 8; i++) ah[i] = h[i];
// Compression function main loop
for (unsigned i = 0; i < 4; ++i) {
uint32_t w[16];
for (unsigned j = 0; j < 16; ++j) {
if (i == 0) {
w[j] = *p++;
} else {
// Extend the first 16 words into the remaining
// 48 words w[16..63] of the message schedule array:
const uint32_t s0 = shaRotr32(w[(j + 1) & 0xf], 7)
^ shaRotr32(w[(j + 1) & 0xf], 18) ^ (w[(j + 1) & 0xf] >> 3);
const uint32_t s1 = shaRotr32(w[(j + 14) & 0xf], 17)
^ shaRotr32(w[(j + 14) & 0xf], 19) ^ (w[(j + 14) & 0xf] >> 10);
w[j] = w[j] + s0 + w[(j + 9) & 0xf] + s1;
}
const uint32_t s1 = shaRotr32(ah[4], 6) ^ shaRotr32(ah[4], 11) ^ shaRotr32(ah[4], 25);
const uint32_t ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]);
const uint32_t temp1 = ah[7] + s1 + ch + sha256K[i << 4 | j] + w[j];
const uint32_t s0 = shaRotr32(ah[0], 2) ^ shaRotr32(ah[0], 13) ^ shaRotr32(ah[0], 22);
const uint32_t maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]);
const uint32_t temp2 = s0 + maj;
ah[7] = ah[6];
ah[6] = ah[5];
ah[5] = ah[4];
ah[4] = ah[3] + temp1;
ah[3] = ah[2];
ah[2] = ah[1];
ah[1] = ah[0];
ah[0] = temp1 + temp2;
}
}
for (unsigned i = 0; i < 8; ++i) h[i] += ah[i];
}
void VHashSha256::insert(const void* datap, size_t length) {
UASSERT(!m_final, "Called VHashSha256::insert after finalized the hash value");
m_totLength += length;
string tempData;
int chunkLen;
const uint8_t* chunkp;
if (m_remainder == "") {
chunkLen = length;
chunkp = static_cast<const uint8_t*>(datap);
} else {
// If there are large inserts it would be more efficient to avoid this copy
// by copying bytes in the loop below from either m_remainder or the data
// as appropriate.
tempData = m_remainder + string(static_cast<const char*>(datap), length);
chunkLen = tempData.length();
chunkp = reinterpret_cast<const uint8_t*>(tempData.data());
}
// See wikipedia SHA-1 algorithm summary
uint32_t w[64]; // Round buffer, [0..15] are input data, rest used by rounds
int posBegin = 0; // Position in buffer for start of this block
int posEnd = 0; // Position in buffer for end of this block
// Process complete 64-byte blocks
while (posBegin <= chunkLen - 64) {
posEnd = posBegin + 64;
// 64 byte round input data, being careful to swap on big, keep on little
for (int roundByte = 0; posBegin < posEnd; posBegin += 4) {
w[roundByte++] = (static_cast<uint32_t>(chunkp[posBegin + 3])
| (static_cast<uint32_t>(chunkp[posBegin + 2]) << 8)
| (static_cast<uint32_t>(chunkp[posBegin + 1]) << 16)
| (static_cast<uint32_t>(chunkp[posBegin]) << 24));
}
sha256Block(m_inthash, w);
}
m_remainder = string(reinterpret_cast<const char*>(chunkp + posBegin), chunkLen - posEnd);
}
void VHashSha256::finalize() {
if (!m_final) {
// Make sure no 64 byte blocks left
insert("");
m_final = true;
// Process final possibly non-complete 64-byte block
uint32_t w[16]; // Round buffer, [0..15] are input data
for (int i = 0; i < 16; ++i) w[i] = 0;
size_t blockPos = 0;
for (; blockPos < m_remainder.length(); ++blockPos) {
w[blockPos >> 2]
|= ((static_cast<uint32_t>(m_remainder[blockPos])) << ((3 - (blockPos & 3)) << 3));
}
w[blockPos >> 2] |= 0x80 << ((3 - (blockPos & 3)) << 3);
if (m_remainder.length() >= 56) {
sha256Block(m_inthash, w);
for (int i = 0; i < 16; ++i) w[i] = 0;
}
w[15] = m_totLength << 3;
sha256Block(m_inthash, w);
m_remainder.clear();
}
}
string VHashSha256::digestBinary() {
finalize();
string out;
out.reserve(32);
for (size_t i = 0; i < 32; ++i) {
out += (m_inthash[i >> 2] >> (((3 - i) & 0x3) << 3)) & 0xff;
}
return out;
}
uint64_t VHashSha256::digestUInt64() {
const string& binhash = digestBinary();
uint64_t out = 0;
for (size_t byte = 0; byte < sizeof(uint64_t); ++byte) {
unsigned char c = binhash[byte];
out = (out << 8) | c;
}
return out;
}
string VHashSha256::digestHex() {
static const char* const digits = "0123456789abcdef";
const string& binhash = digestBinary();
string out;
out.reserve(70);
for (size_t byte = 0; byte < 32; ++byte) {
out += digits[(binhash[byte] >> 4) & 0xf];
out += digits[(binhash[byte] >> 0) & 0xf];
}
return out;
}
string VHashSha256::digestSymbol() {
// Make a symbol name from hash. Similar to base64, however base 64
// has + and / for last two digits, but need C symbol, and we also
// avoid conflicts with use of _, so use "AB" at the end.
// Thus this function is non-reversible.
static const char* const digits
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789AB";
const string& binhash = digestBinary();
string out;
out.reserve(28);
int pos = 0;
for (; pos < (256 / 8) - 2; pos += 3) {
out += digits[((binhash[pos] >> 2) & 0x3f)];
out += digits[((binhash[pos] & 0x3) << 4)
| (static_cast<int>(binhash[pos + 1] & 0xf0) >> 4)];
out += digits[((binhash[pos + 1] & 0xf) << 2)
| (static_cast<int>(binhash[pos + 2] & 0xc0) >> 6)];
out += digits[((binhash[pos + 2] & 0x3f))];
}
// Any leftover bits don't matter for our purpose
return out;
}
void VHashSha256::selfTestOne(const string& data, const string& data2, const string& exp,
const string& exp64) {
VHashSha256 digest(data);
if (data2 != "") digest.insert(data2);
if (VL_UNCOVERABLE(digest.digestHex() != exp)) {
std::cerr << "%Error: When hashing '" << data + data2 << "'\n" // LCOV_EXCL_LINE
<< " ... got=" << digest.digestHex() << '\n' // LCOV_EXCL_LINE
<< " ... exp=" << exp << endl; // LCOV_EXCL_LINE
}
if (VL_UNCOVERABLE(digest.digestSymbol() != exp64)) {
std::cerr << "%Error: When hashing '" << data + data2 << "'\n" // LCOV_EXCL_LINE
<< " ... got=" << digest.digestSymbol() << '\n' // LCOV_EXCL_LINE
<< " ... exp=" << exp64 << endl; // LCOV_EXCL_LINE
}
}
void VHashSha256::selfTest() {
selfTestOne("", "", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
"47DEQpj8HBSaABTImWA5JCeuQeRkm5NMpJWZG3hS");
selfTestOne("a", "", "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb",
"ypeBEsobvcr6wjGzmiPcTaeG7BgUfE5yuYB3haBu");
selfTestOne("The quick brown fox jumps over the lazy dog", "",
"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592",
"16j7swfXgJRpypq8sAguT41WUeRtPNt2LQLQvzfJ");
selfTestOne("The quick brown fox jumps over the lazy", " dog",
"d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592",
"16j7swfXgJRpypq8sAguT41WUeRtPNt2LQLQvzfJ");
selfTestOne("Test using larger than block-size key and larger than one block-size data", "",
"9dc35674a024b28e8440080b5331652e985f2d61d7a1fca80a648b7f9ffa0dd3",
"ncNWdKAkso6EQAgLUzFlLphfLWHXofyoCmSLf5B6");
selfTestOne("Test using", " larger than block-size key and larger than one block-size data",
"9dc35674a024b28e8440080b5331652e985f2d61d7a1fca80a648b7f9ffa0dd3",
"ncNWdKAkso6EQAgLUzFlLphfLWHXofyoCmSLf5B6");
}
//######################################################################
// VName
string VName::dehash(const string& in) {
static const char VHSH[] = "__Vhsh";
static const size_t DOT_LEN = strlen("__DOT__");
std::string dehashed;
// Need to split 'in' into components separated by __DOT__, 'last_dot_pos'
// keeps track of the position after the most recently found instance of __DOT__
for (string::size_type last_dot_pos = 0; last_dot_pos < in.size();) {
const string::size_type next_dot_pos = in.find("__DOT__", last_dot_pos);
// Two iterators defining the range between the last and next dots.
auto search_begin = std::begin(in) + last_dot_pos;
auto search_end
= next_dot_pos == string::npos ? std::end(in) : std::begin(in) + next_dot_pos;
// Search for __Vhsh between the two dots.
auto begin_vhsh
= std::search(search_begin, search_end, std::begin(VHSH), std::end(VHSH) - 1);
if (begin_vhsh != search_end) {
std::string vhsh(begin_vhsh, search_end);
const auto& it = s_dehashMap.find(vhsh);
UASSERT(it != s_dehashMap.end(), "String not in reverse hash map '" << vhsh << "'");
// Is this not the first component, but the first to require dehashing?
if (last_dot_pos > 0 && dehashed.empty()) {
// Seed 'dehashed' with the previously processed components.
dehashed = in.substr(0, last_dot_pos);
}
// Append the unhashed part of the component.
dehashed += std::string(search_begin, begin_vhsh);
// Append the bit that was lost to truncation but retrieved from the dehash map.
dehashed += it->second;
}
// This component doesn't need dehashing but a previous one might have.
else if (!dehashed.empty()) {
dehashed += std::string(search_begin, search_end);
}
if (next_dot_pos != string::npos) {
// Is there a __DOT__ to add to the dehashed version of 'in'?
if (!dehashed.empty()) { dehashed += "__DOT__"; }
last_dot_pos = next_dot_pos + DOT_LEN;
} else {
last_dot_pos = string::npos;
}
}
return dehashed.empty() ? in : dehashed;
}
string VName::hashedName() {
if (m_name == "") return "";
if (m_hashed != "") return m_hashed; // Memoized
if (s_maxLength == 0 || m_name.length() < s_maxLength) {
m_hashed = m_name;
return m_hashed;
} else {
VHashSha256 hash(m_name);
string suffix = "__Vhsh" + hash.digestSymbol();
if (s_minLength < s_maxLength) {
s_dehashMap[suffix] = m_name.substr(s_minLength);
m_hashed = m_name.substr(0, s_minLength) + suffix;
} else {
s_dehashMap[suffix] = m_name;
m_hashed = suffix;
}
return m_hashed;
}
}
//######################################################################
// VSpellCheck - Algorithm same as GCC's spellcheck.c
VSpellCheck::EditDistance VSpellCheck::editDistance(const string& s, const string& t) {
// Wagner-Fischer algorithm for the Damerau-Levenshtein distance
size_t sLen = s.length();
size_t tLen = t.length();
if (sLen == 0) return tLen;
if (tLen == 0) return sLen;
if (sLen >= LENGTH_LIMIT) return sLen;
if (tLen >= LENGTH_LIMIT) return tLen;
static std::array<EditDistance, LENGTH_LIMIT + 1> s_v_two_ago;
static std::array<EditDistance, LENGTH_LIMIT + 1> s_v_one_ago;
static std::array<EditDistance, LENGTH_LIMIT + 1> s_v_next;
for (size_t i = 0; i < sLen + 1; i++) s_v_one_ago[i] = i;
for (size_t i = 0; i < tLen; i++) {
s_v_next[0] = i + 1;
for (size_t j = 0; j < sLen; j++) {
EditDistance cost = (s[j] == t[i] ? 0 : 1);
EditDistance deletion = s_v_next[j] + 1;
EditDistance insertion = s_v_one_ago[j + 1] + 1;
EditDistance substitution = s_v_one_ago[j] + cost;
EditDistance cheapest = std::min(deletion, insertion);
cheapest = std::min(cheapest, substitution);
if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i]) {
EditDistance transposition = s_v_two_ago[j - 1] + 1;
cheapest = std::min(cheapest, transposition);
}
s_v_next[j + 1] = cheapest;
}
for (size_t j = 0; j < sLen + 1; j++) {
s_v_two_ago[j] = s_v_one_ago[j];
s_v_one_ago[j] = s_v_next[j];
}
}
EditDistance result = s_v_next[sLen];
return result;
}
VSpellCheck::EditDistance VSpellCheck::cutoffDistance(size_t goal_len, size_t candidate_len) {
// Return max acceptable edit distance
size_t max_length = std::max(goal_len, candidate_len);
size_t min_length = std::min(goal_len, candidate_len);
if (max_length <= 1) return 0;
if (max_length - min_length <= 1) return std::max(max_length / 3, static_cast<size_t>(1));
return (max_length + 2) / 3;
}
string VSpellCheck::bestCandidateInfo(const string& goal, EditDistance& distancer) const {
string bestCandidate;
size_t gLen = goal.length();
distancer = LENGTH_LIMIT * 10;
for (const string& candidate : m_candidates) {
size_t cLen = candidate.length();
// Min distance must be inserting/deleting to make lengths match
EditDistance min_distance = (cLen > gLen ? (cLen - gLen) : (gLen - cLen));
if (min_distance >= distancer) continue; // Short-circuit if already better
EditDistance cutoff = cutoffDistance(gLen, cLen);
if (min_distance > cutoff) continue; // Short-circuit if already too bad
EditDistance dist = editDistance(goal, candidate);
UINFO(9, "EditDistance dist=" << dist << " cutoff=" << cutoff << " goal=" << goal
<< " candidate=" << candidate << endl);
if (dist < distancer && dist <= cutoff) {
distancer = dist;
bestCandidate = candidate;
}
}
// If goal matches candidate avoid suggesting replacing with self
if (distancer == 0) return "";
return bestCandidate;
}
void VSpellCheck::selfTestDistanceOne(const string& a, const string& b, EditDistance expected) {
UASSERT_SELFTEST(EditDistance, editDistance(a, b), expected);
UASSERT_SELFTEST(EditDistance, editDistance(b, a), expected);
}
void VSpellCheck::selfTestSuggestOne(bool matches, const string& c, const string& goal,
EditDistance dist) {
EditDistance gdist;
VSpellCheck speller;
speller.pushCandidate(c);
string got = speller.bestCandidateInfo(goal, gdist /*ref*/);
if (matches) {
UASSERT_SELFTEST(const string&, got, c);
UASSERT_SELFTEST(EditDistance, gdist, dist);
} else {
UASSERT_SELFTEST(const string&, got, "");
}
}
void VSpellCheck::selfTest() {
{
selfTestDistanceOne("ab", "ac", 1);
selfTestDistanceOne("ab", "a", 1);
selfTestDistanceOne("a", "b", 1);
}
{
selfTestSuggestOne(true, "DEL_ETE", "DELETE", 1);
selfTestSuggestOne(true, "abcdef", "acbdef", 1);
selfTestSuggestOne(true, "db", "dc", 1);
selfTestSuggestOne(true, "db", "dba", 1);
// Negative suggestions
selfTestSuggestOne(false, "x", "y", 1);
selfTestSuggestOne(false, "sqrt", "assert", 3);
}
{
VSpellCheck speller;
UASSERT_SELFTEST(string, "", speller.bestCandidate(""));
}
{
VSpellCheck speller;
speller.pushCandidate("fred");
speller.pushCandidate("wilma");
speller.pushCandidate("barney");
UASSERT_SELFTEST(string, "fred", speller.bestCandidate("fre"));
UASSERT_SELFTEST(string, "wilma", speller.bestCandidate("whilma"));
UASSERT_SELFTEST(string, "barney", speller.bestCandidate("Barney"));
UASSERT_SELFTEST(string, "", speller.bestCandidate("nothing close"));
}
}