Skip to content

Commit 354b8b9

Browse files
authored
Merge pull request #250 from serras/master
Adding support for String and Regex theory. This theory is provided by the SMT solvers Z3 and CVC4. For CVC4 we require to enable an additional option which seems to have a slightly negative impact when solving quantifier formulas. Z3 provides String theory without using additional options. The support for String and Regex theory is mostly stable and sufficient for further experiments.
2 parents ce41852 + 284eb40 commit 354b8b9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+3793
-44
lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@ JavaSMT can express formulas in the following theories:
4646
- Integer
4747
- Rational
4848
- Bitvector
49-
- Floating point
49+
- Floating Point
5050
- Array
5151
- Uninterpreted Function
52+
- String and RegEx
53+
54+
The concrete support for a certain theory depends on the underlying SMT solver.
55+
Only a few SMT solvers provide support for theories like Arrays, Floating Point, String or RegEx.
5256

5357
Currently JavaSMT supports several SMT solvers (see [Getting Started](doc/Getting-started.md) for installation):
5458

src/org/sosy_lab/java_smt/api/FormulaManager.java

+7
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ public interface FormulaManager {
7878
*/
7979
QuantifiedFormulaManager getQuantifiedFormulaManager();
8080

81+
/**
82+
* Returns the String Theory.
83+
*
84+
* @throws UnsupportedOperationException If the theory is not supported by the solver.
85+
*/
86+
StringFormulaManager getStringFormulaManager();
87+
8188
/**
8289
* Create variable of the type equal to {@code formulaType}.
8390
*

src/org/sosy_lab/java_smt/api/FormulaType.java

+50
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ public boolean isSLType() {
6262
return false;
6363
}
6464

65+
public boolean isStringType() {
66+
return false;
67+
}
68+
69+
public boolean isRegexType() {
70+
return false;
71+
}
72+
6573
@Override
6674
public abstract String toString();
6775

@@ -344,6 +352,44 @@ public String toSMTLIBString() {
344352
}
345353
}
346354

355+
public static final FormulaType<StringFormula> StringType =
356+
new FormulaType<>() {
357+
358+
@Override
359+
public boolean isStringType() {
360+
return true;
361+
}
362+
363+
@Override
364+
public String toString() {
365+
return "String";
366+
}
367+
368+
@Override
369+
public String toSMTLIBString() {
370+
return "String";
371+
}
372+
};
373+
374+
public static final FormulaType<BooleanFormula> RegexType =
375+
new FormulaType<>() {
376+
377+
@Override
378+
public boolean isRegexType() {
379+
return true;
380+
}
381+
382+
@Override
383+
public String toString() {
384+
return "RegLan";
385+
}
386+
387+
@Override
388+
public String toSMTLIBString() {
389+
return "RegLan";
390+
}
391+
};
392+
347393
/**
348394
* Parse a string and return the corresponding type. This method is the counterpart of {@link
349395
* #toString()}.
@@ -355,6 +401,10 @@ public static FormulaType<?> fromString(String t) {
355401
return IntegerType;
356402
} else if (RationalType.toString().equals(t)) {
357403
return RationalType;
404+
} else if (StringType.toString().equals(t)) {
405+
return StringType;
406+
} else if (RegexType.toString().equals(t)) {
407+
return RegexType;
358408
} else if (FloatingPointRoundingModeType.toString().equals(t)) {
359409
return FloatingPointRoundingModeType;
360410
} else if (t.startsWith("FloatingPoint<")) {

src/org/sosy_lab/java_smt/api/FunctionDeclarationKind.java

+28
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,34 @@ public enum FunctionDeclarationKind {
256256
FP_AS_IEEEBV,
257257
FP_FROM_IEEEBV,
258258

259+
// String and Regex theory
260+
261+
STR_CONCAT,
262+
STR_PREFIX,
263+
STR_SUFFIX,
264+
STR_CONTAINS,
265+
STR_SUBSTRING,
266+
STR_REPLACE,
267+
STR_REPLACE_ALL,
268+
STR_CHAR_AT,
269+
STR_LENGTH,
270+
STR_INDEX_OF,
271+
STR_TO_RE,
272+
STR_IN_RE,
273+
STR_TO_INT,
274+
INT_TO_STR,
275+
STR_LT,
276+
STR_LE,
277+
RE_PLUS,
278+
RE_STAR,
279+
RE_OPTIONAL,
280+
RE_CONCAT,
281+
RE_UNION,
282+
RE_RANGE,
283+
RE_INTERSECT,
284+
RE_COMPLEMENT,
285+
RE_DIFFERENCE,
286+
259287
// default case
260288

261289
/**

src/org/sosy_lab/java_smt/api/Model.java

+8
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ public interface Model extends Iterable<ValueAssignment>, AutoCloseable {
9090
@Nullable
9191
BigInteger evaluate(BitvectorFormula f);
9292

93+
/**
94+
* Type-safe evaluation for string formulas.
95+
*
96+
* <p>The formula does not need to be a variable, we also allow complex expression.
97+
*/
98+
@Nullable
99+
String evaluate(StringFormula f);
100+
93101
/**
94102
* Iterate over all values present in the model. Note that iterating multiple times may be
95103
* inefficient for some solvers, it is recommended to use {@link
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// This file is part of JavaSMT,
2+
// an API wrapper for a collection of SMT solvers:
3+
// https://github.com/sosy-lab/java-smt
4+
//
5+
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
6+
//
7+
// SPDX-License-Identifier: Apache-2.0
8+
9+
package org.sosy_lab.java_smt.api;
10+
11+
import com.google.errorprone.annotations.Immutable;
12+
13+
/** A formula of the string sort. */
14+
@Immutable
15+
public interface RegexFormula extends Formula {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// This file is part of JavaSMT,
2+
// an API wrapper for a collection of SMT solvers:
3+
// https://github.com/sosy-lab/java-smt
4+
//
5+
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
6+
//
7+
// SPDX-License-Identifier: Apache-2.0
8+
9+
package org.sosy_lab.java_smt.api;
10+
11+
import com.google.errorprone.annotations.Immutable;
12+
13+
/** A formula of the string sort. */
14+
@Immutable
15+
public interface StringFormula extends Formula {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// This file is part of JavaSMT,
2+
// an API wrapper for a collection of SMT solvers:
3+
// https://github.com/sosy-lab/java-smt
4+
//
5+
// SPDX-FileCopyrightText: 2021 Alejandro Serrano Mena
6+
//
7+
// SPDX-License-Identifier: Apache-2.0
8+
9+
package org.sosy_lab.java_smt.api;
10+
11+
import java.util.Arrays;
12+
import java.util.List;
13+
import org.sosy_lab.java_smt.api.NumeralFormula.IntegerFormula;
14+
15+
/**
16+
* Manager for dealing with string formulas. Functions come from
17+
* http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml.
18+
*/
19+
public interface StringFormulaManager {
20+
21+
/**
22+
* Returns a {@link StringFormula} representing the given constant.
23+
*
24+
* @param value the string value the returned <code>Formula</code> should represent
25+
* @return a Formula representing the given value
26+
*/
27+
StringFormula makeString(String value);
28+
29+
/**
30+
* Creates a variable of type String with exactly the given name.
31+
*
32+
* <p>This variable (symbol) represents a "String" for which the SMT solver needs to find a model.
33+
*
34+
* <p>Please make sure that the given name is valid in SMT-LIB2. Take a look at {@link
35+
* FormulaManager#isValidName} for further information.
36+
*
37+
* <p>This method does not quote or unquote the given name, but uses the plain name "AS IS".
38+
* {@link Formula#toString} can return a different String than the given one.
39+
*/
40+
StringFormula makeVariable(String pVar);
41+
42+
// TODO: There is currently no way to use variables of type "Regex", i.e., that represent full
43+
// regular expression for which the SMT solver need to find a model.
44+
// The reason for this is that the current SMT solvers do not support this feature.
45+
// However, we can build a RegexFormula from basic parts like Strings (including
46+
// variables of type String) and operations like range, union, or complement.
47+
48+
BooleanFormula equal(StringFormula str1, StringFormula str2);
49+
50+
BooleanFormula greaterThan(StringFormula str1, StringFormula str2);
51+
52+
BooleanFormula greaterOrEquals(StringFormula str1, StringFormula str2);
53+
54+
BooleanFormula lessThan(StringFormula str1, StringFormula str2);
55+
56+
BooleanFormula lessOrEquals(StringFormula str1, StringFormula str2);
57+
58+
/** Check whether the given prefix is a real prefix of str. */
59+
BooleanFormula prefix(StringFormula prefix, StringFormula str);
60+
61+
/** Check whether the given suffix is a real suffix of str. */
62+
BooleanFormula suffix(StringFormula suffix, StringFormula str);
63+
64+
BooleanFormula contains(StringFormula str, StringFormula part);
65+
66+
/**
67+
* Get the first index for a substring in a String, or -1 if the substring is not found.
68+
* startIndex (inlcuded) denotes the start of the search for the index.
69+
*/
70+
IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex);
71+
72+
/**
73+
* Get a substring of length 1 from the given String.
74+
*
75+
* <p>The result is underspecified, if the index is out of bounds for the given String.
76+
*/
77+
StringFormula charAt(StringFormula str, IntegerFormula index);
78+
79+
/**
80+
* Get a substring from the given String.
81+
*
82+
* <p>The result is underspecified, if the start index is out of bounds for the given String or if
83+
* the requested length is negative. The length of the result is the minimum of the requested
84+
* length and the remaining length of the given String.
85+
*/
86+
StringFormula substring(StringFormula str, IntegerFormula index, IntegerFormula length);
87+
88+
/** Replace the first appearances of target in fullStr with the replacement. */
89+
StringFormula replace(StringFormula fullStr, StringFormula target, StringFormula replacement);
90+
91+
/** Replace all appearances of target in fullStr with the replacement. */
92+
StringFormula replaceAll(StringFormula fullStr, StringFormula target, StringFormula replacement);
93+
94+
IntegerFormula length(StringFormula str);
95+
96+
default StringFormula concat(StringFormula... parts) {
97+
return concat(Arrays.asList(parts));
98+
}
99+
100+
StringFormula concat(List<StringFormula> parts);
101+
102+
/**
103+
* @param str formula representing the string to match
104+
* @param regex formula representing the regular expression
105+
* @return a formula representing the acceptance of the string by the regular expression
106+
*/
107+
BooleanFormula in(StringFormula str, RegexFormula regex);
108+
109+
/**
110+
* Returns a {@link RegexFormula} representing the given constant.
111+
*
112+
* <p>This method does not parse an existing regex from String, but uses the String directly as a
113+
* constant.
114+
*
115+
* @param value the regular expression the returned <code>Formula</code> should represent
116+
*/
117+
RegexFormula makeRegex(String value);
118+
119+
// basic regex operations
120+
121+
/** @return formula denoting the empty set of strings */
122+
RegexFormula none();
123+
124+
/** @return formula denoting the set of all strings, also known as Regex <code>".*"</code>. */
125+
RegexFormula all();
126+
127+
/** @return formula denoting the range regular expression over two sequences of length 1. */
128+
RegexFormula range(StringFormula start, StringFormula end);
129+
130+
/**
131+
* @return formula denoting the range regular expression over two chars.
132+
* @see #range(StringFormula, StringFormula)
133+
*/
134+
default RegexFormula range(char start, char end) {
135+
return range(makeString(String.valueOf(start)), makeString(String.valueOf(end)));
136+
}
137+
138+
/** @return formula denoting the concatenation */
139+
default RegexFormula concat(RegexFormula... parts) {
140+
return concatRegex(Arrays.asList(parts));
141+
}
142+
143+
/** @return formula denoting the concatenation */
144+
// TODO the naming of this function collides with #concat(List<StringFormula>).
145+
// Maybe we should split String and Regex manager.
146+
RegexFormula concatRegex(List<RegexFormula> parts);
147+
148+
/** @return formula denoting the union */
149+
RegexFormula union(RegexFormula regex1, RegexFormula regex2);
150+
151+
/** @return formula denoting the intersection */
152+
RegexFormula intersection(RegexFormula regex1, RegexFormula regex2);
153+
154+
/** @return formula denoting the Kleene closure */
155+
RegexFormula complement(RegexFormula regex);
156+
157+
/** @return formula denoting the Kleene closure (0 or more), also known as STAR operand. */
158+
RegexFormula closure(RegexFormula regex);
159+
160+
// derived regex operations
161+
162+
/** @return formula denoting the difference */
163+
RegexFormula difference(RegexFormula regex1, RegexFormula regex2);
164+
165+
/** @return formula denoting the Kleene cross (1 or more), also known as PLUS operand. */
166+
RegexFormula cross(RegexFormula regex);
167+
168+
/** @return formula denoting the optionality, also known as QUESTIONMARK operand. */
169+
RegexFormula optional(RegexFormula regex);
170+
171+
/** @return formula denoting the concatenation n times */
172+
RegexFormula times(RegexFormula regex, int repetitions);
173+
174+
/**
175+
* Interpret a String formula as an Integer formula.
176+
*
177+
* <p>The number is interpreted in base 10 and has no leading zeros. The method works as expected
178+
* for positive numbers, including zero. It returns the constant value of <code>-1</code> for
179+
* negative numbers or invalid number representations, for example if any char is no digit.
180+
*/
181+
IntegerFormula toIntegerFormula(StringFormula str);
182+
183+
/**
184+
* Interpret an Integer formula as a String formula.
185+
*
186+
* <p>The number is in base 10. The method works as expected for positive numbers, including zero.
187+
* It returns the empty string <code>""</code> for negative numbers.
188+
*/
189+
StringFormula toStringFormula(IntegerFormula number);
190+
}

src/org/sosy_lab/java_smt/basicimpl/AbstractBaseFormulaManager.java

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ protected final TType toSolverType(FormulaType<?> formulaType) {
4646
t = getFormulaCreator().getIntegerType();
4747
} else if (formulaType.isRationalType()) {
4848
t = getFormulaCreator().getRationalType();
49+
} else if (formulaType.isStringType()) {
50+
t = getFormulaCreator().getStringType();
4951
} else if (formulaType.isBitvectorType()) {
5052
FormulaType.BitvectorType bitPreciseType = (FormulaType.BitvectorType) formulaType;
5153
t = getFormulaCreator().getBitvectorType(bitPreciseType.getSize());

0 commit comments

Comments
 (0)